From f81e72f1c865455a2d8c4134a27091a31f6f223b Mon Sep 17 00:00:00 2001 From: Andrew Singleton Date: Thu, 11 Nov 2021 11:43:05 +0100 Subject: [PATCH 1/6] streamline scripts --- verify.bash | 3 +- verify_SYNOP.R | 180 ++++++++++++++++++++++--------------------------- verify_TEMP.R | 121 ++++++++++++++++----------------- 3 files changed, 144 insertions(+), 160 deletions(-) diff --git a/verify.bash b/verify.bash index 8c6e29b..0ced1a9 100755 --- a/verify.bash +++ b/verify.bash @@ -7,7 +7,6 @@ date # Modules -module load proj4 module load R/4.0.4 # Environment variables @@ -25,7 +24,7 @@ export END_DATE=2021012112 export FCST_FREQ=24h export LEAD_TIME=48 -export FCST_MODELS="c('heps_43h22_tg3','heps_43h211')" +export FCST_MODELS="heps_43h22_tg3, heps_43h211" # Run harp export TZ="GMT" diff --git a/verify_SYNOP.R b/verify_SYNOP.R index b87a16d..2a807ce 100644 --- a/verify_SYNOP.R +++ b/verify_SYNOP.R @@ -1,118 +1,102 @@ -library(harpIO) library(harpPoint) -params <- c("Pmsl","T2m","S10m","RH2m","vis","Q2m", - "CCtot","CClow","Cbase", - "Td2m", - #"Gmax","Tmax","Tmin","AccPcp1h", - "AccPcp3h","AccPcp6h","AccPcp12h") - thresholds <- list( - - "Pmsl"=c(960,970,980,990,1000,1010,1020), - "T2m"=c(-20,-10,-5,0,5,10,15,20,25,30), - "Td2m"=c(-20,-10,-5,0,5,10,15,20,25,30), - "Tmin"=c(-20,-10,-5,0,5,10,15,20,25,30), - "Tmax"=c(-20,-10,-5,0,5,10,15,20,25,30), + Pmsl = c(960, 970, 980, 990, 1000, 1010, 1020), + + T2m = c(-20, -10, -5, 0, 5, 10, 15, 20, 25, 30), + Td2m = c(-20, -10, -5, 0, 5, 10, 15, 20, 25, 30), + Tmin = c(-20, -10, -5, 0, 5, 10, 15, 20, 25, 30), + Tmax = c(-20, -10, -5, 0, 5, 10, 15, 20, 25, 30), - "Q2m"=c(2,4,6,8,10,12), - "RH2m"=c(50,60,70,80,90), + Q2m = c(2, 4, 6, 8, 10, 12), + RH2m = c(50, 60, 70, 80, 90), - "S10m"=c(5,10,15,20,25,30,35), - "Gmax"=c(5,10,15,20,25,30,35), + S10m = c(5, 10, 15, 20, 25, 30, 35), + Gmax = c(5, 10, 15, 20, 25, 30, 35), - "AccPcp1h"=c(0.1,0.3,0.5,1,2,4,7,10,15,20), - "AccPcp3h"=c(0.1,0.3,0.5,1,2,4,7,10,15,20), - "AccPcp6h"=c(0.1,0.3,0.5,1,2,4,7,10,15,20), - "AccPcp12h"=c(0.1,0.3,0.5,1,2,4,7,10,15,20), + AccPcp1h = c(0.1, 0.3, 0.5, 1, 2, 4, 7, 10, 15, 20), + AccPcp3h = c(0.1, 0.3, 0.5, 1, 2, 4, 7, 10, 15, 20), + AccPcp6h = c(0.1, 0.3, 0.5, 1, 2, 4, 7, 10, 15, 20), + AccPcp12h = c(0.1, 0.3, 0.5, 1, 2, 4, 7, 10, 15, 20), - "CCtot"=c(0,1,2,3,4,5,6,7,8), - "CClow"=c(0,1,2,3,4,5,6,7,8), - "Cbase"=c(25,50,75,100,200,500,1000,2000,3000,4000,5000), + CCtot = c(0, 1, 2, 3, 4, 5, 6, 7, 8), + CClow = c(0, 1, 2, 3, 4, 5, 6, 7, 8), + Cbase = c(25, 50, 75, 100, 200, 500, 1000, 2000, 3000, 4000, 5000), - "vis"=c(100,1000,5000,10000,20000,40000,50000) + vis = c(100, 1000, 5000, 10000, 20000, 40000, 50000) ) -fctable_dir <- Sys.getenv("FCTABLE_DIR") -veriftable_dir <- Sys.getenv("VERIFTABLE_DIR") -start_date <- Sys.getenv("START_DATE") -end_date <- Sys.getenv("END_DATE") -fcst_freq <- Sys.getenv("FCST_FREQ") +params <- names(thresholds) -fcst_models <- eval(parse(text=Sys.getenv("FCST_MODELS"))) +fctable_dir <- Sys.getenv("FCTABLE_DIR") +obstable_dir <- Sys.getenv("OBSTABLE_DIR") +veriftable_dir <- Sys.getenv("VERIFTABLE_DIR") +start_date <- Sys.getenv("START_DATE") +end_date <- Sys.getenv("END_DATE") +fcst_freq <- Sys.getenv("FCST_FREQ") +fcst_models <- strsplit(gsub("\\s", "", Sys.getenv("FCST_MODELS")), ",") +max_lead <- Sys.getenv("LEAD_TIME") -max_lead <- Sys.getenv("LEAD_TIME") fre_lead <- 1 -lead_times <- seq(0, max_lead, fre_lead) -sqlite_template <- "{eps_model}/{YYYY}/{MM}/FCTABLE_{parameter}_{YYYY}{MM}_{HH}.sqlite" - -for ( param in params ) { - - if (param == "AccPcp12h") { - lead_times <- seq(12, max_lead, 6) - } else if (param == "AccPcp6h") { - lead_times <- seq(6, max_lead, 6) - } else if (param == "AccPcp3h") { - lead_times <- seq(3, max_lead, 3) - } else if (param == "AccPcp1h") { - lead_times <- seq(1, max_lead, 1) - } else if (param == "Tmin" | param == "Tmax" ) { - lead_times <- seq(18, 18, 6) - } else if (param == "Gmax") { - lead_times <- seq(3, max_lead, fre_lead) - } else { - lead_times <- seq(0, max_lead,fre_lead) - } - - obstable_dir <- Sys.getenv("OBSTABLE_DIR") - - - thresholds_ <- NULL - fcst_scales <- NULL - obs_scales <- NULL - - if ( length(grep(pattern = '^(T2m|Tmin|Tmax|Td2m)$', x = param)) > 0 ) { - fcst_scales <- list() - for ( model in fcst_models ) { - fcst_scales[[model]] = list(scale_factor = -273.15, new_units = "C", multiplicative = FALSE) - } - obs_scales <- list(scale_factor = -273.15, new_units = "C", multiplicative = FALSE) - } else if ( length(grep(pattern = '^(Q|Q2m)$', x = param)) > 0 ) { - fcst_scales <- list() - for ( model in fcst_models ) { - fcst_scales[[model]] = list(scale_factor = 1000., new_units = "g/kg", multiplicative = TRUE) +for (param in params) { + + lead_times <- switch( + param, + "AccPcp12h" = seq(12, max_lead, 6), + "AccPcp6h" = seq(6, max_lead, 6), + "AccPcp3h" = seq(3, max_lead, 3), + "AccPcp1h" = seq(1, max_lead, 1), + "Tmin" = , + "Tmax" = seq(18, 18, 6), + "Gmax" = seq(3, max_lead, fre_lead), + seq(0, max_lead, fre_lead) + ) + + switch( + param, + "T2m" = , + "Tmin" = , + "Tmax" = , + "Td2m" = { + fcst_scales <- list(scale_factor = -273.15, new_units = "degC") + obs_scales <- list(scale_factor = -273.15, new_units = "degC") + }, + "Q2m" = { + fcst_scales <- list(scale_factor = 1000, new_units = "g/kg", multiplicative = TRUE) + obs_scales <- list(scale_factor = 1000, new_units = "g/kg", multiplicative = TRUE) + }, + "Cbase" = { + fcst_scales <- list(scale_factor = 0, new_units = "m") + obs_scales <- list(scale_factor = 0, new_units = "m") + }, + { + fcst_scales <- NULL + obs_scales <- NULL + } + ) + + if (!is.null(fcst_scales)) { + fcst_scales <- sapply(fcst_models, function(x) fcst_scales, simplify = FALSE) } - obs_scales <- list(scale_factor = 1000., new_units = "g/kg", multiplicative = TRUE) - - } else if (param == "Cbase") { - - fcst_scales <- list() - for ( model in fcst_models ) { - fcst_scales[[model]] = list(scale_factor = 1.00, new_units = "m", multiplicative = TRUE) - } - obs_scales <- list(scale_factor = 1.0, new_units = "m", multiplicative = TRUE) - - } - -verif_val <- ens_read_and_verify( - num_iterations = 2, - start_date = start_date, - end_date = end_date, - parameter = param, - by = fcst_freq, - fcst_model = fcst_models, - fcst_path = fctable_dir, - fctable_file_template = sqlite_template, - obs_path = obstable_dir, - lead_time = lead_times, - thresholds = thresholds[[param]], - scale_fcst = fcst_scales, - scale_obs = obs_scales, - verif_path = veriftable_dir -) + verif_val <- ens_read_and_verify( + num_iterations = 2, + start_date = start_date, + end_date = end_date, + parameter = param, + by = fcst_freq, + fcst_model = fcst_models, + fcst_path = fctable_dir, + fctable_file_template = "fctable", + obs_path = obstable_dir, + lead_time = lead_times, + thresholds = thresholds[[param]], + scale_fcst = fcst_scales, + scale_obs = obs_scales, + verif_path = veriftable_dir + ) } diff --git a/verify_TEMP.R b/verify_TEMP.R index 4b3e6a1..e073801 100644 --- a/verify_TEMP.R +++ b/verify_TEMP.R @@ -1,69 +1,70 @@ -library(harpIO) library(harpPoint) -params <- c("Z","Q","T","S","RH","Td") - thresholds <- list( - "T"=c(-30,-20,-10,-5,0,5,10,15,20,25,30), - "Td"=c(-30,-20,-10,-5,0,5,10,15,20,25,30), - "S"=c(5,10,15,20,25,30,35,50), - "RH"=c(30,40,50,60,70,80,90) + T = c(-30, -20, -10, -5, 0, 5, 10, 15, 20, 25, 30), + Td = c(-30, -20, -10, -5, 0, 5, 10, 15, 20, 25, 30), + S = c(5, 10, 15, 20, 25, 30, 35, 50), + RH = c(30, 40, 50, 60, 70, 80, 90), + Z = NULL, + Q = NULL ) -quantiles <- c(0.25, 0.5, 0.75, 0.9, 0.95) - - -fctable_dir <- Sys.getenv("FCTABLE_DIR") -obstable_dir <- Sys.getenv("OBSTABLE_DIR") -veriftable_dir <- Sys.getenv("VERIFTABLE_DIR") -start_date <- Sys.getenv("START_DATE") -end_date <- Sys.getenv("END_DATE") -fcst_freq <- Sys.getenv("FCST_FREQ") - - -fcst_models <- eval(parse(text=Sys.getenv("FCST_MODELS"))) - - -lead_times <- seq(0,48,6) - - -for ( param in params ) { - - param_sym <- rlang::sym(param) - - fcst <- read_point_forecast( - vertical_coordinate="pressure", - start_date=start_date,end_date=end_date,by=fcst_freq, - lead_time=lead_times, - fcst_model=fcst_models, - fcst_type="eps",parameter=param, - file_path=fctable_dir,file_template = "fctable_eps_all_leads" - ) - - fcst <- common_cases(fcst,p) - - dt <- as.Date(end_date,format = "%Y%m%d%H") - dt <- dt+2 - end_date_obs <- format(dt,"%Y%m%d%H") - - obs <- read_point_obs( - vertical_coordinate="pressure", - start_date=start_date, end_date=end_date_obs, - parameter=param, obs_path=obstable_dir) - - fcst <- join_to_fcst(fcst, obs) %>% check_obs_against_fcst(param) - - if ( is.null(thresholds[[param]]) ) { - thresholds_=thresholds=thresholds[[param]] - } else { - thresholds_=quantile(obs[[param]],quantiles) - } - - verify <- ens_verify(fcst,!!param_sym, - groupings = c("leadtime","p"),thresholds=thresholds_ ) - - save_point_verif(verify,verif_path = veriftable_dir ) +params <- names(thresholds) + +quantiles <- c(0.25, 0.5, 0.75, 0.9, 0.95) + +fctable_dir <- Sys.getenv("FCTABLE_DIR") +obstable_dir <- Sys.getenv("OBSTABLE_DIR") +veriftable_dir <- Sys.getenv("VERIFTABLE_DIR") +start_date <- Sys.getenv("START_DATE") +end_date <- Sys.getenv("END_DATE") +fcst_freq <- Sys.getenv("FCST_FREQ") +fcst_models <- strsplit(gsub("\\s", "", Sys.getenv("FCST_MODELS")), ",") + +lead_times <- seq(0, 48, 6) + +for (param in params) { + + fcst <- read_point_forecast( + start_date = start_date, + end_date = end_date, + by = fcst_freq, + fcst_model = fcst_models, + fcst_type = "eps", + parameter = param, + lead_time = lead_times, + file_path = fctable_dir, + vertical_coordinate = "pressure" + ) + + fcst <- common_cases(fcst, p) + + obs <- read_point_obs( + start_date = first_validdate(fcst), + end_date = last_validdate(fcst), + parameter = param, + obs_path = obstable_dir, + vertical_coordinate = "pressure" + ) + + fcst <- join_to_fcst(fcst, obs) %>% + check_obs_against_fcst({{param}}) + + thresholds_ <- thresholds[[param]] + + if (is.null(thresholds_)) { + thresholds_ <- quantile(fcst[[1]][[param]], quantiles) + } + + verify <- ens_verify( + fcst, + {{param}}, + groupings = c("leadtime", "p"), + thresholds = thresholds_ + ) + + save_point_verif(verify, verif_path = veriftable_dir) } From 55e31eb9a35fc3a1063e57c0235d3d0bb6b62454 Mon Sep 17 00:00:00 2001 From: Andrew Singleton Date: Thu, 11 Nov 2021 11:54:53 +0100 Subject: [PATCH 2/6] Still need harpIO for TEMP script --- verify_TEMP.R | 1 + 1 file changed, 1 insertion(+) diff --git a/verify_TEMP.R b/verify_TEMP.R index e073801..5275af8 100644 --- a/verify_TEMP.R +++ b/verify_TEMP.R @@ -1,3 +1,4 @@ +library(harpIO) library(harpPoint) thresholds <- list( From e311f0b6c8cdad7ca1c7d7ee836bd6aa55d53038 Mon Sep 17 00:00:00 2001 From: Andrew Singleton Date: Fri, 12 Nov 2021 14:00:41 +0100 Subject: [PATCH 3/6] Add obs_stats script --- .gitignore | 4 +++ get_map.R | 37 ++++++++++++++++++++++ harp-wrapper.Rproj | 16 ++++++++++ obs_stats.R | 76 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 133 insertions(+) create mode 100644 .gitignore create mode 100644 get_map.R create mode 100644 harp-wrapper.Rproj create mode 100644 obs_stats.R diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/get_map.R b/get_map.R new file mode 100644 index 0000000..b7ae642 --- /dev/null +++ b/get_map.R @@ -0,0 +1,37 @@ +# Function to get map background based on a geodomain +# From development branch of harpVis + +get_map <- function(map = "world2", dom = NULL, ...) { + + map <- ggplot2::map_data(map, ...) + # Antarctica causes problems + map <- map[map$region != "Antarctica", ] + + if (isTRUE(meteogrid::is.geodomain(dom) || meteogrid::is.geofield(dom))) { + dom <- meteogrid::as.geodomain(dom) + glimits <- meteogrid::DomainExtent(dom) + map_names <- unique(paste(map$region, map$group, sep = ":")) + map <- split(map, map$group) + map <- dplyr::bind_rows(lapply(map, rbind, NA)) + map <- map[1:(nrow(map) - 1), ] + + map <- as.list(meteogrid::project(map, proj = dom$projection)) + map$names <- map_names + + map <- maps::map.clip.poly( + as.list(map), + xlim = c(glimits$x0, glimits$x1) + glimits$dx * c(-1, 1) / 2, + ylim = c(glimits$y0, glimits$y1) + glimits$dy * c(-1, 1) / 2, + poly = TRUE + ) + + map <- ggplot2::map_data(map) + + names(map)[names(map) == "long"] <- "x" + names(map)[names(map) == "lat"] <- "y" + + } + + map + +} diff --git a/harp-wrapper.Rproj b/harp-wrapper.Rproj new file mode 100644 index 0000000..e83436a --- /dev/null +++ b/harp-wrapper.Rproj @@ -0,0 +1,16 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes diff --git a/obs_stats.R b/obs_stats.R new file mode 100644 index 0000000..4a43d9c --- /dev/null +++ b/obs_stats.R @@ -0,0 +1,76 @@ +library(harpIO) +library(harpData) +library(meteogrid) +library(dplyr) +library(ggplot2) +library(RColorBrewer) + +# Assume get_map.R is in the current working directory. Add full path if necessary +source("get_map.R") + +# Define domain +meps_proj4 <- "+proj=lcc +lat_0=63.3 +lon_0=15 +lat_1=63.3 +lat_2=63.3 +R=6371000" +meps_domain <- structure( + list( + projection = proj4.str2list(meps_proj4), + nx = 949L, + ny = 1069L, + SW = c(0.2782807, 50.3196164), + NE = c(54.24126, 71.57601), + dx = 2500, + dy = 2500 + ), + class = "geodomain" +) + +# Read observations (You will need to add your own path here - I use example data from the harpData package) +oo <- harpData_info("obstable") + +param <- "T2m" + +obs <- read_point_obs( + oo$start_date, + oo$end_date, + param, + obs_path = oo$dir +) + +# Remove stations outside domain and reproject +obs <- cbind(obs, point.index(meps_domain, obs$lon, obs$lat)) %>% + filter(if_all(c(i, j), ~!is.na(.x))) %>% + cbind(project(.$lon, .$lat, meps_domain$projection)) + +# Plot number of stations per time +ggplot(obs, aes(validdate)) + + geom_bar(fill = "steelblue") + + scale_x_datetime(date_breaks = "3 hours") + + labs(x = NULL, y = "Number of stations") + + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) + +# Plot stations on a map +map_poly <- get_map(dom = meps_domain) +obs_count <- summarize(group_by(obs, SID, x, y), Count = n()) +ggplot(obs_count, aes(x, y)) + + geom_polygon( + aes(group = group), map_poly, + fill = "grey90", colour = "grey50" + ) + + geom_point(aes(colour = Count)) + + scale_colour_stepsn( + colors = brewer.pal(9, "YlOrRd"), + breaks = pretty(obs_count$Count, 10) + ) + + coord_equal(xlim = range(obs$x), ylim = range(obs$y), expand = FALSE) + + theme_bw() + + theme( + axis.title = element_blank(), + axis.text = element_blank(), + axis.ticks = element_blank(), + panel.grid = element_blank() + ) + + labs( + title = paste("Number of observations for", param), + subtitle = paste(range(obs$validdate), collapse = " - ") + ) + + From 40d632c99876cc4a9b32bfc9b9b0e2ce8419e20f Mon Sep 17 00:00:00 2001 From: Andrew Singleton Date: Fri, 12 Nov 2021 14:11:45 +0100 Subject: [PATCH 4/6] Use DomainExtent to set plot limits --- obs_stats.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/obs_stats.R b/obs_stats.R index 4a43d9c..288e9c2 100644 --- a/obs_stats.R +++ b/obs_stats.R @@ -23,10 +23,11 @@ meps_domain <- structure( class = "geodomain" ) -# Read observations (You will need to add your own path here - I use example data from the harpData package) +# Read observations (You will need to add your own path here - +# I'm using example data from the harpData package) oo <- harpData_info("obstable") -param <- "T2m" +param <- "AccPcp1h" obs <- read_point_obs( oo$start_date, @@ -47,6 +48,9 @@ ggplot(obs, aes(validdate)) + labs(x = NULL, y = "Number of stations") + theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +# to save use: +# ggsave("filename.png") + # Plot stations on a map map_poly <- get_map(dom = meps_domain) obs_count <- summarize(group_by(obs, SID, x, y), Count = n()) @@ -60,7 +64,11 @@ ggplot(obs_count, aes(x, y)) + colors = brewer.pal(9, "YlOrRd"), breaks = pretty(obs_count$Count, 10) ) + - coord_equal(xlim = range(obs$x), ylim = range(obs$y), expand = FALSE) + + coord_equal( + xlim = c(DomainExtent(meps_domain)$x0, DomainExtent(meps_domain)$x1), + ylim = c(DomainExtent(meps_domain)$y0, DomainExtent(meps_domain)$y1), + expand = FALSE + ) + theme_bw() + theme( axis.title = element_blank(), @@ -73,4 +81,6 @@ ggplot(obs_count, aes(x, y)) + subtitle = paste(range(obs$validdate), collapse = " - ") ) +# to save use: +# ggsave("filename.png") From 526d3fdd606a5c6d91afbdeca383cfeb62688861 Mon Sep 17 00:00:00 2001 From: Andrew Singleton Date: Fri, 12 Nov 2021 14:19:41 +0100 Subject: [PATCH 5/6] Use world instead of world2 due to 0deg lon on edge of map --- obs_stats.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/obs_stats.R b/obs_stats.R index 288e9c2..ce5cb71 100644 --- a/obs_stats.R +++ b/obs_stats.R @@ -27,7 +27,7 @@ meps_domain <- structure( # I'm using example data from the harpData package) oo <- harpData_info("obstable") -param <- "AccPcp1h" +param <- "T2m" obs <- read_point_obs( oo$start_date, @@ -52,7 +52,7 @@ ggplot(obs, aes(validdate)) + # ggsave("filename.png") # Plot stations on a map -map_poly <- get_map(dom = meps_domain) +map_poly <- get_map("world", dom = meps_domain) obs_count <- summarize(group_by(obs, SID, x, y), Count = n()) ggplot(obs_count, aes(x, y)) + geom_polygon( From 00d74e9576c825c9ceb565e65c1599c004cc6cf5 Mon Sep 17 00:00:00 2001 From: Andrew Singleton Date: Fri, 12 Nov 2021 18:29:51 +0100 Subject: [PATCH 6/6] tidy formatting --- obs_stats.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/obs_stats.R b/obs_stats.R index ce5cb71..d9cac29 100644 --- a/obs_stats.R +++ b/obs_stats.R @@ -65,8 +65,8 @@ ggplot(obs_count, aes(x, y)) + breaks = pretty(obs_count$Count, 10) ) + coord_equal( - xlim = c(DomainExtent(meps_domain)$x0, DomainExtent(meps_domain)$x1), - ylim = c(DomainExtent(meps_domain)$y0, DomainExtent(meps_domain)$y1), + xlim = c(DomainExtent(meps_domain)$x0, DomainExtent(meps_domain)$x1), + ylim = c(DomainExtent(meps_domain)$y0, DomainExtent(meps_domain)$y1), expand = FALSE ) + theme_bw() +