From e97a46c3551160b5696b13aae672597496d374db Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 9 Jul 2024 13:57:48 -0600 Subject: [PATCH 001/260] Bug fix to support the %H format in METplus via printf. --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index 245369509b..0f2c4c0716 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -163,9 +163,12 @@ cannot be empty: #----------------------------------------------------------------------- # case "${METplus_time_fmt}" in - "%Y%m%d%H"|"%Y%m%d"|"%H%M%S"|"%H") + "%Y%m%d%H"|"%Y%m%d"|"%H%M%S") fmt="${METplus_time_fmt}" ;; + "%H") + fmt="%02.0f" + ;; "%HHH") # # Print format assumes that the argument to printf (i.e. the number to From 815c941f291a764c86e7a0e0c6d2996b2e94ec9a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 9 Jul 2024 14:42:20 -0600 Subject: [PATCH 002/260] Bug fix to the bug fix! --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index 0f2c4c0716..572f7c68c4 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -167,7 +167,18 @@ cannot be empty: fmt="${METplus_time_fmt}" ;; "%H") - fmt="%02.0f" +# +# The "%H" format needs to be treated differently depending on if it's +# formatting a "lead" time type or another (e.g. "init" or "vald") because +# for "lead", the printf function is used below (which doesn't understand +# the "%H" format) whereas for the others, the date utility is used (which +# does understand "%H"). +# + if [ "${METplus_time_type}" = "lead" ]; then + fmt="%02.0f" + else + fmt="${METplus_time_fmt}" + fi ;; "%HHH") # From bc8548060558ecc9e3f8b2a8f64f9bc7910ac608 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 11 Jul 2024 12:10:29 -0600 Subject: [PATCH 003/260] Bug fix from Michelle H. for prepbufr files: "On May 22, the name of the tar file where the prepbufr files live changed" --- parm/data_locations.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/parm/data_locations.yml b/parm/data_locations.yml index 7901f4c085..dd3b5ddd17 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -351,11 +351,13 @@ NDAS_obs: - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} + - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} archive_file_names: - "com2_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "gpfs_dell1_nco_ops_com_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "com_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "com_obsproc_v1.1_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" + - "com_obsproc_v1.2_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" file_names: obs: - "./nam.t{hh}z.prepbufr.tm*.nr" From 81d61b8ea2d233eece6abcbd08086dec393f1ba3 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 11 Jul 2024 12:22:09 -0600 Subject: [PATCH 004/260] Bug fix for removing phantom 00-hour tasks from workflow. Bug found by Michelle Harrold, solution by Michael Kavulich. 
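The fix suggests the root cause: when a task overrode "attrs:" outright, the
whole default mapping (including "cycledefs: forecast") was replaced rather
than extended, so the affected tasks lost their cycledef restriction and
picked up the phantom 00-hour cycles. Anchoring the defaults ("&default_attrs")
and merging them ("<<: *default_attrs") keeps "cycledefs" while still allowing
"maxtries" to be overridden. A minimal sketch of the YAML merge-key behavior
this relies on (illustrative only, not part of the patch; assumes python3 with
PyYAML available):

python3 - <<'PYEOF'
import yaml

doc = """
defaults: &default_attrs
  cycledefs: forecast
  maxtries: '1'
replaced_wholesale:
  maxtries: '2'
merged:
  <<: *default_attrs
  maxtries: '2'
"""
cfg = yaml.safe_load(doc)
print(cfg["replaced_wholesale"])  # {'maxtries': '2'} -> cycledefs lost
print(cfg["merged"])              # {'cycledefs': 'forecast', 'maxtries': '2'}
PYEOF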
--- parm/wflow/verify_det.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index e82d7c61e1..3acfa3e836 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -1,6 +1,6 @@ default_task_verify_det: &default_task_verify_det account: '&ACCOUNT;' - attrs: + attrs: &default_attrs cycledefs: forecast maxtries: '1' envars: &default_vars @@ -30,6 +30,7 @@ metatask_GridStat_CCPA_all_accums_all_mems: task_run_MET_GridStat_vx_APCP#ACCUM_HH#h_mem#mem#: <<: *default_task_verify_det attrs: + <<: *default_attrs maxtries: '2' command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: @@ -62,6 +63,7 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: task_run_MET_GridStat_vx_ASNOW#ACCUM_HH#h_mem#mem#: <<: *default_task_verify_det attrs: + <<: *default_attrs maxtries: '2' command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: From 35530abd74948fc50d1e6ebc25a25a8a7cd2f8c2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 16 Jul 2024 11:01:43 -0600 Subject: [PATCH 005/260] Bug fix: Append cycle date to names of deterministic GridStat and PointStat tasks' METplus log files. --- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 4f871e6e1b..91c5a7896b 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -277,7 +277,7 @@ fi # metplus_config_tmpl_bn="GridStat_or_PointStat" metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_${ensmem_name}" -metplus_log_bn="${metplus_config_bn}" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # From 6c548ceeb17f60d7fa11417ae8ef7451a5269321 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 16 Jul 2024 14:05:06 -0600 Subject: [PATCH 006/260] Version of ex-script for pulling obs that works for multiple overlapping cycles for CCPA and MRMS but not yet for NDAS or NOHRSC. --- scripts/exregional_get_verif_obs.sh | 295 +++++++++++++++++++--------- 1 file changed, 199 insertions(+), 96 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index a74f11cd3a..564860899f 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -123,6 +123,11 @@ imm=$(echo ${PDY} | cut -c5-6) idd=$(echo ${PDY} | cut -c7-8) ihh=${cyc} +echo +echo "HELLO GGGGGGGG" +iyyyymmddhh=${PDY}${cyc} +echo "iyyyymmddhh = ${iyyyymmddhh}" + # Unix date utility needs dates in yyyy-mm-dd hh:mm:ss format unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" @@ -144,126 +149,184 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do vdate_p1=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) vyyyymmdd_p1=$(echo ${vdate_p1} | cut -c1-8) +echo +echo "HELLO HHHHHHHH" +echo "vyyyymmdd = ${vyyyymmdd}" +echo "vyyyymmdd_p1 = ${vyyyymmdd_p1}" +echo "ihh = ${ihh}" +#exit + #remove leading zero again, this time keep original vhh_noZero=$((10#${vhh})) - - # Retrieve CCPA observations +# +#----------------------------------------------------------------------- +# +# Retrieve CCPA observations. 
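+# (An illustrative aside on the hour arithmetic in the loop above: the 10#
+# prefix in $((10#${vhh})) forces base-10 interpretation, since bash would
+# otherwise parse a leading zero as octal and choke on 08 or 09. A quick
+# sketch, assuming GNU date for DATE_UTIL:
+#   vhh=08
+#   echo $((10#${vhh}))                               # -> 8
+#   date -d "2024-07-09 00:00:00 7 hours" +%Y%m%d%H   # -> 2024070907
+# The same base-10 trick is applied to fcst_length above.)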
+# +#----------------------------------------------------------------------- +# if [[ ${OBTYPE} == "CCPA" ]]; then - #CCPA is accumulation observations, so none to retrieve for hour zero + # CCPA is accumulation observations, so for hour 0 there are no files + # to retrieve. if [[ ${current_fcst} -eq 0 ]]; then current_fcst=$((${current_fcst} + 1)) continue fi - # Staging location for raw CCPA data from HPSS - ccpa_raw=${OBS_DIR}/raw + # Accumulation is for accumulation of CCPA data to pull (hardcoded to + # 01h, see note above). + accum=01 - # Reorganized CCPA location + # Directory in which the daily subdirectories containing the CCPA grib2 + # files will appear after this script is done. Make sure this exists. ccpa_proc=${OBS_DIR} + if [[ ! -d "${ccpa_proc}/${vyyyymmdd}" ]]; then + mkdir -p ${ccpa_proc}/${vyyyymmdd} + fi - # Accumulation is for accumulation of CCPA data to pull (hardcoded to 01h, see note above.) - accum=01 + # File name within the HPSS archive file. Note that this only includes + # the valid hour in its name; the year, month, and day are specified in + # the name of the directory in which it is located within the archive. + ccpa_fn="ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" + + # Full path to final location of the CCPA grib2 file for the current valid + # time. Note that this path includes the valid date (year, month, and day) + # information in the name of a subdirectory and the valid hour-of-day in + # the name of the file. + ccpa_fp_proc="${ccpa_proc}/${vyyyymmdd}/${ccpa_fn}" + + # Temporary staging directory for raw CCPA files from HPSS. These "raw" + # directories are temporary directories in which archive files from HPSS + # are placed and files within those archives extracted. Note that the + # name of this subdirectory is cycle-specific to avoid other get_obs_ccpa + # workflow tasks (i.e. those corresponding to cycles other than the current + # one) writing into the same directory. + ccpa_raw="${ccpa_proc}/raw_${iyyyymmddhh}" # Check if file exists on disk; if not, pull it. - ccpa_file="$ccpa_proc/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" - if [[ -f "${ccpa_file}" ]]; then + if [[ -f "${ccpa_fp_proc}" ]]; then + echo "${OBTYPE} file exists on disk:" - echo "${ccpa_file}" + echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." + else - echo "${OBTYPE} file does not exist on disk:" - echo "${ccpa_file}" - echo "Will attempt to retrieve from remote locations" - # Create necessary raw and prop directories - if [[ ! -d "$ccpa_raw/${vyyyymmdd}" ]]; then - mkdir -p $ccpa_raw/${vyyyymmdd} - fi - if [[ ! -d "$ccpa_raw/${vyyyymmdd_p1}" ]]; then - mkdir -p $ccpa_raw/${vyyyymmdd_p1} + echo "${OBTYPE} file does not exist on disk:" + echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" + echo "Will attempt to retrieve from remote locations." + + # Create the necessary raw (sub)directories on disk. Note that we need + # to create a subdirectory for 1 day + the current valid date because + # that is needed to get around a metadata error in the CCPA files on HPSS + # (in particular, one hour CCPA files have incorrect metadata in the files + # under the "00" directory from 20180718 to 20210504). + if [[ ! -d "${ccpa_raw}/${vyyyymmdd}" ]]; then + mkdir -p ${ccpa_raw}/${vyyyymmdd} fi - if [[ ! -d "$ccpa_proc/${vyyyymmdd}" ]]; then - mkdir -p $ccpa_proc/${vyyyymmdd} + if [[ ! 
-d "${ccpa_raw}/${vyyyymmdd_p1}" ]]; then + mkdir -p ${ccpa_raw}/${vyyyymmdd_p1} fi - # Check if valid hour is 00 + + valid_time=${vyyyymmdd}${vhh} + output_path="${ccpa_raw}/${vyyyymmdd}" if [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then - # Pull CCPA data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd_p1}${vhh} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path $ccpa_raw/${vyyyymmdd_p1} \ - --summary_file ${logfile}" + valid_time=${vyyyymmdd_p1}${vhh} + output_path="${ccpa_raw}/${vyyyymmdd_p1}" + fi - echo "CALLING: ${cmd}" - $cmd || print_err_msg_exit "\ - Could not retrieve CCPA data from HPSS + # The retrieve_data.py script below uses the current working directory as + # the location into which to extract the contents of the HPSS archive (tar) + # file. Thus, if there are multiple get_obs_ccpa tasks running (i.e. ones + # for different cycles), they will be extracting files into the same (current) + # directory. That causes errors in the workflow. To avoid this, change + # location to the raw directory. This will avoid such errors because the + # raw directory has a cycle-specific name. + cd ${ccpa_raw} + + # Pull CCPA data from HPSS. This will get a single grib2 (.gb2) file + # corresponding to the current valid time (valid_time). + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${valid_time} \ + --data_stores hpss \ + --data_type CCPA_obs \ + --output_path ${output_path} \ + --summary_file ${logfile}" - The following command exited with a non-zero exit status: - ${cmd} + echo "CALLING: ${cmd}" + $cmd || print_err_msg_exit "\ + Could not retrieve CCPA data from HPSS. + + The following command exited with a non-zero exit status: + ${cmd} " - else - # Pull CCPA data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path $ccpa_raw/${vyyyymmdd} \ - --summary_file ${logfile}" + # Move CCPA file to its final location. + # + # Since this script is part of a workflow, other tasks (for other cycles) + # that call this script may have extracted and placed the current file + # in its final location between the time we checked for its existence + # above above (and didn't find it) and now. This can happen because + # there can be overlap between the verification times for the current + # cycle and those of other cycles. For this reason, check again for the + # existence of the file in its final location. If it's already been + # created by another task, don't bother to move it from its raw location + # to its final location. + if [[ -f "${ccpa_fp_proc}" ]]; then - echo "CALLING: ${cmd}" - $cmd || print_err_msg_exit "\ - Could not retrieve CCPA data from HPSS + echo "${OBTYPE} file exists on disk:" + echo " ccpa_fp_proc = \"{ccpa_fp_proc}\"" + echo "It was likely created by a get_obs_ccpa workflow task for another cycle." + echo "NOT moving file from its temporary (raw) location to its final location." - The following command exited with a non-zero exit status: - ${cmd} -" - fi + else - # One hour CCPA files have incorrect metadata in the files under the "00" directory from 20180718 to 20210504. - # After data is pulled, reorganize into correct valid yyyymmdd structure. 
- if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 6 ]]; then - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - elif [[ ${vhh_noZero} -ge 7 && ${vhh_noZero} -le 12 ]]; then - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - elif [[ ${vhh_noZero} -ge 13 && ${vhh_noZero} -le 18 ]]; then - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then - if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then - wgrib2 $ccpa_raw/${vyyyymmdd_p1}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -set_date -24hr -grib $ccpa_proc/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -s - else - cp $ccpa_raw/${vyyyymmdd_p1}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - fi - elif [[ ${vhh_noZero} -eq 0 ]]; then - # One hour CCPA files on HPSS have incorrect metadata in the files under the "00" directory from 20180718 to 20210504. - if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then - wgrib2 $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -set_date -24hr -grib $ccpa_proc/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -s - else - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} + # Full path to the CCPA file that was pulled and extracted above and + # placed in the raw directory. + ccpa_fp_raw="${output_path}/${ccpa_fn}" + + # One hour CCPA files have incorrect metadata in the files under the "00" + # directory from 20180718 to 20210504. After data is pulled, reorganize + # into correct valid yyyymmdd structure. + if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 18 ]]; then + mv ${ccpa_fp_raw} ${ccpa_fp_proc} + elif [[ (${vhh_noZero} -eq 0) || (${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23) ]]; then + if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then + wgrib2 ${ccpa_fp_raw} -set_date -24hr -grib ${ccpa_fp_proc} -s + else + mv ${ccpa_fp_raw} ${ccpa_fp_proc} + fi fi + fi fi - # Retrieve MRMS observations +# +#----------------------------------------------------------------------- +# +# Retrieve MRMS observations. +# +#----------------------------------------------------------------------- +# elif [[ ${OBTYPE} == "MRMS" ]]; then + # Top-level MRMS directory - # raw MRMS data from HPSS - mrms_raw=${OBS_DIR}/raw # Reorganized MRMS location mrms_proc=${OBS_DIR} + # raw MRMS data from HPSS + #mrms_raw=${OBS_DIR}/raw + mrms_raw="${mrms_proc}/raw_${iyyyymmddhh}" + # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. for field in ${VAR[@]}; do + if [ "${field}" = "REFC" ]; then field_base_name="MergedReflectivityQCComposite" level="_00.50_" @@ -279,32 +342,53 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do " fi - mrms_file="$mrms_proc/${vyyyymmdd}/${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" + mrms_fn="${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" + mrms_day_dir="${mrms_proc}/${vyyyymmdd}" + mrms_fp="${mrms_proc}/${vyyyymmdd}/${mrms_fn}" + +# if [[ -f "${mrms_fp}" ]]; then +# +# echo "${OBTYPE} file for field \"${field}\" exists on disk:" +# echo " mrms_fp = \"${mrms_fp}\"" +# echo "Will NOT attempt to retrieve from remote locations." 
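+# (Illustrative aside, not part of the original logic: testing a directory's
+# existence as an "already being retrieved" sentinel is only race-free if
+# creating the directory is atomic. A common sketch, using a hypothetical
+# lock name:
+#   if mkdir "${mrms_proc}/${vyyyymmdd}.lock" 2>/dev/null; then
+#     : # only the first task to get here performs the pull
+#   fi
+# since mkdir fails for every caller after the first.)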
+ + if [[ -d "${mrms_day_dir}" ]]; then + + echo "${OBTYPE} directory for field \"${field}\" and day ${vyyyymmdd} exists on disk:" + echo " mrms_day_dir = \"${mrms_day_dir}\"" + echo "This means observation files for this field and all hours of this day have been or are being retrieved." + echo "Will NOT attempt to retrieve the current file" + echo " mrms_fp = \"${mrms_fp}\"" + echo "from remote locations." - if [[ -f "${mrms_file}" ]]; then - echo "${OBTYPE} file exists on disk for field ${field}:\n${mrms_file}" else - echo "${OBTYPE} file does not exist on disk for field ${field}:\n${mrms_file}" - echo "Will attempt to retrieve from remote locations" + + echo "${OBTYPE} file for field \"${field}\" does not exist on disk:" + echo " mrms_fp = \"${mrms_fp}\"" + echo "Will attempt to retrieve from remote locations." + # Create directories if necessary - if [[ ! -d "$mrms_raw/${vyyyymmdd}" ]]; then - mkdir -p $mrms_raw/${vyyyymmdd} + if [[ ! -d "${mrms_raw}/${vyyyymmdd}" ]]; then + mkdir -p ${mrms_raw}/${vyyyymmdd} fi if [[ ! -d "$mrms_proc/${vyyyymmdd}" ]]; then mkdir -p $mrms_proc/${vyyyymmdd} fi + valid_time=${vyyyymmdd}${vhh} + output_path="${mrms_raw}/${vyyyymmdd}" + cd ${mrms_raw} # Pull MRMS data from HPSS cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ --file_set obs \ --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ + --cycle_date ${valid_time} \ --data_stores hpss \ --data_type MRMS_obs \ - --output_path $mrms_raw/${vyyyymmdd} \ + --output_path ${output_path} \ --summary_file ${logfile}" echo "CALLING: ${cmd}" @@ -326,8 +410,13 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do fi done - - # Retrieve NDAS observations +# +#----------------------------------------------------------------------- +# +# Retrieve NDAS observations. +# +#----------------------------------------------------------------------- +# elif [[ ${OBTYPE} == "NDAS" ]]; then # raw NDAS data from HPSS ndas_raw=${OBS_DIR}/raw @@ -363,9 +452,17 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do continue fi +echo "" +echo "HELLO AAAAA" +echo "vhh_noZero = ${vhh_noZero}" + if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then +echo "" +echo "HELLO BBBBB" if [[ ! -d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then +echo "" +echo "HELLO CCCCC" mkdir -p $ndas_raw/${vyyyymmdd}${vhh} fi @@ -459,8 +556,13 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do fi fi - - # Retrieve NOHRSC observations +# +#----------------------------------------------------------------------- +# +# Retrieve NOHRSC observations. 
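+# (Like CCPA above, NOHRSC is an accumulation product, so the section below
+# skips forecast hour 0 -- there is no accumulated forecast to verify yet.
+# The usual sketch of that skip, mirroring the CCPA branch:
+#   if [[ ${current_fcst} -eq 0 ]]; then
+#     current_fcst=$((${current_fcst} + 1))
+#     continue
+#   fi
+# )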
+#
+#-----------------------------------------------------------------------
+#
 elif [[ ${OBTYPE} == "NOHRSC" ]]; then

   #NOHRSC is accumulation observations, so none to retrieve for hour zero
@@ -534,7 +636,8 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do
   Invalid OBTYPE specified for script; valid options are CCPA, MRMS, NDAS, and NOHRSC
 "
   fi
   # Increment to next forecast hour
-  # Increment to next forecast hour
+
+  # Increment to next forecast hour
   echo "Finished fcst hr=${current_fcst}"
   current_fcst=$((${current_fcst} + 1))

 done


 # Clean up raw, unprocessed observation files
-rm -rf ${OBS_DIR}/raw
+#rm -rf ${OBS_DIR}/raw

 #
 #-----------------------------------------------------------------------
 #

From 307f92ee1f998f303d93859ebd0b26bc63db1385 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Wed, 17 Jul 2024 09:53:55 -0600
Subject: [PATCH 007/260] Changes to make get_obs_mrms tasks work for multiple
 cycles and without performing unnecessary repeated pulls.

---
 scripts/exregional_get_verif_obs.sh | 87 ++++++++++++++---------------
 1 file changed, 42 insertions(+), 45 deletions(-)

diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh
index 564860899f..c86ba6796a 100755
--- a/scripts/exregional_get_verif_obs.sh
+++ b/scripts/exregional_get_verif_obs.sh
@@ -89,7 +89,7 @@ set -x
 # hh (00 through 05). If using custom staged data, you will have to
 # rename the files accordingly.
 #
-# If data is retrieved from HPSS, it will automatically staged by this
+# If data is retrieved from HPSS, it will be automatically staged by this
 # this script.
 #
 #
@@ -293,13 +293,15 @@ echo "ihh = ${ihh}"
   # One hour CCPA files have incorrect metadata in the files under the "00"
   # directory from 20180718 to 20210504. After data is pulled, reorganize
   # into correct valid yyyymmdd structure.
+  #mv_or_cp="mv"
+  mv_or_cp="cp"
   if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 18 ]]; then
-    mv ${ccpa_fp_raw} ${ccpa_fp_proc}
+    ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc}
   elif [[ (${vhh_noZero} -eq 0) || (${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23) ]]; then
     if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then
       wgrib2 ${ccpa_fp_raw} -set_date -24hr -grib ${ccpa_fp_proc} -s
     else
-      mv ${ccpa_fp_raw} ${ccpa_fp_proc}
+      ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc}
     fi
   fi

@@ -320,59 +322,53 @@ echo "ihh = ${ihh}"
   # Reorganized MRMS location
   mrms_proc=${OBS_DIR}

+  mrms_day_dir="${mrms_proc}/${vyyyymmdd}"
+
+  if [[ -d "${mrms_day_dir}" ]]; then
+
+    echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:"
+    echo "  mrms_day_dir = \"${mrms_day_dir}\""
+    echo "This means observation files for this field and all hours of this day have been or are being retrieved."
+ echo "Thus, we will NOT attempt to retrieve the current data from remote locations" - mrms_fn="${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - mrms_day_dir="${mrms_proc}/${vyyyymmdd}" - mrms_fp="${mrms_proc}/${vyyyymmdd}/${mrms_fn}" - -# if [[ -f "${mrms_fp}" ]]; then -# -# echo "${OBTYPE} file for field \"${field}\" exists on disk:" -# echo " mrms_fp = \"${mrms_fp}\"" -# echo "Will NOT attempt to retrieve from remote locations." - - if [[ -d "${mrms_day_dir}" ]]; then + else - echo "${OBTYPE} directory for field \"${field}\" and day ${vyyyymmdd} exists on disk:" - echo " mrms_day_dir = \"${mrms_day_dir}\"" - echo "This means observation files for this field and all hours of this day have been or are being retrieved." - echo "Will NOT attempt to retrieve the current file" - echo " mrms_fp = \"${mrms_fp}\"" - echo "from remote locations." + # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. + for field in ${VAR[@]}; do + + # raw MRMS data from HPSS + #mrms_raw=${OBS_DIR}/raw + #mrms_raw="${mrms_proc}/raw_${field}_${iyyyymmddhh}" + mrms_raw="${mrms_proc}/raw_${iyyyymmddhh}" + + if [ "${field}" = "REFC" ]; then + field_base_name="MergedReflectivityQCComposite" + level="_00.50_" + elif [ "${field}" = "RETOP" ]; then + field_base_name="EchoTop" + level="_18_00.50_" + else + echo "Invalid field: ${field}" + print_err_msg_exit "\ + Invalid field specified: ${field} + + Valid options are 'REFC', 'RETOP'. +" + fi - else + mrms_fn="${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" + mrms_fp="${mrms_proc}/${vyyyymmdd}/${mrms_fn}" echo "${OBTYPE} file for field \"${field}\" does not exist on disk:" echo " mrms_fp = \"${mrms_fp}\"" echo "Will attempt to retrieve from remote locations." - # Create directories if necessary + # Create directories if necessary. if [[ ! -d "${mrms_raw}/${vyyyymmdd}" ]]; then mkdir -p ${mrms_raw}/${vyyyymmdd} fi - if [[ ! -d "$mrms_proc/${vyyyymmdd}" ]]; then - mkdir -p $mrms_proc/${vyyyymmdd} + if [[ ! -d "${mrms_proc}/${vyyyymmdd}" ]]; then + mkdir -p ${mrms_proc}/${vyyyymmdd} fi valid_time=${vyyyymmdd}${vhh} @@ -408,8 +404,9 @@ echo "ihh = ${ihh}" hour=$((${hour} + 1)) # hourly increment done - fi - done + done + + fi # #----------------------------------------------------------------------- # From be542168f738cb7f3b93594bd62413ca30d4428b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 17 Jul 2024 23:20:49 -0600 Subject: [PATCH 008/260] Minor improvement for consistency. 
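Since YAML mappings are unordered, moving "command:" above "envars:" changes
only how the task definition reads, not how it is parsed; the point is
presumably to match the key order used by the other get_obs_* tasks. A quick
check (illustrative only; assumes python3 with PyYAML available):

python3 - <<'PYEOF'
import yaml
a = yaml.safe_load("command: run\nenvars: {OBTYPE: NDAS}")
b = yaml.safe_load("envars: {OBTYPE: NDAS}\ncommand: run")
print(a == b)  # True: mapping key order is not significant
PYEOF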
--- parm/wflow/verify_pre.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index b7511bf63f..da43336a0d 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -64,12 +64,12 @@ task_get_obs_mrms: task_get_obs_ndas: <<: *default_task_verify_pre + command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' - command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' From af2ab4c531aa5ca80a513f8fd164485862217b68 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 01:39:37 -0600 Subject: [PATCH 009/260] New version of CCPA obs fetching (rename variables, include lots more comments). --- scripts/exregional_get_verif_obs.sh | 264 ++++++++++++++++++++-------- 1 file changed, 193 insertions(+), 71 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index c86ba6796a..957770e5f1 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -42,8 +42,8 @@ set -x # # {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 # -# If data is retrieved from HPSS, it will automatically staged by this -# this script. +# If data is retrieved from HPSS, it will be automatically staged by this +# script. # # Notes about the data and how it's used for verification: # @@ -53,7 +53,7 @@ set -x # 2. There is a problem with the valid time in the metadata for files # valid from 19 - 00 UTC (or files under the '00' directory). This is # accounted for in this script for data retrieved from HPSS, but if you -# have manually staged data on disk you should be sure this is accouned +# have manually staged data on disk you should be sure this is accounted # for. See in-line comments below for details. # # @@ -167,44 +167,45 @@ echo "ihh = ${ihh}" # if [[ ${OBTYPE} == "CCPA" ]]; then - # CCPA is accumulation observations, so for hour 0 there are no files - # to retrieve. + # CCPA is accumulation observations. We do not need to retrieve any + # observed accumulations at forecast hour 0 because there aren't yet + # any accumulations in the forecast(s) to compare it to. if [[ ${current_fcst} -eq 0 ]]; then current_fcst=$((${current_fcst} + 1)) continue fi - # Accumulation is for accumulation of CCPA data to pull (hardcoded to - # 01h, see note above). + # CCPA accumulation period to consider. Here, we only retrieve data for + # 01h accumulations (see note above). Other accumulations (03h, 06h, 24h) + # are obtained elsewhere in the workflow by adding up these 01h accumulations. accum=01 - # Directory in which the daily subdirectories containing the CCPA grib2 - # files will appear after this script is done. Make sure this exists. - ccpa_proc=${OBS_DIR} - if [[ ! -d "${ccpa_proc}/${vyyyymmdd}" ]]; then - mkdir -p ${ccpa_proc}/${vyyyymmdd} - fi - - # File name within the HPSS archive file. Note that this only includes - # the valid hour in its name; the year, month, and day are specified in - # the name of the directory in which it is located within the archive. 
+ # Base directory in which the daily subdirectories containing the CCPA + # grib2 files will appear after this script is done, and the daily such + # subdirectory for the current valid time (year, month, and day). We + # refer to these as the "processed" base and daily subdirectories because + # they contain the final files after all processing by this script is + # complete. + ccpa_basedir_proc=${OBS_DIR} + ccpa_day_dir_proc="${ccpa_basedir_proc}/${vyyyymmdd}" + # Make sure these directories exist. + mkdir -p ${ccpa_day_dir_proc} + + # Name of the grib2 file to extract from the archive (tar) file. Note + # that this only contains the valid hour; the valid year, month, and day + # are specified in the name of the directory within the archive in which + # the file is located. ccpa_fn="ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" - # Full path to final location of the CCPA grib2 file for the current valid - # time. Note that this path includes the valid date (year, month, and day) - # information in the name of a subdirectory and the valid hour-of-day in - # the name of the file. - ccpa_fp_proc="${ccpa_proc}/${vyyyymmdd}/${ccpa_fn}" - - # Temporary staging directory for raw CCPA files from HPSS. These "raw" - # directories are temporary directories in which archive files from HPSS - # are placed and files within those archives extracted. Note that the - # name of this subdirectory is cycle-specific to avoid other get_obs_ccpa - # workflow tasks (i.e. those corresponding to cycles other than the current - # one) writing into the same directory. - ccpa_raw="${ccpa_proc}/raw_${iyyyymmddhh}" - - # Check if file exists on disk; if not, pull it. + # Full path to the location of the processed CCPA grib2 file for the + # current valid time. Note that this path includes the valid date (year, + # month, and day) information in the name of a subdirectory and the valid + # hour-of-day in the name of the file. + ccpa_fp_proc="${ccpa_day_dir_proc}/${ccpa_fn}" + + # Check if the CCPA grib2 file for the current valid time already exists + # at its procedded location on disk. If so, skip and go to the next valid + # time. If not, pull it. if [[ -f "${ccpa_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -216,34 +217,152 @@ echo "ihh = ${ihh}" echo "${OBTYPE} file does not exist on disk:" echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" echo "Will attempt to retrieve from remote locations." + # + #----------------------------------------------------------------------- + # + # Below, we will use the retrieve_data.py script to retrieve the CCPA + # grib2 file from a data store (e.g. HPSS). Before doing so, note the + # following: + # + # * The daily archive (tar) file containing CCPA obs has a name of the + # form + # + # [PREFIX].YYYYMMDD.tar + # + # where YYYYMMDD is a given year, month, and day combination, and + # [PREFIX] is a string that is not relevant to the discussion here + # (the value it can take on depends on which of several time periods + # YYYYMMDD falls in, and the retrieve_data.py tries various values + # until it finds one for which a tar file exists). Unintuitively, this + # archive file contains accumulation data for valid times starting at + # hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current + # day (YYYYMMDD). In other words, the valid times of the contents of + # this archive file are shifted back by 6 hours relative to the time + # string appearing in the name of the file. See section "DETAILS..." 
+ # for a detailed description of the directory structure in the CCPA + # archive files. + # + # * We call retrieve_data.py in a temporary cycle-specific subdirectory + # in order to prevent get_obs_ccpa tasks for different cycles from + # clobbering each other's output. We refer to this as the "raw" CCPA + # base directory because it contains files as they are found in the + # archives before any processing by this script. + # + # * In each (cycle-specific) raw base directory, the data is arranged in + # daily subdirectories with the same timing as in the archive (tar) + # files (which are described in the section "DETAILS..." below). In + # particular, each daily subdirectory has the form YYYYMDD, and it may + # contain CCPA grib2 files for accumulations valid at hour 19 of the + # previous day (YYYYMM[DD-1]) to hour 18 of the current day (YYYYMMDD). + # (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the + # daily subdirectory for the next day, i.e. YYYYMM[DD+1].) We refer + # to these as raw daily (sub)directories to distinguish them from the + # processed daily subdirectories under the processed (final) CCPA base + # directory (ccpa_basedir_proc). + # + # * For a given cycle, some of the valid times at which there is forecast + # output may not have a corresponding file under the raw base directory + # for that cycle. This is because another cycle that overlaps this cycle + # has already obtained the grib2 CCPA file for that valid time and placed + # it in its processed location; as a result, the retrieveal of that grib2 + # file for this cycle is skipped. + # + # * To obtain a more intuitive temporal arrangement of the data in the + # processed CCPA directory structure than the temporal arrangement used + # in the archives and raw directories, we process the raw files such + # that the data in the processed directory structure is shifted forward + # in time 6 hours relative to the data in the archives and raw directories. + # This results in a processed base directory that, like the raw base + # directory, also contains daily subdirectories of the form YYYYMMDD, + # but each such subdirectory may only contain CCPA data at valid hours + # within that day, i.e. at valid times YYYYMMDD[00, 01, ..., 23] (but + # may not contain data that is valid on the previous, next, or any other + # day). + # + # * For data between 20180718 and 20210504, the 01h accumulation data + # (which is the only accumulation we are retrieving) have incorrect + # metadata under the "00" directory in the archive files (meaning for + # hour 00 and hours 19-23, which are the ones in the "00" directory). + # Below, we use wgrib2 to make a correction for this when transferring + # (moving or copying) grib2 files from the raw daily directories to + # the processed daily directories. + # + # + # DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES + # ---------------------------------------------------------- + # + # The daily archive file containing CCPA obs is named + # + # [PREFIX].YYYYMMDD.tar + # + # This file contains accumulation data for valid times starting at hour + # 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current day + # (YYYYMMDD). In particular, when untarred, the daily archive file + # expands into four subdirectories: 00, 06, 12, and 18. The 06, 12, and + # 18 subdirectories contain grib2 files for accumulations valid at or + # below the hour-of-day given by the subdirectory name (and on YYYYMMDD). 
+ # For example, the 06 directory contains data valid at: + # + # * YYYYMMDD[01, 02, 03, 04, 05, 06] for 01h accumulations; + # * YYYYMMDD[03, 06] for 03h accumulations; + # * YYYYMMDD[06] for 06h accumulations. + # + # The valid times for the data in the 12 and 18 subdirectories are + # analogous. However, the 00 subdirectory is different in that it + # contains accumulations at hour 00 on YYYYMMDD as well as ones BEFORE + # this time, i.e. the data for valid times other than YYYYMMDD00 are on + # the PREVIOUS day. Thus, the 00 subdirectory contains data valid at + # (note the DD-1, meaning one day prior): + # + # * YYYYMM[DD-1][19, 20, 21, 22, 23] and YYYYMMDD00 for 01h accumulations; + # * YYYYMM[DD-1][19] and YYYYMMDD00 for 03h accumulations; + # * YYYYMMDD00 for 06h accumulations. + # + #----------------------------------------------------------------------- + # - # Create the necessary raw (sub)directories on disk. Note that we need - # to create a subdirectory for 1 day + the current valid date because - # that is needed to get around a metadata error in the CCPA files on HPSS - # (in particular, one hour CCPA files have incorrect metadata in the files - # under the "00" directory from 20180718 to 20210504). - if [[ ! -d "${ccpa_raw}/${vyyyymmdd}" ]]; then - mkdir -p ${ccpa_raw}/${vyyyymmdd} - fi - if [[ ! -d "${ccpa_raw}/${vyyyymmdd_p1}" ]]; then - mkdir -p ${ccpa_raw}/${vyyyymmdd_p1} - fi - - valid_time=${vyyyymmdd}${vhh} - output_path="${ccpa_raw}/${vyyyymmdd}" - if [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then + # Set parameters for retrieving CCPA data using retrieve_data.py. + # Definitions: + # + # valid_time: + # The valid time in the name of the archive (tar) file from which data + # will be pulled. Due to the way the data is arranged in the CCPA archive + # files (as described above), for valid hours 19 to 23 of the current day, + # this must be set to the corresponding valid time on the NEXT day. + # + # ccpa_basedir_raw: + # Raw base directory that will contain the raw daily subdirectory in which + # the retrieved CCPA grib2 file will be placed. Note that this must be + # cycle-dependent (where the cycle is given by the variable iyyyymmddhh) + # to avoid get_obs_ccpa workflow tasks for other cycles writing to the + # same directories/files. Note also that this doesn't have to depend on + # the current valid hour (0-18 vs. 19-23), but for clarity and ease of + # debugging, here we do make it valid-hour-dependent. + # + # ccpa_day_dir_raw: + # Raw daily subdirectory under the raw base directory. This is dependent + # on the valid hour (i.e. different for hours 19-23 than for hours 0-18) + # in order to maintain the same data timing arrangement in the raw daily + # directories as in the archive files. + # + if [[ ${vhh_noZero} -ge 0 && ${vhh_noZero} -le 18 ]]; then + valid_time=${vyyyymmdd}${vhh} + ccpa_basedir_raw="${ccpa_basedir_proc}/raw_${iyyyymmddhh}" + ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd}" + elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then valid_time=${vyyyymmdd_p1}${vhh} - output_path="${ccpa_raw}/${vyyyymmdd_p1}" + ccpa_basedir_raw="${ccpa_basedir_proc}/raw_${iyyyymmddhh}_vhh19-23" + ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1}" fi + mkdir -p ${ccpa_day_dir_raw} - # The retrieve_data.py script below uses the current working directory as - # the location into which to extract the contents of the HPSS archive (tar) - # file. Thus, if there are multiple get_obs_ccpa tasks running (i.e. 
ones - # for different cycles), they will be extracting files into the same (current) - # directory. That causes errors in the workflow. To avoid this, change - # location to the raw directory. This will avoid such errors because the - # raw directory has a cycle-specific name. - cd ${ccpa_raw} + # Before calling retrieve_data.py, change location to the raw base + # directory to avoid get_obs_ccpa tasks for other cycles from clobbering + # the output from this call to retrieve_data.py. Note that retrieve_data.py + # extracts the CCPA tar files into the directory it was called from, + # which is the working directory of this script right before retrieve_data.py + # is called. + cd ${ccpa_basedir_raw} # Pull CCPA data from HPSS. This will get a single grib2 (.gb2) file # corresponding to the current valid time (valid_time). @@ -255,7 +374,7 @@ echo "ihh = ${ihh}" --cycle_date ${valid_time} \ --data_stores hpss \ --data_type CCPA_obs \ - --output_path ${output_path} \ + --output_path ${ccpa_day_dir_raw} \ --summary_file ${logfile}" echo "CALLING: ${cmd}" @@ -266,38 +385,41 @@ echo "ihh = ${ihh}" ${cmd} " - # Move CCPA file to its final location. + # Create the processed CCPA grib2 files. This usually consists of just + # moving or copying the raw file to its processed location, but for valid + # times between 20180718 and 20210504, it involves using wgrib2 to correct + # an error in the metadata of the raw file and writing the corrected data + # to a new grib2 file in the processed location. # # Since this script is part of a workflow, other tasks (for other cycles) # that call this script may have extracted and placed the current file - # in its final location between the time we checked for its existence - # above above (and didn't find it) and now. This can happen because - # there can be overlap between the verification times for the current - # cycle and those of other cycles. For this reason, check again for the - # existence of the file in its final location. If it's already been - # created by another task, don't bother to move it from its raw location - # to its final location. + # in its processed location between the time we checked for its existence + # above (and didn't find it) and now. This can happen because there can + # be overlap between the verification times for the current cycle and + # those of other cycles. For this reason, check again for the existence + # of the file in its processed location. If it has already been created + # by another task, don't bother to create it. if [[ -f "${ccpa_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" echo " ccpa_fp_proc = \"{ccpa_fp_proc}\"" - echo "It was likely created by a get_obs_ccpa workflow task for another cycle." - echo "NOT moving file from its temporary (raw) location to its final location." + echo "It was likely created by a get_obs_ccpa workflow task for another cycle that overlaps the current one." + echo "NOT moving or copying file from its raw location to its processed location." else # Full path to the CCPA file that was pulled and extracted above and # placed in the raw directory. - ccpa_fp_raw="${output_path}/${ccpa_fn}" + ccpa_fp_raw="${ccpa_day_dir_raw}/${ccpa_fn}" - # One hour CCPA files have incorrect metadata in the files under the "00" - # directory from 20180718 to 20210504. After data is pulled, reorganize - # into correct valid yyyymmdd structure. 
#mv_or_cp="mv" mv_or_cp="cp" if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 18 ]]; then ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} elif [[ (${vhh_noZero} -eq 0) || (${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23) ]]; then + # One hour CCPA files have incorrect metadata in the files under the "00" + # directory from 20180718 to 20210504. After data is pulled, reorganize + # into correct valid yyyymmdd structure. if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then wgrib2 ${ccpa_fp_raw} -set_date -24hr -grib ${ccpa_fp_proc} -s else From 85c3d58a0c855ea347a2350c62b0eae88ac38bee Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 13:53:26 -0600 Subject: [PATCH 010/260] Minor changes to ccpa section. --- scripts/exregional_get_verif_obs.sh | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 957770e5f1..1e49d1f45c 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -138,6 +138,11 @@ fcst_length=$((10#${fcst_length})) current_fcst=0 while [[ ${current_fcst} -le ${fcst_length} ]]; do + +echo +echo "HELLO GGGGGGGG" +echo "current_fcst = ${current_fcst}" + # Calculate valid date info using date utility vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") @@ -191,10 +196,8 @@ echo "ihh = ${ihh}" # Make sure these directories exist. mkdir -p ${ccpa_day_dir_proc} - # Name of the grib2 file to extract from the archive (tar) file. Note - # that this only contains the valid hour; the valid year, month, and day - # are specified in the name of the directory within the archive in which - # the file is located. + # Name of the grib2 file to extract from the archive (tar) file as well + # as the name of the processed grib2 file. ccpa_fn="ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" # Full path to the location of the processed CCPA grib2 file for the @@ -391,14 +394,14 @@ echo "ihh = ${ihh}" # an error in the metadata of the raw file and writing the corrected data # to a new grib2 file in the processed location. # - # Since this script is part of a workflow, other tasks (for other cycles) - # that call this script may have extracted and placed the current file - # in its processed location between the time we checked for its existence - # above (and didn't find it) and now. This can happen because there can - # be overlap between the verification times for the current cycle and - # those of other cycles. For this reason, check again for the existence - # of the file in its processed location. If it has already been created - # by another task, don't bother to create it. + # Since this script is part of a workflow, another get_obs_ccpa task (i.e. + # for another cycle) may have extracted and placed the current file in its + # processed location between the time we checked for its existence above + # (and didn't find it) and now. This can happen because there can be + # overlap between the verification times for the current cycle and those + # of other cycles. For this reason, check again for the existence of the + # processed file. If it has already been created by another get_obs_ccpa + # task, don't bother to recreate it. 
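+  # (Illustrative aside: this check-then-create sequence is still not atomic,
+  # so two overlapping tasks can race between the test below and the copy.
+  # One way to make the final step safe, sketched with a hypothetical
+  # temporary suffix:
+  #   cp ${ccpa_fp_raw} ${ccpa_fp_proc}.tmp.$$
+  #   mv -n ${ccpa_fp_proc}.tmp.$$ ${ccpa_fp_proc}
+  # A rename within one filesystem is atomic, so a reader never sees a
+  # half-written file and a finished file is never clobbered.)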
if [[ -f "${ccpa_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" From b7c6f00d7b75f0534fd1e2789e90d09c787c4309 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 13:56:25 -0600 Subject: [PATCH 011/260] Changes for MRMS. --- scripts/exregional_get_verif_obs.sh | 213 +++++++++++++++++++--------- 1 file changed, 143 insertions(+), 70 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 1e49d1f45c..3ae8405e36 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -442,96 +442,169 @@ echo "ihh = ${ihh}" # elif [[ ${OBTYPE} == "MRMS" ]]; then - # Top-level MRMS directory + # Base directory in which the daily subdirectories containing the MRMS + # grib2 files for REFC (composite reflectivity) and REFC (echo top) will + # appear after this script is done, and the daily such subdirectory for + # the current valid time (year, month, and day). We refer to these as + # the "processed" base and daily subdirectories because they contain the + # final files after all processing by this script is complete. + mrms_basedir_proc=${OBS_DIR} + mrms_day_dir_proc="${mrms_basedir_proc}/${vyyyymmdd}" + + # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. + for field in ${VAR[@]}; do + + # Set parameters needed in setting the MRMS grib2 file name to create in the day directory. + if [ "${field}" = "REFC" ]; then + file_base_name="MergedReflectivityQCComposite" + level="_00.50_" + elif [ "${field}" = "RETOP" ]; then + file_base_name="EchoTop" + level="_18_00.50_" + else + echo "Invalid field: ${field}" + print_err_msg_exit "\ + Invalid field specified: ${field} + + Valid options are 'REFC', 'RETOP'. +" + fi - # Reorganized MRMS location - mrms_proc=${OBS_DIR} +# Name of the MRMS grib2 file for the current field and valid time that +# will appear in the processed daily subdirectory after this script finishes. + mrms_fn="${file_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" + +# Full path to the processed MRMS grib2 file for the current field and +# valid time. + mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" - mrms_day_dir="${mrms_proc}/${vyyyymmdd}" +# Check if the processed MRMS grib2 file for the current field and valid +# time already exists on disk. If so, skip and go to the next valid time. +# If not, pull it. + if [[ -f "${mrms_fp_proc}" ]]; then - if [[ -d "${mrms_day_dir}" ]]; then + echo "${OBTYPE} file exists on disk:" + echo " mrms_fp_proc = \"${mrms_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." - echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" - echo " mrms_day_dir = \"${mrms_day_dir}\"" - echo "This means observation files for this field and all hours of this day have been or are being retrieved." - echo "Thus, we will NOT attempt to retrieve the current data from remote locations" + else - else + echo "${OBTYPE} file does not exist on disk:" + echo " mrms_fp_proc = \"${mrms_fp_proc}\"" + echo "Will attempt to retrieve from remote locations." - # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. - for field in ${VAR[@]}; do - - # raw MRMS data from HPSS - #mrms_raw=${OBS_DIR}/raw - #mrms_raw="${mrms_proc}/raw_${field}_${iyyyymmddhh}" - mrms_raw="${mrms_proc}/raw_${iyyyymmddhh}" + # Raw base directory that will contain the raw daily subdirectory in which + # the gzipped MRMS grib2 retrieved from archive file will be placed. 
Note + # that the name of this directory depends on (contains) the valid year, + # month, and day (but not on the cycle, i.e. not on iyyyymmddhh) in order + # to avoid having get_obs_mrms tasks from other cycles clobbering the + # output from this one. It is also possible to make this directory name + # depend instead on the cycle, but that turns out to cause an inefficiency + # in that get_obs_mrms tasks for different cycles will not be able to + # detect that another cycle has already retrieved the data for the current + # valid day from an archive and will unnecessarily repeat the retrieval. + #mrms_basedir_raw="${mrms_basedir_proc}/raw_${iyyyymmddhh}" + mrms_basedir_raw="${mrms_basedir_proc}/raw_${vyyyymmdd}" - if [ "${field}" = "REFC" ]; then - field_base_name="MergedReflectivityQCComposite" - level="_00.50_" - elif [ "${field}" = "RETOP" ]; then - field_base_name="EchoTop" - level="_18_00.50_" + # Raw daily subdirectory under the raw base directory. + mrms_day_dir_raw="${mrms_basedir_raw}/${vyyyymmdd}" + + +# Check if the raw daily directory already exists on disk. If so, it +# means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP +# and for all times (hours, minutes, and seconds) in the current valid +# day -- have already been or are in the process of being retrieved from +# the archive (tar) files. If so, skip the retrieval process. If not, +# proceed to retrieve all the files and place them in the raw daily +# directory. + if [[ -d "${mrms_day_dir_raw}" ]]; then + +# Change the following comments. + echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" + echo " mrms_day_dir_proc = \"${mrms_day_dir_proc}\"" + echo "This means observation files for this field and all hours of this day have been or are being retrieved." + echo "Thus, we will NOT attempt to retrieve the current data from remote locations" + else - echo "Invalid field: ${field}" - print_err_msg_exit "\ - Invalid field specified: ${field} - - Valid options are 'REFC', 'RETOP'. -" - fi - mrms_fn="${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - mrms_fp="${mrms_proc}/${vyyyymmdd}/${mrms_fn}" + mkdir -p ${mrms_day_dir_raw} + valid_time=${vyyyymmdd}${vhh} + + cd ${mrms_basedir_raw} + +# Use the retrieve_data.py script to retrieve all the gzipped MRMS grib2 +# files -- i.e. for both REFC and RETOP and for all times (hours, minutes, +# and seconds) in the current valid day -- and place them in the raw daily +# directory. Note that this will pull both the REFC and RETOP files in +# one call. + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${valid_time} \ + --data_stores hpss \ + --data_type MRMS_obs \ + --output_path ${mrms_day_dir_raw} \ + --summary_file ${logfile}" + + echo "CALLING: ${cmd}" + + $cmd || print_err_msg_exit "\ + Could not retrieve MRMS data from HPSS + + The following command exited with a non-zero exit status: + ${cmd} +" - echo "${OBTYPE} file for field \"${field}\" does not exist on disk:" - echo " mrms_fp = \"${mrms_fp}\"" - echo "Will attempt to retrieve from remote locations." +# Create a flag file that can be used to confirm the completion of the +# retrieval of all files for the current valid day. + touch ${mrms_day_dir_raw}/pull_completed.txt - # Create directories if necessary. - if [[ ! -d "${mrms_raw}/${vyyyymmdd}" ]]; then - mkdir -p ${mrms_raw}/${vyyyymmdd} - fi - if [[ ! 
-d "${mrms_proc}/${vyyyymmdd}" ]]; then - mkdir -p ${mrms_proc}/${vyyyymmdd} fi - valid_time=${vyyyymmdd}${vhh} - output_path="${mrms_raw}/${vyyyymmdd}" +# Make sure the retrieval process for the current day (which may have +# been executed above for this cycle or by another cycle) has completed +# by checking for the existence of the flag file that marks complettion. +# If not, keep checking until the flag file shows up. + while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do + echo "Waiting for the retrieval process for valid day ${vyyyymmdd} to complete..." + sleep 5s + done - cd ${mrms_raw} - # Pull MRMS data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${valid_time} \ - --data_stores hpss \ - --data_type MRMS_obs \ - --output_path ${output_path} \ - --summary_file ${logfile}" +# Since this script is part of a workflow, another get_obs_mrms task (i.e. +# for another cycle) may have extracted and placed the current file in its +# processed location between the time we checked for its existence above +# (and didn't find it) and now. This can happen because there can be +# overlap between the verification times for the current cycle and those +# of other cycles. For this reason, check again for the existence of the +# processed file. If it has already been created by another get_obs_mrms +# task, don't bother to recreate it. + if [[ -f "${mrms_fp_proc}" ]]; then - echo "CALLING: ${cmd}" + echo "${OBTYPE} file exists on disk:" + echo " mrms_fp_proc = \"${mrms_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." - $cmd || print_err_msg_exit "\ - Could not retrieve MRMS data from HPSS + else - The following command exited with a non-zero exit status: - ${cmd} -" +# Search the raw daily directory for the current valid day to find the +# gizipped MRMS grib2 file whose time stamp (in the file name) is closest +# to the current valid day and hour. Then unzip that file and copy it +# to the processed daily directory, in the process renaming it to replace +# the minutes and hours in the file name with "0000". + valid_time=${vyyyymmdd}${vhh} + python ${USHdir}/mrms_pull_topofhour.py \ + --valid_time ${valid_time} \ + --outdir ${mrms_basedir_proc} \ + --source ${mrms_basedir_raw} \ + --product ${file_base_name} - hour=0 - while [[ ${hour} -le 23 ]]; do - HH=$(printf "%02d" $hour) - echo "hour=${hour}" - python ${USHdir}/mrms_pull_topofhour.py --valid_time ${vyyyymmdd}${HH} --outdir ${mrms_proc} --source ${mrms_raw} --product ${field_base_name} - hour=$((${hour} + 1)) # hourly increment - done + fi - done + fi - fi + done # #----------------------------------------------------------------------- # From 2bc8ed1c65bd61c52b490d838dd49afb4d11c95b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 19 Jul 2024 16:04:45 -0600 Subject: [PATCH 012/260] Clean up comments in the MRMS section. 
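One behavior worth noting while cleaning up these comments: the loop that
waits on pull_completed.txt spins forever if the task doing the retrieval
dies before touching the flag file. A bounded variant is sketched below
(illustrative only, with a hypothetical max_tries; not part of this commit):

max_tries=120
try=0
while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do
  try=$((try + 1))
  if [[ ${try} -gt ${max_tries} ]]; then
    echo "Timed out waiting for the valid day ${vyyyymmdd} retrieval to complete."
    exit 1
  fi
  sleep 5s
done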
--- scripts/exregional_get_verif_obs.sh | 130 +++++++++++++++------------- 1 file changed, 70 insertions(+), 60 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 3ae8405e36..254b5166a3 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -444,17 +444,17 @@ echo "ihh = ${ihh}" # Base directory in which the daily subdirectories containing the MRMS # grib2 files for REFC (composite reflectivity) and REFC (echo top) will - # appear after this script is done, and the daily such subdirectory for - # the current valid time (year, month, and day). We refer to these as - # the "processed" base and daily subdirectories because they contain the - # final files after all processing by this script is complete. + # be located after this script is done, and the daily such subdirectory + # for the current valid time (year, month, and day). We refer to these + # as the "processed" base and daily subdirectories because they contain + # the final files after all processing by this script is complete. mrms_basedir_proc=${OBS_DIR} mrms_day_dir_proc="${mrms_basedir_proc}/${vyyyymmdd}" - # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. + # Loop over the fields (REFC and RETOP). for field in ${VAR[@]}; do - # Set parameters needed in setting the MRMS grib2 file name to create in the day directory. + # Set field-dependent parameters needed in forming grib2 file names. if [ "${field}" = "REFC" ]; then file_base_name="MergedReflectivityQCComposite" level="_00.50_" @@ -470,17 +470,20 @@ echo "ihh = ${ihh}" " fi -# Name of the MRMS grib2 file for the current field and valid time that -# will appear in the processed daily subdirectory after this script finishes. + # Name of the MRMS grib2 file for the current field and valid time that + # will appear in the processed daily subdirectory after this script finishes. + # This is the name of the processed file. Note that this is generally + # not the name of the gzipped grib2 files that may be retrieved below + # from archive files using the retrieve_data.py script. mrms_fn="${file_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" -# Full path to the processed MRMS grib2 file for the current field and -# valid time. + # Full path to the processed MRMS grib2 file for the current field and + # valid time. mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" -# Check if the processed MRMS grib2 file for the current field and valid -# time already exists on disk. If so, skip and go to the next valid time. -# If not, pull it. + # Check if the processed MRMS grib2 file for the current field and valid + # time already exists on disk. If so, skip this valid time and go to the + # next one. If not, pull it. if [[ -f "${mrms_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -493,50 +496,57 @@ echo "ihh = ${ihh}" echo " mrms_fp_proc = \"${mrms_fp_proc}\"" echo "Will attempt to retrieve from remote locations." - # Raw base directory that will contain the raw daily subdirectory in which - # the gzipped MRMS grib2 retrieved from archive file will be placed. Note - # that the name of this directory depends on (contains) the valid year, - # month, and day (but not on the cycle, i.e. not on iyyyymmddhh) in order - # to avoid having get_obs_mrms tasks from other cycles clobbering the - # output from this one. 
-    # depend instead on the cycle, but that turns out to cause an inefficiency
-    # in that get_obs_mrms tasks for different cycles will not be able to
-    # detect that another cycle has already retrieved the data for the current
-    # valid day from an archive and will unnecessarily repeat the retrieval.
-    #mrms_basedir_raw="${mrms_basedir_proc}/raw_${iyyyymmddhh}"
+      # Base directory that will contain the daily subdirectories in which the
+      # gzipped MRMS grib2 files retrieved from archive files will be placed,
+      # and the daily subdirectory for the current valid year, month, and day.
+      # We refer to these as the "raw" MRMS base and daily directories because
+      # they contain files as they are found in the archives before any processing
+      # by this script.
+      #
+      # Note that the name of the raw base directory depends on (contains) the
+      # valid year, month, and day (but not on the cycle, i.e. not on iyyyymmddhh)
+      # in order to avoid having get_obs_mrms tasks from other cycles clobbering
+      # the output from this one.  It is also possible to make the name of this
+      # directory depend instead on the cycle, but that turns out to cause
+      # an inefficiency in that get_obs_mrms tasks for different cycles will
+      # not be able to detect that another cycle has already retrieved the data
+      # for the current valid day and will unnecessarily repeat the retrieval.
      mrms_basedir_raw="${mrms_basedir_proc}/raw_${vyyyymmdd}"
-
-      # Raw daily subdirectory under the raw base directory.
      mrms_day_dir_raw="${mrms_basedir_raw}/${vyyyymmdd}"

-# Check if the raw daily directory already exists on disk.  If so, it
-# means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP
-# and for all times (hours, minutes, and seconds) in the current valid
-# day -- have already been or are in the process of being retrieved from
-# the archive (tar) files.  If so, skip the retrieval process.  If not,
-# proceed to retrieve all the files and place them in the raw daily
-# directory.
+      # Check if the raw daily directory already exists on disk.  If so, it
+      # means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP
+      # and for all times (hours, minutes, and seconds) in the current valid
+      # day -- have already been or are in the process of being retrieved from
+      # the archive (tar) files.  If so, skip the retrieval process.  If not,
+      # proceed to retrieve all the files and place them in the raw daily
+      # directory.
      if [[ -d "${mrms_day_dir_raw}" ]]; then

-# Change the following comments.
        echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:"
        echo "  mrms_day_dir_proc = \"${mrms_day_dir_proc}\""
-        echo "This means observation files for this field and all hours of this day have been or are being retrieved."
-        echo "Thus, we will NOT attempt to retrieve the current data from remote locations"
+        echo "This means MRMS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved."
+        echo "Thus, we will NOT attempt to retrieve MRMS data for the current valid time from remote locations."

      else

        mkdir -p ${mrms_day_dir_raw}
        valid_time=${vyyyymmdd}${vhh}

+      # Before calling retrieve_data.py, change location to the raw base
+      # directory to keep get_obs_mrms tasks for other cycles from clobbering
+      # the output from this call to retrieve_data.py.  Note that retrieve_data.py
+      # extracts the MRMS tar files into the directory it was called from,
+      # which is the working directory of this script right before retrieve_data.py
+      # is called.
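+      # (For concreteness, a hedged illustration with made-up paths: tar
+      # extraction is relative to the current working directory, so running
+      #   cd /obs/mrms/raw_20240612 && tar -xvf /archives/mrms.20240612.tar
+      # unpacks the archive's contents under /obs/mrms/raw_20240612, which
+      # is why the cd below must precede the retrieve_data.py call.)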
cd ${mrms_basedir_raw} -# Use the retrieve_data.py script to retrieve all the gzipped MRMS grib2 -# files -- i.e. for both REFC and RETOP and for all times (hours, minutes, -# and seconds) in the current valid day -- and place them in the raw daily -# directory. Note that this will pull both the REFC and RETOP files in -# one call. + # Use the retrieve_data.py script to retrieve all the gzipped MRMS grib2 + # files -- i.e. for both REFC and RETOP and for all times (hours, minutes, + # and seconds) in the current valid day -- and place them in the raw daily + # directory. Note that this will pull both the REFC and RETOP files in + # one call. cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ @@ -557,29 +567,29 @@ echo "ihh = ${ihh}" ${cmd} " -# Create a flag file that can be used to confirm the completion of the -# retrieval of all files for the current valid day. + # Create a flag file that can be used to confirm the completion of the + # retrieval of all files for the current valid day. touch ${mrms_day_dir_raw}/pull_completed.txt fi -# Make sure the retrieval process for the current day (which may have -# been executed above for this cycle or by another cycle) has completed -# by checking for the existence of the flag file that marks complettion. -# If not, keep checking until the flag file shows up. + # Make sure the retrieval process for the current day (which may have + # been executed above for this cycle or by another cycle) has completed + # by checking for the existence of the flag file that marks completion. + # If not, keep checking until the flag file shows up. while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do echo "Waiting for the retrieval process for valid day ${vyyyymmdd} to complete..." sleep 5s done -# Since this script is part of a workflow, another get_obs_mrms task (i.e. -# for another cycle) may have extracted and placed the current file in its -# processed location between the time we checked for its existence above -# (and didn't find it) and now. This can happen because there can be -# overlap between the verification times for the current cycle and those -# of other cycles. For this reason, check again for the existence of the -# processed file. If it has already been created by another get_obs_mrms -# task, don't bother to recreate it. + # Since this script is part of a workflow, another get_obs_mrms task (i.e. + # for another cycle) may have extracted and placed the current file in its + # processed location between the time we checked for its existence above + # (and didn't find it) and now. This can happen because there can be + # overlap between the verification times for the current cycle and those + # of other cycles. For this reason, check again for the existence of the + # processed file. If it has already been created by another get_obs_mrms + # task, don't bother to recreate it. if [[ -f "${mrms_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -588,11 +598,11 @@ echo "ihh = ${ihh}" else -# Search the raw daily directory for the current valid day to find the -# gizipped MRMS grib2 file whose time stamp (in the file name) is closest -# to the current valid day and hour. Then unzip that file and copy it -# to the processed daily directory, in the process renaming it to replace -# the minutes and hours in the file name with "0000". + # Search the raw daily directory for the current valid day to find the + # gizipped MRMS grib2 file whose time stamp (in the file name) is closest + # to the current valid day and hour. 
Then unzip that file and copy it
-# to the processed daily directory, in the process renaming it to replace
-# the minutes and hours in the file name with "0000".
+        # Search the raw daily directory for the current valid day to find the
+        # gzipped MRMS grib2 file whose time stamp (in the file name) is closest
+        # to the current valid day and hour.  Then unzip that file and copy it
+        # to the processed daily directory, in the process renaming it to replace
+        # the minutes and seconds in the file name with "0000".
        valid_time=${vyyyymmdd}${vhh}
        python ${USHdir}/mrms_pull_topofhour.py \
          --valid_time ${valid_time} \
          --outdir ${mrms_basedir_proc} \
          --source ${mrms_basedir_raw} \
-          --product ${file_base_name}
+          --product ${file_base_name}

      fi

From 184534283613620a0d4d88cee26d5cd03b45dc99 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Fri, 19 Jul 2024 18:29:44 -0600
Subject: [PATCH 013/260] Minor fixes to NDAS section.

---
 scripts/exregional_get_verif_obs.sh | 36 ++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh
index 254b5166a3..c31795441a 100755
--- a/scripts/exregional_get_verif_obs.sh
+++ b/scripts/exregional_get_verif_obs.sh
@@ -630,7 +630,7 @@ echo "ihh = ${ihh}"
    ndas_proc=${OBS_DIR}

    # Check if file exists on disk
-    ndas_file="$ndas_proc/prepbufr.ndas.${vyyyymmdd}${vhh}"
+    ndas_file="${ndas_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}"
    if [[ -f "${ndas_file}" ]]; then
      echo "${OBTYPE} file exists on disk:"
      echo "${ndas_file}"
@@ -643,7 +643,7 @@ echo "ihh = ${ihh}"
    #
    # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z,
    # nam.t00z.prepbufr.tm03.nr is valid for 21z the previous day, etc.
-    # This means that every six hours we have to obs files valid for the same time:
+    # This means that every six hours we have two obs files valid for the same time:
    # nam.tHHz.prepbufr.tm00.nr and nam.t[HH+6]z.prepbufr.tm06.nr
    # We want to use the tm06 file because it contains more/better obs (confirmed with EMC: even
    # though the earlier files are larger, this is because the time window is larger)
@@ -657,18 +657,24 @@ echo "ihh = ${ihh}"
      continue
    fi

+    # Whether to move or copy extracted files from the raw directories to their
+    # final locations.
+    #mv_or_cp="mv"
+    mv_or_cp="cp"
+
echo ""
echo "HELLO AAAAA"
echo "vhh_noZero = ${vhh_noZero}"

-    if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then
+    if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || \
+          ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then

echo ""
echo "HELLO BBBBB"
-      if [[ ! -d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then
+      if [[ !
-d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then + mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} fi # Pull NDAS data from HPSS @@ -733,7 +740,7 @@ echo "HELLO CCCCC" --cycle_date ${vyyyymmdd}${vhh} \ --data_stores hpss \ --data_type NDAS_obs \ - --output_path $ndas_raw/${vyyyymmdd}${vhh} \ + --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ --summary_file ${logfile}" echo "CALLING: ${cmd}" @@ -745,8 +752,8 @@ echo "HELLO CCCCC" ${cmd} " - if [[ ! -d "$ndas_proc" ]]; then - mkdir -p $ndas_proc + if [[ ! -d "${ndas_proc}" ]]; then + mkdir -p ${ndas_proc} fi for tm in $(seq 1 6); do @@ -755,7 +762,8 @@ echo "HELLO CCCCC" vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - cp $ndas_raw/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr $ndas_proc/prepbufr.ndas.${vyyyymmddhh_tm} + ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ + ${ndas_proc}/prepbufr.ndas.${vyyyymmddhh_tm} done fi From 8c38c19a54642506b4a6dca673a6aaabf667e066 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 20 Jul 2024 10:21:58 -0600 Subject: [PATCH 014/260] Change names of raw directories for CCPA and MRMS to indicate whether they're per-cycle or per-day. --- scripts/exregional_get_verif_obs.sh | 77 ++++++++++++++--------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index c31795441a..4427434b1c 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -36,7 +36,7 @@ set -x # # CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it must be in the following # directory structure and file name conventions expected by verification # tasks: # @@ -46,8 +46,8 @@ set -x # script. # # Notes about the data and how it's used for verification: -# -# 1. Accumulation is currently hardcoded to 01h. The verification will +# +# 1. Accumulation is currently hardcoded to 01h. The verification will # use MET/pcp-combine to sum 01h files into desired accumulations. # # 2. There is a problem with the valid time in the metadata for files @@ -59,17 +59,17 @@ set -x # # MRMS (Multi-Radar Multi-Sensor) radar observations # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it must be in the following # directory structure and file name conventions expected by verification # tasks: # # {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, -# +# # Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity # data and EchoTop_18_00.50_ for echo top data. If data is not available # at the top of the hour, you should rename the file closest in time to # your hour(s) of interest to the above naming format. A script -# "ush/mrms_pull_topofhour.py" is provided for this purpose. +# "ush/mrms_pull_topofhour.py" is provided for this purpose. # # If data is retrieved from HPSS, it will automatically staged by this # this script. 
@@ -77,30 +77,30 @@ set -x # # NDAS (NAM Data Assimilation System) conventional observations # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it must be in the following # directory structure and file name conventions expected by verification # tasks: # # {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} -# +# # Note that data retrieved from HPSS and other sources may be in a -# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is +# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is # either 00, 06, 12, or 18, and prevhour is the number of hours prior to # hh (00 through 05). If using custom staged data, you will have to # rename the files accordingly. -# +# # If data is retrieved from HPSS, it will be automatically staged by this # this script. # # # NOHRSC snow accumulation observations # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it must be in the following # directory structure and file name conventions expected by verification # tasks: # # {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 -# +# # where AA is the 2-digit accumulation duration in hours: 06 or 24 # # METplus is configured to verify snowfall using 06- and 24-h accumulated @@ -143,7 +143,7 @@ echo echo "HELLO GGGGGGGG" echo "current_fcst = ${current_fcst}" - # Calculate valid date info using date utility + # Calculate valid date info using date utility vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") vyyyymmdd=$(echo ${vdate} | cut -c1-8) @@ -176,7 +176,7 @@ echo "ihh = ${ihh}" # observed accumulations at forecast hour 0 because there aren't yet # any accumulations in the forecast(s) to compare it to. if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((${current_fcst} + 1)) + current_fcst=$((current_fcst + 1)) continue fi @@ -235,7 +235,7 @@ echo "ihh = ${ihh}" # where YYYYMMDD is a given year, month, and day combination, and # [PREFIX] is a string that is not relevant to the discussion here # (the value it can take on depends on which of several time periods - # YYYYMMDD falls in, and the retrieve_data.py tries various values + # YYYYMMDD falls in, and the retrieve_data.py tries various values # until it finds one for which a tar file exists). Unintuitively, this # archive file contains accumulation data for valid times starting at # hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current @@ -247,7 +247,7 @@ echo "ihh = ${ihh}" # # * We call retrieve_data.py in a temporary cycle-specific subdirectory # in order to prevent get_obs_ccpa tasks for different cycles from - # clobbering each other's output. We refer to this as the "raw" CCPA + # clobbering each other's output. We refer to this as the "raw" CCPA # base directory because it contains files as they are found in the # archives before any processing by this script. # @@ -260,9 +260,9 @@ echo "ihh = ${ihh}" # (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the # daily subdirectory for the next day, i.e. YYYYMM[DD+1].) We refer # to these as raw daily (sub)directories to distinguish them from the - # processed daily subdirectories under the processed (final) CCPA base + # processed daily subdirectories under the processed (final) CCPA base # directory (ccpa_basedir_proc). 
-  #
+  #
   # * For a given cycle, some of the valid times at which there is forecast
   #   output may not have a corresponding file under the raw base directory
   #   for that cycle.  This is because another cycle that overlaps this cycle
@@ -274,7 +274,7 @@ echo "ihh = ${ihh}"
   #   processed CCPA directory structure than the temporal arrangement used
   #   in the archives and raw directories, we process the raw files such
   #   that the data in the processed directory structure is shifted forward
-  #   in time 6 hours relative to the data in the archives and raw directories.
+  #   in time 6 hours relative to the data in the archives and raw directories.
   #   This results in a processed base directory that, like the raw base
   #   directory, also contains daily subdirectories of the form YYYYMMDD,
   #   but each such subdirectory may only contain CCPA data at valid hours
@@ -293,7 +293,7 @@ echo "ihh = ${ihh}"
   #
   # DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES
   # ----------------------------------------------------------
-  #
+  #
   # The daily archive file containing CCPA obs is named
   #
   #   [PREFIX].YYYYMMDD.tar
@@ -345,16 +345,16 @@ echo "ihh = ${ihh}"
   #   ccpa_day_dir_raw:
   #   Raw daily subdirectory under the raw base directory.  This is dependent
   #   on the valid hour (i.e. different for hours 19-23 than for hours 0-18)
-  #   in order to maintain the same data timing arrangement in the raw daily
+  #   in order to maintain the same data timing arrangement in the raw daily
   #   directories as in the archive files.
   #
   if [[ ${vhh_noZero} -ge 0 && ${vhh_noZero} -le 18 ]]; then
     valid_time=${vyyyymmdd}${vhh}
-    ccpa_basedir_raw="${ccpa_basedir_proc}/raw_${iyyyymmddhh}"
+    ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}"
     ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd}"
   elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then
     valid_time=${vyyyymmdd_p1}${vhh}
-    ccpa_basedir_raw="${ccpa_basedir_proc}/raw_${iyyyymmddhh}_vhh19-23"
+    ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}_vhh19-23"
     ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1}"
   fi
   mkdir -p ${ccpa_day_dir_raw}
@@ -362,7 +362,7 @@ echo "ihh = ${ihh}"
   # Before calling retrieve_data.py, change location to the raw base
   # directory to avoid get_obs_ccpa tasks for other cycles from clobbering
   # the output from this call to retrieve_data.py.  Note that retrieve_data.py
-  # extracts the CCPA tar files into the directory it was called from,
+  # extracts the CCPA tar files into the directory it was called from,
   # which is the working directory of this script right before retrieve_data.py
   # is called.
   cd ${ccpa_basedir_raw}
@@ -402,7 +402,7 @@ echo "ihh = ${ihh}"
   # of other cycles.  For this reason, check again for the existence of the
   # processed file.  If it has already been created by another get_obs_ccpa
   # task, don't bother to recreate it.
-  if [[ -f "${ccpa_fp_proc}" ]]; then
+  if [[ -f "${ccpa_fp_proc}" ]]; then

     echo "${OBTYPE} file exists on disk:"
     echo "  ccpa_fp_proc = \"${ccpa_fp_proc}\""
@@ -446,14 +446,14 @@ echo "ihh = ${ihh}"
   # grib2 files for REFC (composite reflectivity) and RETOP (echo top) will
   # be located after this script is done, and the daily subdirectory
   # for the current valid time (year, month, and day).  We refer to these
-  # as the "processed" base and daily subdirectories because they contain
+  # as the "processed" base and daily subdirectories because they contain
   # the final files after all processing by this script is complete.
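   # (As a hedged, concrete example of this layout with made-up values: if
   # OBS_DIR=/data/obs/mrms and vyyyymmdd=20240612, then the processed base
   # directory is /data/obs/mrms and the processed daily subdirectory is
   # /data/obs/mrms/20240612, holding files such as
   # MergedReflectivityQCComposite_00.50_20240612-120000.grib2.)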
mrms_basedir_proc=${OBS_DIR} mrms_day_dir_proc="${mrms_basedir_proc}/${vyyyymmdd}" # Loop over the fields (REFC and RETOP). for field in ${VAR[@]}; do - + # Set field-dependent parameters needed in forming grib2 file names. if [ "${field}" = "REFC" ]; then file_base_name="MergedReflectivityQCComposite" @@ -465,7 +465,7 @@ echo "ihh = ${ihh}" echo "Invalid field: ${field}" print_err_msg_exit "\ Invalid field specified: ${field} - + Valid options are 'REFC', 'RETOP'. " fi @@ -476,7 +476,7 @@ echo "ihh = ${ihh}" # not the name of the gzipped grib2 files that may be retrieved below # from archive files using the retrieve_data.py script. mrms_fn="${file_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - + # Full path to the processed MRMS grib2 file for the current field and # valid time. mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" @@ -511,16 +511,15 @@ echo "ihh = ${ihh}" # an inefficiency in that get_obs_mrms tasks for different cycles will # not be able to detect that another cycle has already retrieved the data # for the current valid day will unnecessarily repeat the retrieval. - mrms_basedir_raw="${mrms_basedir_proc}/raw_${vyyyymmdd}" + mrms_basedir_raw="${mrms_basedir_proc}/raw_day${vyyyymmdd}" mrms_day_dir_raw="${mrms_basedir_raw}/${vyyyymmdd}" - - # Check if the raw daily directory already exists on disk. If so, it + # Check if the raw daily directory already exists on disk. If so, it # means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP # and for all times (hours, minutes, and seconds) in the current valid # day -- have already been or are in the process of being retrieved from # the archive (tar) files. If so, skip the retrieval process. If not, - # proceed to retrieve all the files and place them in the raw daily + # proceed to retrieve all the files and place them in the raw daily # directory. if [[ -d "${mrms_day_dir_raw}" ]]; then @@ -537,7 +536,7 @@ echo "ihh = ${ihh}" # Before calling retrieve_data.py, change location to the raw base # directory to avoid get_obs_mrms tasks for other cycles from clobbering # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the MRMS tar files into the directory it was called from, + # extracts the MRMS tar files into the directory it was called from, # which is the working directory of this script right before retrieve_data.py # is called. cd ${mrms_basedir_raw} @@ -608,7 +607,7 @@ echo "ihh = ${ihh}" --valid_time ${valid_time} \ --outdir ${mrms_basedir_proc} \ --source ${mrms_basedir_raw} \ - --product ${file_base_name} + --product ${file_base_name} fi @@ -641,7 +640,7 @@ echo "ihh = ${ihh}" # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... , nam.tHHz.prepbufr.tm06.nr # - # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z, + # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z, # nam.t00z.prepbufr.tm03.nr is valid for 21z the previous day, etc. # This means that every six hours we have two obs files valid for the same time: # nam.tHHz.prepbufr.tm00.nr and nam.t[HH+6]z.prepbufr.tm06.nr @@ -652,7 +651,7 @@ echo "ihh = ${ihh}" # pull more HPSS tarballs than necessary if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then - # If at forecast hour zero, skip to next hour. + # If at forecast hour zero, skip to next hour. 
current_fcst=$((${current_fcst} + 1)) continue fi @@ -793,7 +792,7 @@ echo "HELLO CCCCC" # If 24-hour files should be available (at 00z and 12z) then look for both files # Otherwise just look for 6hr file if (( ${current_fcst} % 12 == 0 )) && (( ${current_fcst} >= 24 )) ; then - if [[ ! -f "${nohrsc06h_file}" || ! -f "${nohrsc24h_file}" ]] ; then + if [[ ! -f "${nohrsc06h_file}" || ! -f "${nohrsc24h_file}" ]] ; then retrieve=1 echo "${OBTYPE} files do not exist on disk:" echo "${nohrsc06h_file}" @@ -848,7 +847,7 @@ echo "HELLO CCCCC" print_err_msg_exit "\ Invalid OBTYPE specified for script; valid options are CCPA, MRMS, NDAS, and NOHRSC " - fi # Increment to next forecast hour + fi # Increment to next forecast hour # Increment to next forecast hour echo "Finished fcst hr=${current_fcst}" From 7f531871c2909e9aaa4b300561e59f67d5e8bc55 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 20 Jul 2024 11:14:42 -0600 Subject: [PATCH 015/260] Version with NDAS changes that seems to work. Still need lots of cleanup and comments. --- scripts/exregional_get_verif_obs.sh | 42 +++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 4427434b1c..673e4edc76 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -622,14 +622,34 @@ echo "ihh = ${ihh}" #----------------------------------------------------------------------- # elif [[ ${OBTYPE} == "NDAS" ]]; then +# Fix these comments. + # Calculate valid date - 1 day; this is needed because some obs files + # are stored in the *previous* day's 00h directory + vdate_m1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour ago" +%Y%m%d%H) + #vyyyymmdd_m1h=$(echo ${vdate_m1h} | cut -c1-8) + +echo "" +echo "HELLO PPPPPPP" +echo "vyyyymmdd = ${vyyyymmdd}" +echo "vhh = ${vhh}" +echo "vhh_noZero = ${vhh_noZero}" +#echo "vyyyymmdd_m1h = ${vyyyymmdd_m1h}" +echo "vdate_m1h = ${vdate_m1h}" + # raw NDAS data from HPSS ndas_raw=${OBS_DIR}/raw # Reorganized NDAS location ndas_proc=${OBS_DIR} + # raw NDAS data from HPSS + #ndas_raw=${OBS_DIR}/raw + ndas_raw="${ndas_proc}/raw_cyc${iyyyymmddhh}" + # Check if file exists on disk - ndas_file="${ndas_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" + #ndas_file="${ndas_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" + #ndas_file_m1h="${ndas_proc}/prepbufr.ndas.${vyyyymmdd_m1h}${vhh}" + ndas_file="${ndas_proc}/prepbufr.ndas.${vdate_m1h}" if [[ -f "${ndas_file}" ]]; then echo "${OBTYPE} file exists on disk:" echo "${ndas_file}" @@ -650,9 +670,14 @@ echo "ihh = ${ihh}" # The current logic of this script will likely stage more files than you need, but will never # pull more HPSS tarballs than necessary - if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then - # If at forecast hour zero, skip to next hour. - current_fcst=$((${current_fcst} + 1)) +# + +# This seems like a strange statement since the only way it can be true +# is if the forecast length is zero. + # If at forecast hour zero, skip to next hour. + #if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then + if [[ ${current_fcst} -eq 0 ]]; then + current_fcst=$((current_fcst + 1)) continue fi @@ -670,12 +695,16 @@ echo "vhh_noZero = ${vhh_noZero}" echo "" echo "HELLO BBBBB" - if [[ ! -d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then + #valid_time=${vyyyymmdd}${vhh} + #output_path="${ndas_raw}/${vyyyymmdd}" + + if [[ ! 
-d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then echo "" echo "HELLO CCCCC" mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} fi + cd ${ndas_raw} # Pull NDAS data from HPSS cmd=" python3 -u ${USHdir}/retrieve_data.py \ @@ -704,6 +733,7 @@ echo "HELLO CCCCC" # copy files from the previous 6 hours ("tm" means "time minus") # The tm06 files contain more/better observations than tm00 for the equivalent time for tm in $(seq 1 6); do +# for tm in $(seq --format="%02g" 6 -1 1); do vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_vdate} ${tm} hours ago" +%Y%m%d%H) tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') @@ -715,6 +745,7 @@ echo "HELLO CCCCC" # If at last forecast hour, make sure we're getting the last observations if [[ ${current_fcst} -eq ${fcst_length} ]]; then + echo "Retrieving NDAS obs for final forecast hour" vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6))) if [[ ${vhh_noZero} -eq 24 ]]; then @@ -730,6 +761,7 @@ echo "HELLO CCCCC" mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} fi + cd ${ndas_raw} # Pull NDAS data from HPSS cmd=" python3 -u ${USHdir}/retrieve_data.py \ From 7926705a1c7a92f01958ed939d899118d0004d0c Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 20 Jul 2024 22:02:17 -0600 Subject: [PATCH 016/260] Second set of NDAS changes so that there are no repeat pulls of NDAS files from HPSS (and works with multiple cycles). --- scripts/exregional_get_verif_obs.sh | 297 +++++++++++++++++----------- 1 file changed, 185 insertions(+), 112 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 673e4edc76..033dd3c0fb 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -133,6 +133,16 @@ unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" # This awk expression gets the last item of the list $FHR fcst_length=$(echo ${FHR} | awk '{ print $NF }') + +if [[ ${OBTYPE} == "NDAS" ]]; then + vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) + vhh_last=$(echo ${vdate_last} | cut -c9-10) + hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) + fcst_length_rounded_up=$(( fcst_length + hours_to_add )) +# vdate_last_rounded_up=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length_rounded_up} hours" +%Y%m%d%H) + fcst_length=${fcst_length_rounded_up} +fi + # Make sure fcst_length isn't octal (leading zero) fcst_length=$((10#${fcst_length})) @@ -145,19 +155,19 @@ echo "current_fcst = ${current_fcst}" # Calculate valid date info using date utility vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) - unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") + #unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") vyyyymmdd=$(echo ${vdate} | cut -c1-8) vhh=$(echo ${vdate} | cut -c9-10) # Calculate valid date + 1 day; this is needed because some obs files # are stored in the *next* day's 00h directory - vdate_p1=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) - vyyyymmdd_p1=$(echo ${vdate_p1} | cut -c1-8) + vdate_p1d=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) + vyyyymmdd_p1d=$(echo ${vdate_p1d} | cut -c1-8) echo echo "HELLO HHHHHHHH" echo "vyyyymmdd = ${vyyyymmdd}" -echo "vyyyymmdd_p1 = ${vyyyymmdd_p1}" +echo "vyyyymmdd_p1d = ${vyyyymmdd_p1d}" echo "ihh = ${ihh}" #exit @@ -353,9 +363,9 @@ echo "ihh = ${ihh}" ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}" ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd}" elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} 
-le 23 ]]; then - valid_time=${vyyyymmdd_p1}${vhh} + valid_time=${vyyyymmdd_p1d}${vhh} ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}_vhh19-23" - ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1}" + ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1d}" fi mkdir -p ${ccpa_day_dir_raw} @@ -622,40 +632,48 @@ echo "ihh = ${ihh}" #----------------------------------------------------------------------- # elif [[ ${OBTYPE} == "NDAS" ]]; then + # Fix these comments. # Calculate valid date - 1 day; this is needed because some obs files # are stored in the *previous* day's 00h directory vdate_m1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour ago" +%Y%m%d%H) #vyyyymmdd_m1h=$(echo ${vdate_m1h} | cut -c1-8) + vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour" +%Y%m%d%H) + vhh_p1h=$(echo ${vdate_p1h} | cut -c9-10) + vhh_p1h_noZero=$((10#${vhh_p1h})) + vyyyymmdd_p1h=$(echo ${vdate_p1h} | cut -c1-8) + echo "" echo "HELLO PPPPPPP" echo "vyyyymmdd = ${vyyyymmdd}" echo "vhh = ${vhh}" echo "vhh_noZero = ${vhh_noZero}" #echo "vyyyymmdd_m1h = ${vyyyymmdd_m1h}" +echo "vdate = ${vdate}" echo "vdate_m1h = ${vdate_m1h}" - - # raw NDAS data from HPSS - ndas_raw=${OBS_DIR}/raw +echo "vdate_p1h = ${vdate_m1h}" # Reorganized NDAS location - ndas_proc=${OBS_DIR} + ndas_basedir_proc=${OBS_DIR} + ndas_day_dir_proc="${ndas_basedir_proc}" # raw NDAS data from HPSS #ndas_raw=${OBS_DIR}/raw - ndas_raw="${ndas_proc}/raw_cyc${iyyyymmddhh}" + #ndas_raw="${ndas_basedir_proc}/raw_cyc${iyyyymmddhh}" # Check if file exists on disk - #ndas_file="${ndas_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" - #ndas_file_m1h="${ndas_proc}/prepbufr.ndas.${vyyyymmdd_m1h}${vhh}" - ndas_file="${ndas_proc}/prepbufr.ndas.${vdate_m1h}" - if [[ -f "${ndas_file}" ]]; then + #ndas_file="${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" + #ndas_file_m1h="${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmdd_m1h}${vhh}" + #ndas_fn_check="prepbufr.ndas.${vdate_p1h}" + ndas_fn="prepbufr.ndas.${vyyyymmdd}${vhh}" + ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" + if [[ -f "${ndas_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" - echo "${ndas_file}" + echo "${ndas_fp_proc}" else echo "${OBTYPE} file does not exist on disk:" - echo "${ndas_file}" + echo "${ndas_fp_proc}" echo "Will attempt to retrieve from remote locations" # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... , nam.tHHz.prepbufr.tm06.nr @@ -670,16 +688,14 @@ echo "vdate_m1h = ${vdate_m1h}" # The current logic of this script will likely stage more files than you need, but will never # pull more HPSS tarballs than necessary -# - # This seems like a strange statement since the only way it can be true # is if the forecast length is zero. # If at forecast hour zero, skip to next hour. #if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then - if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((current_fcst + 1)) - continue - fi + # if [[ ${current_fcst} -eq 0 ]]; then + # current_fcst=$((current_fcst + 1)) + # continue + # fi # Whether to move or copy extracted files from the raw directories to their # final locations. 
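
A quick illustration of how the mv_or_cp toggle above gets applied (the
file names below are hypothetical; the pattern matches the copy step later
in this section):

    mv_or_cp="cp"
    ${mv_or_cp} raw_day20240612/2024061206/nam.t06z.prepbufr.tm01.nr \
                prepbufr.ndas.2024061205

Because the variable holds the command name itself, the call site stays
identical whichever is chosen; cp is presumably the safer default here
since the raw directories are shared across cycles, whereas mv would avoid
keeping a second copy on disk.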
@@ -689,116 +705,173 @@ echo "vdate_m1h = ${vdate_m1h}" echo "" echo "HELLO AAAAA" echo "vhh_noZero = ${vhh_noZero}" +echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" - if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || \ - ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then + if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ + ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then echo "" echo "HELLO BBBBB" - #valid_time=${vyyyymmdd}${vhh} - #output_path="${ndas_raw}/${vyyyymmdd}" + #ndas_basedir_raw="${ndas_basedir_proc}/raw_cyc${iyyyymmddhh}" + #ndas_basedir_raw="${ndas_basedir_proc}/raw_qrtrday${vyyyymmdd_p1h}${vhh_p1h}" + ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_p1h}" + ndas_day_dir_raw="${ndas_basedir_raw}/${vyyyymmdd_p1h}${vhh_p1h}" + #mkdir -p ${ndas_day_dir_raw} - if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then -echo "" -echo "HELLO CCCCC" - mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} - fi - cd ${ndas_raw} - # Pull NDAS data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NDAS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - if [[ ! -d "${ndas_proc}" ]]; then - mkdir -p ${ndas_proc} - fi + # Check if the raw daily directory already exists on disk. If so, it + # means +#all the gzipped NDAS grib2 files -- i.e. for both REFC and RETOP +# and for all times (hours, minutes, and seconds) in the current valid +# day -- have already been or are in the process of being retrieved from +# the archive (tar) files. +# If so, skip the retrieval process. If not, + # proceed to retrieve all the files and place them in the raw daily + # directory. + if [[ -d "${ndas_day_dir_raw}" ]]; then - # copy files from the previous 6 hours ("tm" means "time minus") - # The tm06 files contain more/better observations than tm00 for the equivalent time - for tm in $(seq 1 6); do -# for tm in $(seq --format="%02g" 6 -1 1); do - vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_vdate} ${tm} hours ago" +%Y%m%d%H) - tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') +# Fix up these messages. + echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" + echo " ndas_day_dir_proc = \"${ndas_day_dir_proc}\"" + echo "This means NDAS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved." + echo "Thus, we will NOT attempt to retrieve NDAS data for the current valid time from remote locations." - ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ - ${ndas_proc}/prepbufr.ndas.${vyyyymmddhh_tm} - done + else - fi + mkdir -p ${ndas_day_dir_raw} + valid_time=${vyyyymmdd_p1h}${vhh_p1h} + +# Before calling retrieve_data.py, change location to the raw base +# directory to avoid get_obs_ndas tasks for other cycles from clobbering +# the output from this call to retrieve_data.py. Note that retrieve_data.py +# extracts the NDAS tar files into the directory it was called from, +# which is the working directory of this script right before retrieve_data.py +# is called. + cd ${ndas_basedir_raw} + +# Use the retrieve_data.py script to retrieve all the gzipped NDAS grib2 +# files -- i.e. 
for both REFC and RETOP and for all times (hours, minutes, +# and seconds) in the current valid day -- and place them in the raw daily +# directory. Note that this will pull both the REFC and RETOP files in +# one call. + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${vyyyymmdd_p1h}${vhh_p1h} \ + --data_stores hpss \ + --data_type NDAS_obs \ + --output_path ${ndas_day_dir_raw} \ + --summary_file ${logfile}" - # If at last forecast hour, make sure we're getting the last observations - if [[ ${current_fcst} -eq ${fcst_length} ]]; then - - echo "Retrieving NDAS obs for final forecast hour" - vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6))) - if [[ ${vhh_noZero} -eq 24 ]]; then - vyyyymmdd=${vyyyymmdd_p1} - vhh=00 - elif [[ ${vhh_noZero} -eq 6 ]]; then - vhh=06 - else - vhh=${vhh_noZero} - fi + echo "CALLING: ${cmd}" - if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then - mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} - fi + $cmd || print_err_msg_exit "\ + Could not retrieve NDAS data from HPSS - cd ${ndas_raw} - # Pull NDAS data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NDAS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} + The following command exited with a non-zero exit status: + ${cmd} " +# Create a flag file that can be used to confirm the completion of the +# retrieval of all files for the current valid day. + touch ${ndas_day_dir_raw}/pull_completed.txt - if [[ ! -d "${ndas_proc}" ]]; then - mkdir -p ${ndas_proc} fi - for tm in $(seq 1 6); do - last_fhr=$((fcst_length + 6 - (vhh_noZero % 6))) - unix_fdate=$($DATE_UTIL -d "${unix_init_DATE} ${last_fhr} hours" "+%Y-%m-%d %H:00:00") - vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) - tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - - ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ - ${ndas_proc}/prepbufr.ndas.${vyyyymmddhh_tm} +# Make sure the retrieval process for the current day (which may have +# been executed above for this cycle or by another cycle) has completed +# by checking for the existence of the flag file that marks completion. +# If not, keep checking until the flag file shows up. + while [[ ! -f "${ndas_day_dir_raw}/pull_completed.txt" ]]; do + echo "Waiting for the retrieval process for valid quarter-day ending on ${vyyyymmdd_p1h}${vhh_p1h} to complete..." + sleep 5s done + if [[ -f "${ndas_fp_proc}" ]]; then + + echo "${OBTYPE} file exists on disk:" + echo " ndas_fp_proc = \"${ndas_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." 
+ + else + + #mkdir -p ${ndas_basedir_proc} + + unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") + # copy files from the previous 6 hours ("tm" means "time minus") + # The tm06 files contain more/better observations than tm00 for the equivalent time + for tm in $(seq 1 6); do +# for tm in $(seq --format="%02g" 6 -1 1); do + vyyyymmddhh_p1h_tm=$($DATE_UTIL -d "${unix_vdate_p1h} ${tm} hours ago" +%Y%m%d%H) + if [ ${vyyyymmddhh_p1h_tm} -le ${vdate_last} ]; then + tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') + ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_p1h}z.prepbufr.tm${tm2}.nr \ + ${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmddhh_p1h_tm} + fi + done + + fi + fi + # If at last forecast hour, make sure we're getting the last observations +# if [[ ${current_fcst} -eq ${fcst_length} ]]; then +# +# echo "Retrieving NDAS obs for final forecast hour" +# vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6))) +# if [[ ${vhh_noZero} -eq 24 ]]; then +# vyyyymmdd=${vyyyymmdd_p1d} +# vhh=00 +# elif [[ ${vhh_noZero} -eq 6 ]]; then +# vhh=06 +# else +# vhh=${vhh_noZero} +# fi +# +# if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then +# mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh} +# fi +# +# cd ${ndas_raw} +# # Pull NDAS data from HPSS +# cmd=" +# python3 -u ${USHdir}/retrieve_data.py \ +# --debug \ +# --file_set obs \ +# --config ${PARMdir}/data_locations.yml \ +# --cycle_date ${vyyyymmdd}${vhh} \ +# --data_stores hpss \ +# --data_type NDAS_obs \ +# --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \ +# --summary_file ${logfile}" +# +# echo "CALLING: ${cmd}" +# +# $cmd || print_err_msg_exit "\ +# Could not retrieve NDAS data from HPSS +# +# The following command exited with a non-zero exit status: +# ${cmd} +#" +# +# if [[ ! -d "${ndas_basedir_proc}" ]]; then +# mkdir -p ${ndas_basedir_proc} +# fi +# +# for tm in $(seq 1 6); do +# last_fhr=$((fcst_length + 6 - (vhh_noZero % 6))) +# unix_fdate=$($DATE_UTIL -d "${unix_init_DATE} ${last_fhr} hours" "+%Y-%m-%d %H:00:00") +# vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) +# tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') +# +# ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ +# ${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmddhh_tm} +# done +# +# fi + fi # #----------------------------------------------------------------------- From f8c3ec67d41aca784b102498931f0eb1e6eda3d1 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 22 Jul 2024 16:32:12 -0600 Subject: [PATCH 017/260] Clean up NDAS section in get_obs_... ex-script. --- scripts/exregional_get_verif_obs.sh | 240 +++++++++++----------------- 1 file changed, 94 insertions(+), 146 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 033dd3c0fb..93f17bfa60 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -583,7 +583,7 @@ echo "ihh = ${ihh}" fi # Make sure the retrieval process for the current day (which may have - # been executed above for this cycle or by another cycle) has completed + # been executed above for this cycle or for another cycle) has completed # by checking for the existence of the flag file that marks completion. # If not, keep checking until the flag file shows up. while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do @@ -633,48 +633,53 @@ echo "ihh = ${ihh}" # elif [[ ${OBTYPE} == "NDAS" ]]; then -# Fix these comments. 
- # Calculate valid date - 1 day; this is needed because some obs files - # are stored in the *previous* day's 00h directory - vdate_m1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour ago" +%Y%m%d%H) - #vyyyymmdd_m1h=$(echo ${vdate_m1h} | cut -c1-8) - - vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 hour" +%Y%m%d%H) + # Calculate valid date plus 1 hour. This is needed because we need to + # check whether this date corresponds to one of the valid hours-of-day + # 00, 06, 12, and 18 on which the NDAS archives are provided. + unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") + vdate_p1h=$($DATE_UTIL -d "${unix_vdate_p1h}" +%Y%m%d%H) + vyyyymmdd_p1h=$(echo ${vdate_p1h} | cut -c1-8) vhh_p1h=$(echo ${vdate_p1h} | cut -c9-10) vhh_p1h_noZero=$((10#${vhh_p1h})) - vyyyymmdd_p1h=$(echo ${vdate_p1h} | cut -c1-8) echo "" echo "HELLO PPPPPPP" echo "vyyyymmdd = ${vyyyymmdd}" echo "vhh = ${vhh}" echo "vhh_noZero = ${vhh_noZero}" -#echo "vyyyymmdd_m1h = ${vyyyymmdd_m1h}" echo "vdate = ${vdate}" -echo "vdate_m1h = ${vdate_m1h}" -echo "vdate_p1h = ${vdate_m1h}" +echo "vdate_p1h = ${vdate_p1h}" - # Reorganized NDAS location + # Base directory in which the hourly NDAS prepbufr files will be located. + # We refer to this as the "processed" base directory because it contains + # the final files after all processing by this script is complete. ndas_basedir_proc=${OBS_DIR} - ndas_day_dir_proc="${ndas_basedir_proc}" - - # raw NDAS data from HPSS - #ndas_raw=${OBS_DIR}/raw - #ndas_raw="${ndas_basedir_proc}/raw_cyc${iyyyymmddhh}" - # Check if file exists on disk - #ndas_file="${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmdd}${vhh}" - #ndas_file_m1h="${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmdd_m1h}${vhh}" - #ndas_fn_check="prepbufr.ndas.${vdate_p1h}" + # Name of the NDAS prepbufr file for the current valid time that will + # appear in the processed daily subdirectory after this script finishes. + # This is the name of the processed file. Note that this is not the + # same as the name of the raw file, i.e. the file extracted from the + # archive (tar) file retrieved below by the retrieve_data.py script. ndas_fn="prepbufr.ndas.${vyyyymmdd}${vhh}" + + # Full path to the processed NDAS prepbufr file for the current field and + # valid time. ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" + + # Check if the processed NDAS prepbufr file for the current valid time + # already exists on disk. If so, skip this valid time and go to the next + # one. if [[ -f "${ndas_fp_proc}" ]]; then + echo "${OBTYPE} file exists on disk:" - echo "${ndas_fp_proc}" + echo " ndas_fp_proc = \"${ndas_fp_proc}\"" + echo "Will NOT attempt to retrieve from remote locations." + else + echo "${OBTYPE} file does not exist on disk:" - echo "${ndas_fp_proc}" - echo "Will attempt to retrieve from remote locations" + echo " ndas_fp_proc = \"${ndas_fp_proc}\"" + echo "Will attempt to retrieve from remote locations." # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... 
, nam.tHHz.prepbufr.tm06.nr
    #
    # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z,
    # nam.t00z.prepbufr.tm03.nr is valid for 21z the previous day, etc.
    # This means that every six hours we have two obs files valid for the same time:
    # nam.tHHz.prepbufr.tm00.nr and nam.t[HH+6]z.prepbufr.tm06.nr
    # We want to use the tm06 file because it contains more/better obs (confirmed with EMC: even
    # though the earlier files are larger, this is because the time window is larger)

-    # The current logic of this script will likely stage more files than you need, but will never
-    # pull more HPSS tarballs than necessary
-
-# This seems like a strange statement since the only way it can be true
-# is if the forecast length is zero.
-    # If at forecast hour zero, skip to next hour.
-    #if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then
-    # if [[ ${current_fcst} -eq 0 ]]; then
-    #   current_fcst=$((current_fcst + 1))
-    #   continue
-    # fi
-
    # Whether to move or copy extracted files from the raw directories to their
    # final locations.
    #mv_or_cp="mv"
    mv_or_cp="cp"

-echo ""
-echo "HELLO AAAAA"
-echo "vhh_noZero = ${vhh_noZero}"
-echo "vhh_p1h_noZero = ${vhh_p1h_noZero}"
-
+    # Due to the way NDAS archives are organized, we can only retrieve the
+    # archive (tar) file containing data for the current valid hour (and the
+    # 5 hours preceding it) if the hour-of-day corresponding to the current
+    # valid time plus 1 hour corresponds to one of 0, 6, 12, and 18.
    if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \
          ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then

-echo ""
-echo "HELLO BBBBB"
-      #ndas_basedir_raw="${ndas_basedir_proc}/raw_cyc${iyyyymmddhh}"
-      #ndas_basedir_raw="${ndas_basedir_proc}/raw_qrtrday${vyyyymmdd_p1h}${vhh_p1h}"
+      # Base directory that will contain the 6-hourly subdirectories in which
+      # the NDAS prepbufr files retrieved from archive files will be placed,
+      # and the 6-hourly subdirectory for the current valid time plus 1 hour.
+      # We refer to these as the "raw" NDAS base and 6-hourly directories
+      # because they contain files as they are found in the archives before
+      # any processing by this script.
      ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_p1h}"
-      ndas_day_dir_raw="${ndas_basedir_raw}/${vyyyymmdd_p1h}${vhh_p1h}"
-      #mkdir -p ${ndas_day_dir_raw}
+      ndas_day_dir_raw="${ndas_basedir_raw}/${vdate_p1h}"

-      # Check if the raw daily directory already exists on disk.  If so, it
-      # means
-#all the gzipped NDAS grib2 files -- i.e. for both REFC and RETOP
-# and for all times (hours, minutes, and seconds) in the current valid
-# day -- have already been or are in the process of being retrieved from
-# the archive (tar) files.
-# If so, skip the retrieval process.  If not,
-      # proceed to retrieve all the files and place them in the raw daily
-      # directory.
+      # Check if the raw 6-hourly directory already exists on disk.  If so, it
+      # means the NDAS prepbufr files for the current valid hour and the 5 hours
+      # preceding it have already been or are in the process of being retrieved
+      # from the archive (tar) files.  If so, skip the retrieval process.  If
+      # not, proceed to retrieve the archive file, extract the prepbufr files
+      # from it, and place them in the raw daily directory.
      if [[ -d "${ndas_day_dir_raw}" ]]; then

-# Fix up these messages.
-      echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:"
-      echo "  ndas_day_dir_proc = \"${ndas_day_dir_proc}\""
-      echo "This means NDAS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved."
-      echo "Thus, we will NOT attempt to retrieve NDAS data for the current valid time from remote locations."
+        print_info_msg "
+${OBTYPE} raw directory for day ${vdate_p1h} exists on disk:
+  ndas_day_dir_raw = \"${ndas_day_dir_raw}\"
+This means NDAS files for the current valid time (${vyyyymmdd}) and the
+5 hours preceding it have been or are being retrieved by a get_obs_ndas
+workflow task for another cycle.  Thus, we will NOT attempt to retrieve
+NDAS data for the current valid time from remote locations."

      else

        mkdir -p ${ndas_day_dir_raw}
-        valid_time=${vyyyymmdd_p1h}${vhh_p1h}
-
-# Before calling retrieve_data.py, change location to the raw base
-# directory to avoid get_obs_ndas tasks for other cycles from clobbering
-# the output from this call to retrieve_data.py.  Note that retrieve_data.py
-# extracts the NDAS tar files into the directory it was called from,
-# which is the working directory of this script right before retrieve_data.py
-# is called.
+
+        # Before calling retrieve_data.py, change location to the raw base
+        # directory to keep get_obs_ndas tasks for other cycles from clobbering
+        # the output from this call to retrieve_data.py.  Note that retrieve_data.py
+        # extracts the NDAS prepbufr files from the archive into the directory it
+        # was called from, which is the working directory of this script right
+        # before retrieve_data.py is called.
        cd ${ndas_basedir_raw}

-# Use the retrieve_data.py script to retrieve all the gzipped NDAS grib2
-# files -- i.e. for both REFC and RETOP and for all times (hours, minutes,
-# and seconds) in the current valid day -- and place them in the raw daily
-# directory.  Note that this will pull both the REFC and RETOP files in
-# one call.
+        # Use the retrieve_data.py script to retrieve all the NDAS prepbufr files
+        # for the current valid hour and the 5 hours preceding it and place them
+        # in the raw 6-hourly directory.
        cmd="
        python3 -u ${USHdir}/retrieve_data.py \
          --debug \
          --file_set obs \
          --config ${PARMdir}/data_locations.yml \
-          --cycle_date ${vyyyymmdd_p1h}${vhh_p1h} \
+          --cycle_date ${vdate_p1h} \
          --data_stores hpss \
          --data_type NDAS_obs \
          --output_path ${ndas_day_dir_raw} \
          --summary_file ${logfile}"

        echo "CALLING: ${cmd}"

        $cmd || print_err_msg_exit "\
        Could not retrieve NDAS data from HPSS

        The following command exited with a non-zero exit status:
        ${cmd}
"
-# Create a flag file that can be used to confirm the completion of the
-# retrieval of all files for the current valid day.
+
+        # Create a flag file that can be used to confirm the completion of the
+        # retrieval of all files for the 6-hour interval ending in vdate_p1h.
        touch ${ndas_day_dir_raw}/pull_completed.txt

      fi

-# Make sure the retrieval process for the current day (which may have
-# been executed above for this cycle or by another cycle) has completed
-# by checking for the existence of the flag file that marks completion.
-# If not, keep checking until the flag file shows up.
+      # Make sure the retrieval process for the 6-hour interval ending in
+      # vdate_p1h (which may have been executed above for this cycle or for
+      # another cycle) has completed by checking for the existence of the flag
+      # file that marks completion.  If not, keep checking until the flag file
+      # shows up.
      while [[ ! -f "${ndas_day_dir_raw}/pull_completed.txt" ]]; do
-        echo "Waiting for the retrieval process for valid quarter-day ending on ${vyyyymmdd_p1h}${vhh_p1h} to complete..."
+        echo "Waiting for completion of the NDAS obs retrieval process for the"
+        echo "6-hour interval ending on ${vdate_p1h} ..."
        sleep 5s
      done

+      # Since this script is part of a workflow, another get_obs_ndas task (i.e.
+      # for another cycle) may have extracted and placed the current file in its
+      # processed location between the time we checked for its existence above
+      # (and didn't find it) and now.  This can happen because there can be
+      # overlap between the verification times for the current cycle and those
+      # of other cycles.  For this reason, check again for the existence of the
+      # processed file.  If it has already been created by another get_obs_ndas
+      # task, don't bother to recreate it.
      if [[ -f "${ndas_fp_proc}" ]]; then

        echo "${OBTYPE} file exists on disk:"
@@ -797,18 +800,19 @@ echo "ihh = ${ihh}"

      else

-        #mkdir -p ${ndas_basedir_proc}
-
-        unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00")
-        # copy files from the previous 6 hours ("tm" means "time minus")
-        # The tm06 files contain more/better observations than tm00 for the equivalent time
-        for tm in $(seq 1 6); do
+        # Create the processed NDAS prepbufr files for the current valid hour as
+        # well as the preceding 5 hours (or fewer if they're outside the time
+        # interval of the forecast) by copying or moving (and in the process
+        # renaming) them from the raw 6-hourly directory.  In the following loop,
+        # "tm" means "time minus".  Note that the tm06 files contain more/better
+        # observations than tm00 for the equivalent time.
+        for tm in $(seq 6 -1 1); do
#        for tm in $(seq --format="%02g" 6 -1 1); do
-          vyyyymmddhh_p1h_tm=$($DATE_UTIL -d "${unix_vdate_p1h} ${tm} hours ago" +%Y%m%d%H)
-          if [ ${vyyyymmddhh_p1h_tm} -le ${vdate_last} ]; then
+          vdate_p1h_tm=$($DATE_UTIL -d "${unix_vdate_p1h} ${tm} hours ago" +%Y%m%d%H)
+          if [ ${vdate_p1h_tm} -le ${vdate_last} ]; then
            tm2=$(echo $tm | awk '{printf "%02d\n", $0;}')
-            ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_p1h}z.prepbufr.tm${tm2}.nr \
-              ${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmddhh_p1h_tm}
+            ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_p1h}z.prepbufr.tm${tm2}.nr \
+              ${ndas_basedir_proc}/prepbufr.ndas.${vdate_p1h_tm}
          fi
        done

      fi

    fi

-    # If at last forecast hour, make sure we're getting the last observations
-#    if [[ ${current_fcst} -eq ${fcst_length} ]]; then
-#
-#      echo "Retrieving NDAS obs for final forecast hour"
-#      vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6)))
-#      if [[ ${vhh_noZero} -eq 24 ]]; then
-#        vyyyymmdd=${vyyyymmdd_p1d}
-#        vhh=00
-#      elif [[ ${vhh_noZero} -eq 6 ]]; then
-#        vhh=06
-#      else
-#        vhh=${vhh_noZero}
-#      fi
-#
-#      if [[ ! -d "${ndas_raw}/${vyyyymmdd}${vhh}" ]]; then
-#        mkdir -p ${ndas_raw}/${vyyyymmdd}${vhh}
-#      fi
-#
-#      cd ${ndas_raw}
-#      # Pull NDAS data from HPSS
-#      cmd="
-#      python3 -u ${USHdir}/retrieve_data.py \
-#        --debug \
-#        --file_set obs \
-#        --config ${PARMdir}/data_locations.yml \
-#        --cycle_date ${vyyyymmdd}${vhh} \
-#        --data_stores hpss \
-#        --data_type NDAS_obs \
-#        --output_path ${ndas_raw}/${vyyyymmdd}${vhh} \
-#        --summary_file ${logfile}"
-#
-#      echo "CALLING: ${cmd}"
-#
-#      $cmd || print_err_msg_exit "\
-#      Could not retrieve NDAS data from HPSS
-#
-#      The following command exited with a non-zero exit status:
-#      ${cmd}
-#"
-#
-#      if [[ ! -d "${ndas_basedir_proc}" ]]; then
-d "${ndas_basedir_proc}" ]]; then -# mkdir -p ${ndas_basedir_proc} -# fi -# -# for tm in $(seq 1 6); do -# last_fhr=$((fcst_length + 6 - (vhh_noZero % 6))) -# unix_fdate=$($DATE_UTIL -d "${unix_init_DATE} ${last_fhr} hours" "+%Y-%m-%d %H:00:00") -# vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) -# tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') -# -# ${mv_or_cp} ${ndas_raw}/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr \ -# ${ndas_basedir_proc}/prepbufr.ndas.${vyyyymmddhh_tm} -# done -# -# fi - fi # #----------------------------------------------------------------------- From bc276fe188aa516d3f365f0b9fa5648da2cbc0ed Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 22 Jul 2024 19:26:53 -0600 Subject: [PATCH 018/260] Add debugging statement to clarify the current working directory where cleanup is happening. --- ush/retrieve_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py index 5acf9d5ce9..5b4320ccb6 100755 --- a/ush/retrieve_data.py +++ b/ush/retrieve_data.py @@ -51,6 +51,7 @@ def clean_up_output_dir(expected_subdir, local_archive, output_path, source_path unavailable = {} expand_source_paths = [] logging.debug(f"Cleaning up local paths: {source_paths}") + logging.debug(f"Looking for these local paths under directory: {os.getcwd()}") for p in source_paths: expand_source_paths.extend(glob.glob(p.lstrip("/"))) From fe50a5dca742ecb248bcd78a265c7a79a39ddf95 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 25 Jul 2024 11:32:52 -0600 Subject: [PATCH 019/260] Add code to cause the script to wait until all the (processed) obs files, that are expected to be created once the task is finished actually get created. This is needed because it is possible that for some forecast hours for which there is overlap between cycles, the files are being retrieved and processed by the get_obs_... task for another cycle. --- scripts/exregional_get_verif_obs.sh | 50 ++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 93f17bfa60..b615f05ffa 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -146,6 +146,7 @@ fi # Make sure fcst_length isn't octal (leading zero) fcst_length=$((10#${fcst_length})) +processed_fp_list=() current_fcst=0 while [[ ${current_fcst} -le ${fcst_length} ]]; do @@ -216,6 +217,9 @@ echo "ihh = ${ihh}" # hour-of-day in the name of the file. ccpa_fp_proc="${ccpa_day_dir_proc}/${ccpa_fn}" + # Store the full path to the processed file in a list for later use. + processed_fp_list+=(${ccpa_fp_proc}) + # Check if the CCPA grib2 file for the current valid time already exists # at its procedded location on disk. If so, skip and go to the next valid # time. If not, pull it. @@ -491,6 +495,9 @@ echo "ihh = ${ihh}" # valid time. mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" + # Store the full path to the processed file in a list for later use. + processed_fp_list+=(${mrms_fp_proc}) + # Check if the processed MRMS grib2 file for the current field and valid # time already exists on disk. If so, skip this valid time and go to the # next one. If not, pull it. @@ -666,6 +673,9 @@ echo "vdate_p1h = ${vdate_p1h}" # valid time. ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" + # Store the full path to the processed file in a list for later use. 
+ processed_fp_list+=(${ndas_fp_proc}) + # Check if the processed NDAS prepbufr file for the current valid time # already exists on disk. If so, skip this valid time and go to the next # one. @@ -907,11 +917,41 @@ NDAS data for the current valid time from remote locations." current_fcst=$((${current_fcst} + 1)) done - - -# Clean up raw, unprocessed observation files -#rm -rf ${OBS_DIR}/raw - +# +#----------------------------------------------------------------------- +# +# At this point, the processed data files for all output forecast hours +# for this cycle are either being created (by a get_obs_... task for +# another cycle) or have already been created (either by this get_obs_... +# task or one for another cycle). In case they are still being created, +# make sure they have in fact been created before exiting this script. +# If we don't do this, it is possible for this get_obs_... task to complete +# successfully but still have processed obs files for some forecast hours +# not yet created, which is undesirable. +# +#----------------------------------------------------------------------- +# +num_proc_files=${#processed_fp_list[@]} +for (( i=0; i<${num_proc_files}; i++ )); do + obs_fp="${processed_fp_list[$i]}" + while [[ ! -f "${obs_fp}" ]]; do + echo "Waiting for ${OBTYPE} file to be created on disk (by a get_obs_... workflow task for another cycle):" + echo " obs_fp = \"${obs_fp}\"" + sleep 5s + done +done +# +#----------------------------------------------------------------------- +# +# Clean up raw directories. +# +#----------------------------------------------------------------------- +# +remove_raw="TRUE" +#remove_raw="FALSE" +if [ "${remove_raw}" = "TRUE" ]; then + rm -rf ${OBS_DIR}/raw_* +fi # #----------------------------------------------------------------------- # From dc4971dedfed12f45e1dd13411300ac3fe1ae53a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 25 Jul 2024 11:45:51 -0600 Subject: [PATCH 020/260] Fix the workflow task dependencies and ex-script for the GenEnsProd and EnsembleStat tasks such that GenEnsProd does not depend on the completion of get_obs_... tasks (because it doesn't need observations) but only forecast output while EnsembleStat does. --- parm/wflow/verify_ens.yaml | 47 ++-- ...onal_run_met_genensprod_or_ensemblestat.sh | 64 +++--- ush/set_vx_fhr_list.sh | 206 +++++++++++++++--- 3 files changed, 231 insertions(+), 86 deletions(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 18b23a1eb0..4be4c5b47e 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -37,16 +37,9 @@ metatask_GenEnsProd_EnsembleStat_CCPA: FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: - and: - # The PcpCombine task for obs must be complete because this GenEnsProd - # task checks to see the forecast hours for which obs are available before - # processing the forecast for those hours. 
- taskdep_pcpcombine_obs: - attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h - metataskdep_pcpcombine_fcst: - attrs: - metatask: PcpCombine_fcst_APCP#ACCUM_HH#h_all_mems + metataskdep_pcpcombine_fcst: + attrs: + metatask: PcpCombine_fcst_APCP#ACCUM_HH#h_all_mems task_run_MET_EnsembleStat_vx_APCP#ACCUM_HH#h: <<: *task_GenEnsProd_CCPA envars: @@ -54,9 +47,13 @@ metatask_GenEnsProd_EnsembleStat_CCPA: METPLUSTOOLNAME: 'ENSEMBLESTAT' FCST_THRESH: 'none' dependency: - taskdep_genensprod: - attrs: - task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h + and: + taskdep_pcpcombine_obs: + attrs: + task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h metatask_GenEnsProd_EnsembleStat_NOHRSC: var: @@ -75,9 +72,6 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: FCST_THRESH: 'all' dependency: and: - # The PcpCombine task for obs must be complete because this GenEnsProd - # task checks to see the forecast hours for which obs are available before - # processing the forecast for those hours. metataskdep_pcpcombine_fcst: attrs: metatask: PcpCombine_fcst_ASNOW#ACCUM_HH#h_all_mems @@ -149,15 +143,8 @@ metatask_GenEnsProd_EnsembleStat_NDAS: FCST_THRESH: 'all' walltime: 02:30:00 dependency: - and: - # The Pb2nc task (which is run only for obs) must be complete because - # this GenEnsProd task checks to see the forecast hours for which obs - # are available before processing the forecast for those hours. - taskdep_pb2nc: - attrs: - task: run_MET_Pb2nc_obs - metataskdep_check_post_output: - <<: *check_post_output + metataskdep_check_post_output: + <<: *check_post_output task_run_MET_EnsembleStat_vx_#VAR#: <<: *task_GenEnsProd_NDAS envars: @@ -165,9 +152,13 @@ metatask_GenEnsProd_EnsembleStat_NDAS: METPLUSTOOLNAME: 'ENSEMBLESTAT' walltime: 01:00:00 dependency: - taskdep_genensprod: - attrs: - task: run_MET_GenEnsProd_vx_#VAR# + and: + taskdep_pb2nc: + attrs: + task: run_MET_Pb2nc_obs + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_#VAR# metatask_GridStat_CCPA_ensmeanprob_all_accums: var: diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 93caeaa7f2..5003047f4f 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -200,30 +200,40 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. -# -# Note that strictly speaking, this does not need to be done if the MET/ -# METplus tool being called is GenEnsProd (because this tool only operates -# on forecasts), but we run the check anyway in this case in order to -# keep the code here simpler and because the output of GenEnsProd for -# forecast hours with missing observations will not be used anyway in -# downstream verification tasks. 
-# -#----------------------------------------------------------------------- -# -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +# Generate the list of forecast hours for which to run the specified +# METplus tool. +# +# If running the GenEnsProd tool, we set this to the list of forecast +# output times without filtering for the existence of observation files +# corresponding to those times. This is because GenEnsProd operates +# only on forecasts; it does not need observations. +# +# On the other hand, if running the EnsembleStat tool, we set the list of +# forecast hours to a set of times that takes into consideration whether +# or not observations exist. We do this by starting with the full list +# of forecast times for which there is forecast output and then removing +# from that list any times for which there is no corresponding observations. +# +#----------------------------------------------------------------------- +# +if [ "${MetplusToolName}" = "GenEnsProd" ]; then + set_vx_fhr_list_no_missing \ + fcst_len_hrs="${FCST_LEN_HRS}" \ + field="$VAR" \ + accum_hh="${ACCUM_HH}" \ + outvarname_fhr_list_no_missing="FHR_LIST" +elif [ "${MetplusToolName}" = "EnsembleStat" ]; then + set_vx_fhr_list \ + cdate="${CDATE}" \ + fcst_len_hrs="${FCST_LEN_HRS}" \ + field="$VAR" \ + accum_hh="${ACCUM_HH}" \ + base_dir="${OBS_INPUT_DIR}" \ + fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + check_accum_contrib_files="FALSE" \ + num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ + outvarname_fhr_list="FHR_LIST" +fi # #----------------------------------------------------------------------- # @@ -290,7 +300,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -# Load the yaml-like file containing the configuration for ensemble +# Load the yaml-like file containing the configuration for ensemble # verification. # #----------------------------------------------------------------------- @@ -368,7 +378,7 @@ settings="\ # # Verification configuration dictionary. # -'vx_config_dict': +'vx_config_dict': ${vx_config_dict:-} " @@ -380,7 +390,7 @@ uw template render \ -o ${metplus_config_fp} \ --verbose \ --values-file "${tmpfile}" \ - --search-path "/" + --search-path "/" err=$? rm $tmpfile diff --git a/ush/set_vx_fhr_list.sh b/ush/set_vx_fhr_list.sh index 5cefc78365..8101e927e5 100644 --- a/ush/set_vx_fhr_list.sh +++ b/ush/set_vx_fhr_list.sh @@ -1,14 +1,24 @@ # #----------------------------------------------------------------------- # -# This file defines a function that generates a list of forecast hours -# such that for each hour there exist a corresponding obs file. It does -# this by first generating a generic sequence of forecast hours and then -# removing from that sequence any hour for which there is no obs file. +# This file defines functions used to generate sets of forecast hours for +# which verification will be performed. 
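+# Two functions are defined below: set_vx_fhr_list_no_missing, which
+# generates the full set of forecast hours for a given field assuming that
+# no data files are missing, and set_vx_fhr_list, which starts from that
+# full set and removes any hours whose corresponding data files cannot be
+# found on disk.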
# #----------------------------------------------------------------------- # -function set_vx_fhr_list() { + +function set_vx_fhr_list_no_missing() { +# +#----------------------------------------------------------------------- +# +# This function sets the forecast hours for which verification will be +# performed under the assumption that that the data file (which may be +# a forecast output file or an observation file) for each hour is available +# (i.e. that there are no missing files). +# +#----------------------------------------------------------------------- +# + # #----------------------------------------------------------------------- # @@ -48,15 +58,10 @@ function set_vx_fhr_list() { #----------------------------------------------------------------------- # local valid_args=( \ - "cdate" \ "fcst_len_hrs" \ "field" \ "accum_hh" \ - "base_dir" \ - "fn_template" \ - "check_accum_contrib_files" \ - "num_missing_files_max" \ - "outvarname_fhr_list" \ + "outvarname_fhr_list_no_missing" \ ) process_args valid_args "$@" # @@ -76,27 +81,15 @@ function set_vx_fhr_list() { # #----------------------------------------------------------------------- # - local crnt_tmpl \ - crnt_tmpl_esc \ - fhr \ - fhr_array \ - fhr_int \ + local fhr_array \ fhr_list \ + fhr_int \ fhr_min \ - fhr_max \ - fn \ - fp \ - i \ - num_fcst_hrs \ - num_missing_files \ - regex_search_tmpl \ - remainder \ - skip_this_fhr + fhr_max # #----------------------------------------------------------------------- # -# Create array containing set of forecast hours for which we will check -# for the existence of corresponding observation or forecast file. +# Create the array of forecast hours. # #----------------------------------------------------------------------- # @@ -140,6 +133,157 @@ this field (field): fhr_max="${fcst_len_hrs}" fhr_array=($( seq ${fhr_min} ${fhr_int} ${fhr_max} )) + + # Express the forecast hour array as a (scalar) string containing a comma + # (and space) separated list of the elements of fhr_array. + fhr_list=$( printf "%s, " "${fhr_array[@]}" ) + fhr_list=$( echo "${fhr_list}" | $SED "s/, $//g" ) + + print_info_msg "$VERBOSE" "\ +Initial (i.e. before filtering for missing files) set of forecast hours +(saved in a scalar string variable) is: + fhr_list = \"${fhr_list}\" +" +# +#----------------------------------------------------------------------- +# +# Set output variables. +# +#----------------------------------------------------------------------- +# + if [ ! -z "${outvarname_fhr_list_no_missing}" ]; then + printf -v ${outvarname_fhr_list_no_missing} "%s" "${fhr_list}" + fi +# +#----------------------------------------------------------------------- +# +# Restore the shell options saved at the beginning of this script/function. +# +#----------------------------------------------------------------------- +# + { restore_shell_opts; } > /dev/null 2>&1 + +} + + + +# +#----------------------------------------------------------------------- +# +# This function generates a list of forecast hours such that for each +# such hour, there exists a corresponding data file with a name of the +# form specified by the template fn_template. Depending on fn_template, +# this file may contain forecast or observation data. This function +# generates this forecast hour list by first generating a set of hours +# under the assumption that there is a corresponding data file for each +# hour and then removing from that list any hour for which there is no +# data file. 
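+#
+# As an illustration only (the argument values below are hypothetical and
+# not taken from any actual task), a call to this function might look like:
+#
+#   set_vx_fhr_list \
+#     cdate="2024050612" \
+#     fcst_len_hrs="24" \
+#     field="APCP" \
+#     accum_hh="01" \
+#     base_dir="${OBS_DIR}" \
+#     fn_template="{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" \
+#     check_accum_contrib_files="FALSE" \
+#     num_missing_files_max="2" \
+#     outvarname_fhr_list="FHR_LIST"
+#
+# On return, FHR_LIST would contain a comma-separated list of only those
+# forecast hours for which the templated file was found under base_dir.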
+# +#----------------------------------------------------------------------- +# +function set_vx_fhr_list() { +# +#----------------------------------------------------------------------- +# +# Save current shell options (in a global array). Then set new options +# for this script/function. +# +#----------------------------------------------------------------------- +# + { save_shell_opts; set -u +x; } > /dev/null 2>&1 +# +#----------------------------------------------------------------------- +# +# Get the full path to the file in which this script/function is located +# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in +# which the file is located (scrfunc_dir). +# +#----------------------------------------------------------------------- +# + local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) + local scrfunc_fn=$( basename "${scrfunc_fp}" ) + local scrfunc_dir=$( dirname "${scrfunc_fp}" ) +# +#----------------------------------------------------------------------- +# +# Get the name of this function. +# +#----------------------------------------------------------------------- +# + local func_name="${FUNCNAME[0]}" +# +#----------------------------------------------------------------------- +# +# Specify the set of valid argument names for this script/function. Then +# process the arguments provided to this script/function (which should +# consist of a set of name-value pairs of the form arg1="value1", etc). +# +#----------------------------------------------------------------------- +# + local valid_args=( \ + "cdate" \ + "fcst_len_hrs" \ + "field" \ + "accum_hh" \ + "base_dir" \ + "fn_template" \ + "check_accum_contrib_files" \ + "num_missing_files_max" \ + "outvarname_fhr_list" \ + ) + process_args valid_args "$@" +# +#----------------------------------------------------------------------- +# +# For debugging purposes, print out values of arguments passed to this +# script. Note that these will be printed out only if VERBOSE is set to +# TRUE. +# +#----------------------------------------------------------------------- +# + print_input_args valid_args +# +#----------------------------------------------------------------------- +# +# Declare local variables. +# +#----------------------------------------------------------------------- +# + local crnt_tmpl \ + crnt_tmpl_esc \ + fhr \ + fhr_array \ + fhr_list \ + fn \ + fp \ + i \ + num_fcst_hrs \ + num_missing_files \ + regex_search_tmpl \ + remainder \ + skip_this_fhr +# +#----------------------------------------------------------------------- +# +# For the specified field, generate the set of forecast hours at which +# verification will be performed under the assumption that for each such +# hour, the corresponding forecast and/or observation files exists. Thus, +# this set of forecast hours is an initial guess for the hours at which +# vx will be performed. +# +#----------------------------------------------------------------------- +# + set_vx_fhr_list_no_missing \ + fcst_len_hrs="${fcst_len_hrs}" \ + field="${field}" \ + accum_hh="${accum_hh}" \ + outvarname_fhr_list_no_missing="fhr_list_no_missing" + + # For convenience, save the scalar variable fhr_list_no_missing to a bash + # array. + fhr_array=$( printf "%s" "${fhr_list_no_missing}" | $SED "s/,//g" ) + fhr_array=( ${fhr_array} ) + print_info_msg "$VERBOSE" "\ Initial (i.e. 
before filtering for missing files) set of forecast hours is: @@ -174,7 +318,7 @@ is: skip_this_fhr="FALSE" for (( j=0; j<${num_back_hrs}; j++ )); do # -# Use the provided template to set the name of/relative path to the file +# Use the provided template to set the name of/relative path to the file # Note that the while-loop below is over all METplus time string templates # of the form {...} in the template fn_template; it continues until all # such templates have been evaluated to actual time strings. @@ -195,7 +339,7 @@ is: # # Replace METplus time templates in fn with actual times. Note that # when using sed, we need to escape various characters (question mark, -# closing and opening curly braces, etc) in the METplus template in +# closing and opening curly braces, etc) in the METplus template in # order for the sed command below to work properly. # crnt_tmpl_esc=$( echo "${crnt_tmpl}" | \ @@ -253,8 +397,8 @@ METplus configuration file. # fhr_list=$( echo "${fhr_list}" | $SED "s/^,//g" ) print_info_msg "$VERBOSE" "\ -Final (i.e. after filtering for missing files) set of forecast hours is -(written as a single string): +Final (i.e. after filtering for missing files) set of forecast hours +(saved in a scalar string variable) is: fhr_list = \"${fhr_list}\" " # From 13aba39e140f116b9a553229bc975247e2282ec0 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 25 Jul 2024 15:54:28 -0600 Subject: [PATCH 021/260] Bug fixes after running WE2E vx suite. --- parm/wflow/verify_ens.yaml | 10 +++++++--- scripts/exregional_get_verif_obs.sh | 29 ++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 4be4c5b47e..9f1079b505 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -248,6 +248,10 @@ metatask_PointStat_NDAS_ensmeanprob: FCST_LEVEL: 'all' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_#VAR# + and: + taskdep_pb2nc: + attrs: + task: run_MET_Pb2nc_obs + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_#VAR# diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index b615f05ffa..72be333b82 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -134,10 +134,11 @@ unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" # This awk expression gets the last item of the list $FHR fcst_length=$(echo ${FHR} | awk '{ print $NF }') +vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) if [[ ${OBTYPE} == "NDAS" ]]; then - vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) vhh_last=$(echo ${vdate_last} | cut -c9-10) - hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) + #hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) + hours_to_add=$(( 6 - (vhh_last % 6) )) fcst_length_rounded_up=$(( fcst_length + hours_to_add )) # vdate_last_rounded_up=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length_rounded_up} hours" +%Y%m%d%H) fcst_length=${fcst_length_rounded_up} @@ -218,7 +219,9 @@ echo "ihh = ${ihh}" ccpa_fp_proc="${ccpa_day_dir_proc}/${ccpa_fn}" # Store the full path to the processed file in a list for later use. - processed_fp_list+=(${ccpa_fp_proc}) + if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then + processed_fp_list+=(${ccpa_fp_proc}) + fi # Check if the CCPA grib2 file for the current valid time already exists # at its procedded location on disk. 
If so, skip and go to the next valid @@ -496,7 +499,9 @@ echo "ihh = ${ihh}" mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" # Store the full path to the processed file in a list for later use. - processed_fp_list+=(${mrms_fp_proc}) + if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then + processed_fp_list+=(${mrms_fp_proc}) + fi # Check if the processed MRMS grib2 file for the current field and valid # time already exists on disk. If so, skip this valid time and go to the @@ -674,7 +679,13 @@ echo "vdate_p1h = ${vdate_p1h}" ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" # Store the full path to the processed file in a list for later use. - processed_fp_list+=(${ndas_fp_proc}) +echo +echo "LLLLLLLLLLLLL" + if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then +echo "MMMMMMMMMMMMM" +echo "processed_fp_list = |${processed_fp_list[@]}" + processed_fp_list+=(${ndas_fp_proc}) + fi # Check if the processed NDAS prepbufr file for the current valid time # already exists on disk. If so, skip this valid time and go to the next @@ -931,7 +942,11 @@ done # #----------------------------------------------------------------------- # +echo +echo "KKKKKKKKKKKK" +echo "processed_fp_list = |${processed_fp_list[@]}" num_proc_files=${#processed_fp_list[@]} +echo "num_proc_files = ${num_proc_files}" for (( i=0; i<${num_proc_files}; i++ )); do obs_fp="${processed_fp_list[$i]}" while [[ ! -f "${obs_fp}" ]]; do @@ -947,8 +962,8 @@ done # #----------------------------------------------------------------------- # -remove_raw="TRUE" -#remove_raw="FALSE" +#remove_raw="TRUE" +remove_raw="FALSE" if [ "${remove_raw}" = "TRUE" ]; then rm -rf ${OBS_DIR}/raw_* fi From 860f62e3aee3dd71d3eb23765e9fd846b61b5444 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 25 Jul 2024 15:55:20 -0600 Subject: [PATCH 022/260] Bugfix to dependencies of ensemble vx tasks that come after GenEnsProd due to changes to dependencies of GenEnsProd tasks in previous commit(s). --- parm/wflow/verify_ens.yaml | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 9f1079b505..4d01281b6d 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -180,9 +180,13 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h + and: + taskdep_pcpcombine_obs: + attrs: + task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h metatask_GridStat_NOHRSC_ensmeanprob_all_accums: var: @@ -204,9 +208,13 @@ metatask_GridStat_NOHRSC_ensmeanprob_all_accums: FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h + and: + taskdep: + attrs: + task: get_obs_nohrsc + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h metatask_GridStat_MRMS_ensprob: var: @@ -224,9 +232,13 @@ metatask_GridStat_MRMS_ensprob: FCST_LEVEL: 'L0' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_#VAR# + and: + taskdep_get_obs_mrms: + attrs: + task: get_obs_mrms + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_#VAR# metatask_PointStat_NDAS_ensmeanprob: var: From e54ec16d6e7f12cc095e053d00388081dc7ffe60 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 26 Jul 2024 00:11:17 -0600 Subject: [PATCH 023/260] Bug fixes to get all WE2E vx tests to succeed. 
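One of the fixes addresses how NDAS obs for the last few forecast hours
are retrieved: each valid hour must be mapped to the 6-hourly archive
(00z, 06z, 12z, or 18z) that contains it, even when the forecast does not
end on one of those hours.  As a rough sketch of the arithmetic used in
the diff below (the specific values here are made up for illustration):

    # For a valid hour vhh not on a 6-hour boundary, the containing
    # archive is the next one at or after vhh.
    vhh=14
    hours_to_archive=$(( 6 - (vhh % 6) ))   # -> 4
    # The archive is thus the 18z one, and the file valid at 14z is
    # extracted from it as the "tm04" ("time minus 4") prepbufr file.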
--- scripts/exregional_get_verif_obs.sh | 73 +++++++++++++++++++---------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 72be333b82..f3a52710c5 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -124,7 +124,7 @@ idd=$(echo ${PDY} | cut -c7-8) ihh=${cyc} echo -echo "HELLO GGGGGGGG" +echo "HELLO AAAAAAAAAAA" iyyyymmddhh=${PDY}${cyc} echo "iyyyymmddhh = ${iyyyymmddhh}" @@ -134,14 +134,17 @@ unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" # This awk expression gets the last item of the list $FHR fcst_length=$(echo ${FHR} | awk '{ print $NF }') +echo +echo "BYE 00000000" vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) if [[ ${OBTYPE} == "NDAS" ]]; then +echo "BYE 111111111" vhh_last=$(echo ${vdate_last} | cut -c9-10) #hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) hours_to_add=$(( 6 - (vhh_last % 6) )) fcst_length_rounded_up=$(( fcst_length + hours_to_add )) # vdate_last_rounded_up=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length_rounded_up} hours" +%Y%m%d%H) - fcst_length=${fcst_length_rounded_up} +# fcst_length=${fcst_length_rounded_up} fi # Make sure fcst_length isn't octal (leading zero) @@ -152,14 +155,17 @@ current_fcst=0 while [[ ${current_fcst} -le ${fcst_length} ]]; do echo -echo "HELLO GGGGGGGG" +echo "HELLO BBBBBBBBBBB" echo "current_fcst = ${current_fcst}" # Calculate valid date info using date utility vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) - #unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") + unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") vyyyymmdd=$(echo ${vdate} | cut -c1-8) vhh=$(echo ${vdate} | cut -c9-10) +echo +echo "BYE 222222222" +echo "vhh = ${vhh}" # Calculate valid date + 1 day; this is needed because some obs files # are stored in the *next* day's 00h directory @@ -167,11 +173,10 @@ echo "current_fcst = ${current_fcst}" vyyyymmdd_p1d=$(echo ${vdate_p1d} | cut -c1-8) echo -echo "HELLO HHHHHHHH" +echo "HELLO CCCCCCCCCC" echo "vyyyymmdd = ${vyyyymmdd}" echo "vyyyymmdd_p1d = ${vyyyymmdd_p1d}" echo "ihh = ${ihh}" -#exit #remove leading zero again, this time keep original vhh_noZero=$((10#${vhh})) @@ -655,7 +660,8 @@ echo "ihh = ${ihh}" vhh_p1h_noZero=$((10#${vhh_p1h})) echo "" -echo "HELLO PPPPPPP" +echo "HELLO DDDDDDDDDDD" +echo "vdate = ${vdate}" echo "vyyyymmdd = ${vyyyymmdd}" echo "vhh = ${vhh}" echo "vhh_noZero = ${vhh_noZero}" @@ -672,7 +678,7 @@ echo "vdate_p1h = ${vdate_p1h}" # This is the name of the processed file. Note that this is not the # same as the name of the raw file, i.e. the file extracted from the # archive (tar) file retrieved below by the retrieve_data.py script. - ndas_fn="prepbufr.ndas.${vyyyymmdd}${vhh}" + ndas_fn="prepbufr.ndas.${vdate}" # Full path to the processed NDAS prepbufr file for the current field and # valid time. @@ -680,9 +686,9 @@ echo "vdate_p1h = ${vdate_p1h}" # Store the full path to the processed file in a list for later use. 
echo -echo "LLLLLLLLLLLLL" - if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then -echo "MMMMMMMMMMMMM" +echo "EEEEEEEEEEEEEE" + if [ ${vdate} -le ${vdate_last} ]; then +echo "FFFFFFFFFFFFFF" echo "processed_fp_list = |${processed_fp_list[@]}" processed_fp_list+=(${ndas_fp_proc}) fi @@ -717,7 +723,7 @@ echo "processed_fp_list = |${processed_fp_list[@]}" mv_or_cp="cp" echo "" -echo "HELLO AAAAA" +echo "HELLO GGGGGGGGGGGGG" echo "vhh_noZero = ${vhh_noZero}" echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" @@ -726,7 +732,22 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" # 5 hours preceeding it) if the hour-of-day corresponding to the current # valid time plus 1 hour corresponds to one of 0, 6, 12, and 18. if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ - ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then + ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 || \ + ${current_fcst} -eq ${fcst_length} ]]; then + + if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ + ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then + unix_vdate_archive="${unix_vdate_p1h}" + vdate_archive="${vdate_p1h}" + vyyyymmdd_archive="${vyyyymmdd_p1h}" + vhh_archive=${vhh_p1h} + elif [[ ${current_fcst} -eq ${fcst_length} ]]; then + hours_to_archive=$(( 6 - (vhh % 6) )) + unix_vdate_archive=$($DATE_UTIL -d "${unix_vdate} ${hours_to_archive} hours" "+%Y-%m-%d %H:00:00") + vdate_archive=$($DATE_UTIL -d "${unix_vdate} ${hours_to_archive} hours" +%Y%m%d%H) + vyyyymmdd_archive=$(echo ${vdate_archive} | cut -c1-8) + vhh_archive=$(echo ${vdate_archive} | cut -c9-10) + fi # Base directory that will contain the 6-hourly subdirectories in which # the NDAS prepbufr files retrieved from archive files will be placed, @@ -734,8 +755,8 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" # We refer to these as the "raw" NDAS base and 6-hourly directories # because they contain files as they are found in the archives before # any processing by this script. - ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_p1h}" - ndas_day_dir_raw="${ndas_basedir_raw}/${vdate_p1h}" + ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_archive}" + ndas_day_dir_raw="${ndas_basedir_raw}/${vdate_archive}" # Check if the raw 6-hourly directory already exists on disk. If so, it # means the NDAS prepbufr files for the current valid hour and the 5 hours @@ -746,9 +767,9 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" if [[ -d "${ndas_day_dir_raw}" ]]; then print_info_msg " -${OBTYPE} raw directory for day ${vdate_p1h} exists on disk: +${OBTYPE} raw 6-hourly directory ${vdate_archive} exists on disk: ndas_day_dir_raw = \"${ndas_day_dir_raw}\" -This means NDAS files for the current valid time (${vyyyymmdd}) and the +This means NDAS files for the current valid time (${vdate}) and the 5 hours preceeding it have been or are being retrieved by a get_obs_ndas workflow task for another cycle. Thus, we will NOT attempt to retrieve NDAS data for the current valid time from remote locations." @@ -773,7 +794,7 @@ NDAS data for the current valid time from remote locations." --debug \ --file_set obs \ --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vdate_p1h} \ + --cycle_date ${vdate_archive} \ --data_stores hpss \ --data_type NDAS_obs \ --output_path ${ndas_day_dir_raw} \ @@ -789,19 +810,19 @@ NDAS data for the current valid time from remote locations." " # Create a flag file that can be used to confirm the completion of the - # retrieval of all files for the 6-hour interval ending in vdate_p1h. 
+ # retrieval of all files for the 6-hour interval ending in vdate_archive. touch ${ndas_day_dir_raw}/pull_completed.txt fi # Make sure the retrieval process for the 6-hour interval ending in - # vdate_p1h (which may have been executed above for this cycle or for + # vdate_archive (which may have been executed above for this cycle or for # another cycle) has completed by checking for the existence of the flag # file that marks completion. If not, keep checking until the flag file # shows up. while [[ ! -f "${ndas_day_dir_raw}/pull_completed.txt" ]]; do echo "Waiting for completion of the NDAS obs retrieval process for the" - echo "6-hour interval ending on ${vdate_p1h} ..." + echo "6-hour interval ending on ${vdate_archive} ..." sleep 5s done @@ -829,11 +850,11 @@ NDAS data for the current valid time from remote locations." # observations than tm00 for the equivalent time. for tm in $(seq 6 -1 1); do # for tm in $(seq --format="%02g" 6 -1 1); do - vdate_p1h_tm=$($DATE_UTIL -d "${unix_vdate_p1h} ${tm} hours ago" +%Y%m%d%H) - if [ ${vdate_p1h_tm} -le ${vdate_last} ]; then + vdate_archive_tm=$($DATE_UTIL -d "${unix_vdate_archive} ${tm} hours ago" +%Y%m%d%H) + if [[ ${vdate_archive_tm} -le ${vdate_last} ]]; then tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_p1h}z.prepbufr.tm${tm2}.nr \ - ${ndas_basedir_proc}/prepbufr.ndas.${vdate_p1h_tm} + ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_archive}z.prepbufr.tm${tm2}.nr \ + ${ndas_basedir_proc}/prepbufr.ndas.${vdate_archive_tm} fi done @@ -943,7 +964,7 @@ done #----------------------------------------------------------------------- # echo -echo "KKKKKKKKKKKK" +echo "HHHHHHHHHHHHHHHH" echo "processed_fp_list = |${processed_fp_list[@]}" num_proc_files=${#processed_fp_list[@]} echo "num_proc_files = ${num_proc_files}" From 8e8a1c10defb814f153a5df88acff5ddfda098e9 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 28 Jul 2024 09:36:19 -0600 Subject: [PATCH 024/260] Increase default wallclock time for get_obs_ccpa tasks since they're tending to time out for 48-hour forecasts. --- parm/wflow/verify_pre.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index da43336a0d..c99bd4b4e6 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -33,7 +33,7 @@ task_get_obs_ccpa: native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" - walltime: 00:45:00 + walltime: 02:00:00 task_get_obs_nohrsc: <<: *default_task_verify_pre From a34d99339e67cba2bd9abf474dd1543e77e1433f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 28 Jul 2024 09:39:34 -0600 Subject: [PATCH 025/260] For each cycle except the last one, in the PcpCombine_obs tasks make sure PcpCombine operates only on those hours unique to the cycle, i.e. for those times starting from the initial time of the cycle to just before the initial time of the next cycle. For the PcpCombine_obs task for the last cycle, allow it to operate on all hours of that cycle's forecast. This ensures that the PcpCombine tasks for the various cycles do not clobber each other's output. 
Accordingly, change the dependencies of downstream tasks that depend on PcpCombine obs output to make sure they include all PcpCombine_obs tasks that cover the forecast period of the that downstream task's cycle. --- parm/wflow/verify_det.yaml | 39 +++++++++++++++++++++--- parm/wflow/verify_ens.yaml | 36 +++++++++++++++++++--- scripts/exregional_run_met_pcpcombine.sh | 21 ++++++++++++- 3 files changed, 86 insertions(+), 10 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 3acfa3e836..f69429bbe4 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -47,12 +47,41 @@ metatask_GridStat_CCPA_all_accums_all_mems: walltime: 02:00:00 dependency: and: - taskdep_pcpcombine_obs: + # The following will include dependencies on the PcpCombine_obs task for + # the current cycle as well as those from other cycles that process CCPA + # obs at valid times that are part of the current cycle's forecast. This + # dependence is necessary because each PcpCombine_obs task except the + # last one processes obs at valid times starting with the initial time + # of the current cycle's forecast and ending with the last output time + # of this forecast that is before the initial time of the next cycle's + # forecast. It will also include a dependency on the PcpCombine_fcst + # task for the current cycle. + taskdep_pcpcombine_obs_fcst: attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h - taskdep_pcpcombine_fcst: - attrs: - task: run_MET_PcpCombine_fcst_APCP#ACCUM_HH#h_mem#mem# + task: '{%- set num_cycl_dep = (workflow.FCST_LEN_HRS/workflow.INCR_CYCL_FREQ)|round(0,"ceil")|int %} + {%- set num_cycl_dep = num_cycl_dep %} + {%- for n in range(0, num_cycl_dep) %} + {%- set cycl_offset = n*workflow.INCR_CYCL_FREQ %} + {%- if n == 0 %} + {{- "run_MET_PcpCombine_obs_APCP#ACCUM_HH#h\" cycle_offset=\"%02d:00:00\"/>\n" % cycl_offset }} + {%- elif ((n > 0) and (n < num_cycl_dep-1)) %} + {{- " \n"}} + {{- " \n"}} + {{- " \n" % cycl_offset}} + {{- " \n"}} + {{- " \n" % cycl_offset}} + {{- " \n"}} + {%- elif (n == num_cycl_dep-1) %} + {{- " \n"}} + {{- " \n"}} + {{- " \n" % cycl_offset }} + {{- " \n"}} + {{- " \n" % cycl_offset }} + {{- " \n" }} + {%- endif %} + {%- endfor %} + {{- " \n" % cycl_offset }} + {%- elif ((n > 0) and (n < num_cycl_dep-1)) %} + {{- " \n"}} + {{- " \n"}} + {{- " \n" % cycl_offset}} + {{- " \n"}} + {{- " \n" % cycl_offset}} + {{- " \n"}} + {%- elif (n == num_cycl_dep-1) %} + {{- " \n"}} + {{- " \n"}} + {{- " \n" % cycl_offset }} + {{- " \n"}} + {{- " \n" % cycl_offset }} + {{- " \n" }} + {%- endif %} + {%- endfor %}' taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h @@ -182,8 +211,7 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: dependency: and: taskdep_pcpcombine_obs: - attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + <<: &taskdep_pcpcombine_obs taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 6e64d102e6..20ae1a9794 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -212,9 +212,28 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" fi +# If processing obs, then for all cylces except the last one, calculate +# a "forecast length" that will hours up to but not including the initial +# (zeroth) hour of the next cycle. 
For the last cycle, take the "forecast +# length" of the obs to be the same as that of the forecast for the cycle. +# This ensures that the PcpCombine_obs tasks for different cycles do not +# overwrite or clobber output from another cycle (because with this +# approach, the valid times on which the current PcpCombine_obs task is +# operating is distinct from the ones for the PcpCombine_obs tasks for +# every other cycle). +fcst_len_hrs="${FCST_LEN_HRS}" +if [ "${FCST_OR_OBS}" = "OBS" ]; then + yyyymmddhhmn="${PDY}${cyc}00" + if [ ${yyyymmddhhmn} -lt ${DATE_LAST_CYCL} ] && \ + [ ${FCST_LEN_HRS} -ge ${INCR_CYCL_FREQ} ]; then + output_incr_hrs="1" + fcst_len_hrs=$((INCR_CYCL_FREQ - output_incr_hrs + 1)) + fi +fi + set_vx_fhr_list \ cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ + fcst_len_hrs="${fcst_len_hrs}" \ field="$VAR" \ accum_hh="${ACCUM_HH}" \ base_dir="${base_dir}" \ From 5550a41a1b31e104db3c39f02f95aa3edb8361ae Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 28 Jul 2024 10:16:43 -0600 Subject: [PATCH 026/260] Bug fix in yaml. --- parm/wflow/verify_ens.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 995f362926..2d62b803b8 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -211,7 +211,7 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: dependency: and: taskdep_pcpcombine_obs: - <<: &taskdep_pcpcombine_obs + <<: *taskdep_pcpcombine_obs taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h From c76ed1afdc3b9e18c59c36cf8567588f3e25fa1f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 29 Jul 2024 11:50:16 -0600 Subject: [PATCH 027/260] Fix still-existing problem of file clobbering with get_obs_mrms and possibly also get_obs_ndas by putting in sleep commands. --- scripts/exregional_get_verif_obs.sh | 36 +++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index f3a52710c5..a2759f7ef5 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -548,10 +548,21 @@ echo "ihh = ${ihh}" # the archive (tar) files. If so, skip the retrieval process. If not, # proceed to retrieve all the files and place them in the raw daily # directory. + # + # Note that despite the check on the existence of the raw daily directory + # below, it is possible for two get_obs_mrms tasks to try to retrieve + # obs for the same day. To minimize this possibility, sleep for a random + # number of seconds (with a maximum wait of maxwait seconds set below) + # before performing the directory existence check + maxwait=30 + sleep_duration_secs=$((RANDOM % maxwait)) + echo "Sleeping for $duration seconds..." + sleep "${sleep_duration_secs}s" + if [[ -d "${mrms_day_dir_raw}" ]]; then - echo "${OBTYPE} directory for day ${vyyyymmdd} exists on disk:" - echo " mrms_day_dir_proc = \"${mrms_day_dir_proc}\"" + echo "${OBTYPE} raw daily directory for day ${vyyyymmdd} exists on disk:" + echo " mrms_day_dir_raw = \"${mrms_day_dir_raw}\"" echo "This means MRMS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved." echo "Thus, we will NOT attempt to retrieve MRMS data for the current valid time from remote locations." @@ -650,7 +661,7 @@ echo "ihh = ${ihh}" # elif [[ ${OBTYPE} == "NDAS" ]]; then - # Calculate valid date plus 1 hour. This is needed because we need to + # Calculate valid date plus 1 hour. 
This is needed because we need to # check whether this date corresponds to one of the valid hours-of-day # 00, 06, 12, and 18 on which the NDAS archives are provided. unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") @@ -675,7 +686,7 @@ echo "vdate_p1h = ${vdate_p1h}" # Name of the NDAS prepbufr file for the current valid time that will # appear in the processed daily subdirectory after this script finishes. - # This is the name of the processed file. Note that this is not the + # This is the name of the processed file. Note that this is not the # same as the name of the raw file, i.e. the file extracted from the # archive (tar) file retrieved below by the retrieve_data.py script. ndas_fn="prepbufr.ndas.${vdate}" @@ -695,7 +706,7 @@ echo "processed_fp_list = |${processed_fp_list[@]}" # Check if the processed NDAS prepbufr file for the current valid time # already exists on disk. If so, skip this valid time and go to the next - # one. + # one. if [[ -f "${ndas_fp_proc}" ]]; then echo "${OBTYPE} file exists on disk:" @@ -764,6 +775,17 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" # from the archive (tar) files. If so, skip the retrieval process. If # not, proceed to retrieve the archive file, extract the prepbufr files # from it, and place them in the raw daily directory. + # + # Note that despite the check on the existence of the raw daily directory + # below, it is possible for two get_obs_mrms tasks to try to retrieve + # obs for the same day. To minimize this possibility, sleep for a random + # number of seconds (with a maximum wait of maxwait seconds set below) + # before performing the directory existence check + maxwait=30 + sleep_duration_secs=$((RANDOM % maxwait)) + echo "Sleeping for $duration seconds..." + sleep "${sleep_duration_secs}s" + if [[ -d "${ndas_day_dir_raw}" ]]; then print_info_msg " @@ -949,6 +971,7 @@ NDAS data for the current valid time from remote locations." current_fcst=$((${current_fcst} + 1)) done +echo "SSSSSSSSSSSSSSSS" # #----------------------------------------------------------------------- # @@ -956,14 +979,13 @@ done # for this cycle are either being created (by a get_obs_... task for # another cycle) or have already been created (either by this get_obs_... # task or one for another cycle). In case they are still being created, -# make sure they have in fact been created before exiting this script. +# make sure they have in fact been created before exiting this script. # If we don't do this, it is possible for this get_obs_... task to complete # successfully but still have processed obs files for some forecast hours # not yet created, which is undesirable. # #----------------------------------------------------------------------- # -echo echo "HHHHHHHHHHHHHHHH" echo "processed_fp_list = |${processed_fp_list[@]}" num_proc_files=${#processed_fp_list[@]} From 3f1dea1ebd2f4755bf268a78984c5125ec3476af Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 29 Jul 2024 11:51:57 -0600 Subject: [PATCH 028/260] Improvements to jinja2 code to put in dependencies from other cycles. 
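For example, with illustrative values of FCST_LEN_HRS=24 and
INCR_CYCL_FREQ=6 (neither taken from an actual configuration),
num_cycl_dep = ceil(24/6) = 4, so the rendered dependency covers cycle
offsets of 00, 06, 12, and 18 hours: the current cycle's PcpCombine_obs
task plus the corresponding tasks of the three subsequent cycles whose
obs processing windows fall within this cycle's 24-hour forecast.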
--- parm/wflow/verify_det.yaml | 27 ++++++++++----------------- parm/wflow/verify_ens.yaml | 19 ++++++------------- 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index f69429bbe4..35358c9b67 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -54,9 +54,8 @@ metatask_GridStat_CCPA_all_accums_all_mems: # last one processes obs at valid times starting with the initial time # of the current cycle's forecast and ending with the last output time # of this forecast that is before the initial time of the next cycle's - # forecast. It will also include a dependency on the PcpCombine_fcst - # task for the current cycle. - taskdep_pcpcombine_obs_fcst: + # forecast. + taskdep_pcpcombine_obs: attrs: task: '{%- set num_cycl_dep = (workflow.FCST_LEN_HRS/workflow.INCR_CYCL_FREQ)|round(0,"ceil")|int %} {%- set num_cycl_dep = num_cycl_dep %} @@ -64,24 +63,18 @@ metatask_GridStat_CCPA_all_accums_all_mems: {%- set cycl_offset = n*workflow.INCR_CYCL_FREQ %} {%- if n == 0 %} {{- "run_MET_PcpCombine_obs_APCP#ACCUM_HH#h\" cycle_offset=\"%02d:00:00\"/>\n" % cycl_offset }} - {%- elif ((n > 0) and (n < num_cycl_dep-1)) %} - {{- " \n"}} - {{- " \n"}} - {{- " \n" % cycl_offset}} - {{- " \n"}} - {{- " \n" % cycl_offset}} - {{- " \n"}} - {%- elif (n == num_cycl_dep-1) %} - {{- " \n"}} - {{- " \n"}} - {{- " \n" % cycl_offset }} - {{- " \n"}} + {%- else %} + {{- " \n" }} + {{- " \n" % cycl_offset }} {{- " \n" % cycl_offset }} {{- " \n" }} {%- endif %} {%- endfor %} - {{- " \n" }} + {{- " \n" % cycl_offset }} - {%- elif ((n > 0) and (n < num_cycl_dep-1)) %} - {{- " \n"}} - {{- " \n"}} - {{- " \n" % cycl_offset}} - {{- " \n"}} - {{- " \n" % cycl_offset}} - {{- " \n"}} - {%- elif (n == num_cycl_dep-1) %} - {{- " \n"}} - {{- " \n"}} - {{- " \n" % cycl_offset }} - {{- " \n"}} + {%- else %} + {{- " \n" }} + {{- " \n" % cycl_offset }} {{- " \n" % cycl_offset }} {{- " \n" }} {%- endif %} - {%- endfor %}' + {%- endfor %} + {{- " \n" }} + {{- " Date: Tue, 30 Jul 2024 17:47:56 -0600 Subject: [PATCH 029/260] Bug fix. --- scripts/exregional_get_verif_obs.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index a2759f7ef5..314273ba93 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -556,7 +556,7 @@ echo "ihh = ${ihh}" # before performing the directory existence check maxwait=30 sleep_duration_secs=$((RANDOM % maxwait)) - echo "Sleeping for $duration seconds..." + echo "Sleeping for ${sleep_duration_secs} seconds..." sleep "${sleep_duration_secs}s" if [[ -d "${mrms_day_dir_raw}" ]]; then @@ -783,7 +783,7 @@ echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" # before performing the directory existence check maxwait=30 sleep_duration_secs=$((RANDOM % maxwait)) - echo "Sleeping for $duration seconds..." + echo "Sleeping for ${sleep_duration_secs} seconds..." sleep "${sleep_duration_secs}s" if [[ -d "${ndas_day_dir_raw}" ]]; then From f9af954a305debedfd4305f62e8b10902dbb95e5 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 15 Aug 2024 10:39:29 -0600 Subject: [PATCH 030/260] Minor typo fix. 
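(The "typo" here is a duplicated comment-delimiter line in
jobs/JREGIONAL_GET_VERIF_OBS, removed by the one-line deletion below.)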
--- jobs/JREGIONAL_GET_VERIF_OBS | 1 - 1 file changed, 1 deletion(-) diff --git a/jobs/JREGIONAL_GET_VERIF_OBS b/jobs/JREGIONAL_GET_VERIF_OBS index 3820a739db..65377ddde2 100755 --- a/jobs/JREGIONAL_GET_VERIF_OBS +++ b/jobs/JREGIONAL_GET_VERIF_OBS @@ -55,7 +55,6 @@ This is the J-job script for the task that checks, pulls, and stages observation data for verification purposes. ========================================================================" -# # #----------------------------------------------------------------------- # From f81cd1cab8922930b2d908bb6c141af8cdf10318 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 15 Aug 2024 10:49:21 -0600 Subject: [PATCH 031/260] Add workflow configuration options for whether or not to remove raw obs files. --- parm/wflow/verify_pre.yaml | 4 ++++ ush/config_defaults.yaml | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index c99bd4b4e6..2357c6bc5e 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -29,6 +29,7 @@ task_get_obs_ccpa: ACCUM_HH: '01' OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' + REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_CCPA }}' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -42,6 +43,7 @@ task_get_obs_nohrsc: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' + REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_NOHRSC }}' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -56,6 +58,7 @@ task_get_obs_mrms: OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' VAR: 'REFC RETOP' + REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_MRMS }}' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -69,6 +72,7 @@ task_get_obs_ndas: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' + REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_NDAS }}' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index c9c0fc7cb8..ceccd71277 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -324,6 +324,24 @@ platform: # #----------------------------------------------------------------------- # + # REMOVE_RAW_OBS_DIRS_[CCPA|MRMS|NDAS|NOHRSC]: + # Boolean flag specifying whether to remove the "raw" observation + # directories after pulling the specified type of obs (CCPA, MRMS, + # NDAS, or NOHRSC). The raw directories are the ones in which the + # observation files are placed immediately after pulling them from + # a data store (e.g. 
NOAA's HPSS) but before performing any processing + # on them (e.g. renaming the files or reorganizing their directory + # structure). + # + #----------------------------------------------------------------------- + # + REMOVE_RAW_OBS_DIRS_CCPA: true + REMOVE_RAW_OBS_DIRS_MRMS: true + REMOVE_RAW_OBS_DIRS_NDAS: true + REMOVE_RAW_OBS_DIRS_NOHRSC: true + # + #----------------------------------------------------------------------- + # # DOMAIN_PREGEN_BASEDIR: # The base directory containing pregenerated grid, orography, and surface # climatology files. This is an alternative for setting GRID_DIR, From cd195542ec7e64f22159656f8ee16c7c3457deea Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Fri, 14 Jun 2024 20:10:35 +0000 Subject: [PATCH 032/260] First round of changes before testing, only including obs pulling tasks so far --- parm/data_locations.yml | 34 +++++++ parm/metplus/vx_config_det.yaml | 6 ++ parm/wflow/verify_pre.yaml | 27 ++++++ scripts/exregional_get_verif_obs.sh | 137 ++++++++++++++++++++++++++-- ush/config_defaults.yaml | 58 +++++------- ush/set_vx_params.sh | 46 ++++++++++ ush/setup.py | 6 ++ ush/valid_param_vals.yaml | 2 +- 8 files changed, 270 insertions(+), 46 deletions(-) diff --git a/parm/data_locations.yml b/parm/data_locations.yml index 7901f4c085..3168061f50 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -375,3 +375,37 @@ NOHRSC_obs: - "sfav2_CONUS_*h_{yyyy}{mm}{dd}{hh}_grid184.grb2" archive_internal_dir: - ./wgrbbul/nohrsc_snowfall/ + +AERONET: + hpss: + protocol: htar + archive_format: tar + archive_path: + - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} + - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} + archive_file_names: + - "dcom_prod_{yyyy}{mm}{dd}.tar" + - "dcom_{yyyy}{mm}{dd}.tar" + file_names: + obs: + - "{yyyy}{mm}{dd}.lev15" + archive_internal_dir: + - ./airnow/ + +AIRNOW: + hpss: + protocol: htar + archive_format: tar + archive_path: + - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} + - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} + archive_file_names: + - "dcom_prod_{yyyy}{mm}{dd}.tar" + - "dcom_{yyyy}{mm}{dd}.tar" + file_names: + obs: + - "HourlyAQObs_{yyyy}{mm}{dd}*.dat" + - "Monitoring_Site_Locations_V2.dat" + archive_internal_dir: + - ./airnow/ + diff --git a/parm/metplus/vx_config_det.yaml b/parm/metplus/vx_config_det.yaml index 8ea3fd5e13..44a7fb2a2b 100644 --- a/parm/metplus/vx_config_det.yaml +++ b/parm/metplus/vx_config_det.yaml @@ -206,3 +206,9 @@ ADPUPA: L0: [] CAPE%%MLCAPE: L0-90%%L0: ['gt500', 'gt1000', 'gt1500', 'gt2000', 'gt3000', 'gt4000'] +AERONET: + AOTK%%AOD: + L0: [] +AIRNOW: + MASSDEN%%PM25: + R807|A1: [] diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 0d4e1c2448..b35af30752 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -21,6 +21,33 @@ default_task_verify_pre: &default_task_verify_pre queue: '&QUEUE_DEFAULT;' walltime: 00:30:00 +task_get_obs_aeronet: + <<: *default_task_verify_pre + command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' + envars: + <<: *default_vars + OBS_DIR: '&AERONET_OBS_DIR;' + OBTYPE: 'AERONET' + FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' + partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' + 
queue: "&QUEUE_HPSS;" + walltime: 00:45:00 + +task_get_obs_airnow: + <<: *default_task_verify_pre + command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' + envars: + <<: *default_vars + OBS_DIR: '&AIRNOW_OBS_DIR;' + OBTYPE: 'AIRNOW' + FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' + partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' + queue: "&QUEUE_HPSS;" + walltime: 00:45:00 + + task_get_obs_ccpa: <<: *default_task_verify_pre command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 6ad6aaed0e..0b439d356b 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -49,11 +49,10 @@ set -x # # This script performs several important tasks for preparing data for # verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data +# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC|AERONET|AIRNOW), the script will prepare that particular data # set. # -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), +# If data is not available on disk (in the location specified by [OBTYPE]_OBS_DIR), # the script attempts to retrieve the data from HPSS using the retrieve_data.py # script. Depending on the data set, there are a few strange quirks and/or # bugs in the way data is organized; see in-line comments for details. @@ -67,6 +66,8 @@ set -x # # {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 # +# This naming scheme can be changed by the config variable OBS_CCPA_APCP_FN_TEMPLATE +# # If data is retrieved from HPSS, it will automatically staged by this # this script. # @@ -91,8 +92,11 @@ set -x # {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, # # Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity -# data and EchoTop_18_00.50_ for echo top data. If data is not available -# at the top of the hour, you should rename the file closest in time to +# data and EchoTop_18_00.50_ for echo top data. This naming scheme can be +# changed by the config variables OBS_MRMS_REFC_FN_TEMPLATE and +# OBS_MRMS_RETOP_FN_TEMPLATE, respectively. + +# If data is not available at the top of the hour, you should rename the file closest in time to # your hour(s) of interest to the above naming format. A script # "ush/mrms_pull_topofhour.py" is provided for this purpose. # @@ -102,12 +106,14 @@ set -x # # NDAS (NAM Data Assimilation System) conventional observations # ---------- -# If data is available on disk, it must be in the following +# If data is available on disk, it should be in the following # directory structure and file name conventions expected by verification # tasks: # # {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} # +# This naming scheme can be changed by the config variable OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE +# # Note that data retrieved from HPSS and other sources may be in a # different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is # either 00, 06, 12, or 18, and prevhour is the number of hours prior to @@ -118,9 +124,9 @@ set -x # this script. 
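+# (Illustrative example: the 18Z cycle file nam.t18z.prepbufr.tm03.nr holds
+# observations valid at 15Z, so it would be staged here as
+# prepbufr.ndas.{YYYYMMDD}15.)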
 #
 #
-# NOHRSC snow accumulation observations
+# NOHRSC snow accumulation observations
 # ----------
-# If data is available on disk, it must be in the following
+# If data is available on disk, it should be in the following
 # directory structure and file name conventions expected by verification
 # tasks:
 #
@@ -128,11 +134,42 @@ set -x
 #
 # where AA is the 2-digit accumulation duration in hours: 06 or 24
 #
+# This naming scheme can be changed by the config variable OBS_NOHRSC_ASNOW_FN_TEMPLATE
+#
 # METplus is configured to verify snowfall using 06- and 24-h accumulated
 # snowfall from 6- and 12-hourly NOHRSC files, respectively.
 #
 # If data is retrieved from HPSS, it will automatically staged by this
 # this script.
+#
+#
+# AERONET optical depth observations
+# ----------
+# If data is available on disk, it must be in the following
+# directory structure and file name conventions expected by verification tasks:
+#
+# {AERONET_OBS_DIR}/{YYYYMMDD}/{YYYYMMDD}.lev15
+#
+# If data is retrieved from HPSS, it will automatically be staged by
+# this script.
+#
+#
+# AIRNOW air quality observations
+# ----------
+# If data is available on disk, it must be in the following
+# directory structure and file name conventions expected by verification tasks:
+#
+# {AIRNOW_OBS_DIR}/{YYYYMMDD}/HourlyAQObs_{YYYYMMDDHH}.dat
+#
+# In addition to the raw observation files, for each day there is an additional
+# required file that stores the locations of all observation stations:
+#
+# {AIRNOW_OBS_DIR}/{YYYYMMDD}/Monitoring_Site_Locations_V2.dat
+#
+# If data is retrieved from HPSS, it will automatically be staged by
+# this script.
+#
+#
 #-----------------------------------------------------------------------
 
 # Create and enter top-level obs directory (so temporary data from HPSS won't collide with other tasks)
@@ -554,9 +591,91 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do
       mv $nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_6h_${vyyyymmdd}${vhh}_grid184.grb2 ${nohrsc06h_file}
     fi
 
+  # Retrieve AERONET observations
+  elif [[ ${OBTYPE} == "AERONET" ]]; then
+
+    # Reorganized AERONET location (no need for raw data dir)
+    aeronet_proc=${OBS_DIR}
+
+    # Check if file exists on disk; if not, pull it.
+    aeronet_file="$aeronet_proc/${vyyyymmdd}/${vyyyymmdd}.lev15"
+    if [[ -f "${aeronet_file}" ]]; then
+      echo "${OBTYPE} file exists on disk:"
+      echo "${aeronet_file}"
+    else
+      echo "${OBTYPE} file does not exist on disk:"
+      echo "${aeronet_file}"
+      echo "Will attempt to retrieve from remote locations"
+
+
+      # Pull AERONET data from HPSS
+      cmd="
+      python3 -u ${USHdir}/retrieve_data.py \
+        --debug \
+        --file_set obs \
+        --config ${PARMdir}/data_locations.yml \
+        --cycle_date ${vyyyymmdd}${vhh} \
+        --data_stores hpss \
+        --data_type AERONET \
+        --output_path $aeronet_proc/${vyyyymmdd} \
+        --summary_file ${logfile}"
+
+      echo "CALLING: ${cmd}"
+
+      $cmd || print_err_msg_exit "\
+      Could not retrieve AERONET data from HPSS
+
+      The following command exited with a non-zero exit status:
+      ${cmd}
+"
+
+    fi
+
+  # Retrieve AIRNOW observations
+  elif [[ ${OBTYPE} == "AIRNOW" ]]; then
+
+    # Reorganized AIRNOW location (no need for raw data dir)
+    airnow_proc=${OBS_DIR}
+
+    # Check if file exists on disk; if not, pull it.
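+    # (Hypothetical example: for valid day 20231217 and hour 00, the file
+    # checked below would be ${OBS_DIR}/20231217/HourlyAQObs_2023121700.dat.)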
+    airnow_file="$airnow_proc/${vyyyymmdd}/HourlyAQObs_${vyyyymmdd}${vhh}.dat"
+    if [[ -f "${airnow_file}" ]]; then
+      echo "${OBTYPE} file exists on disk:"
+      echo "${airnow_file}"
+    else
+      echo "${OBTYPE} file does not exist on disk:"
+      echo "${airnow_file}"
+      echo "Will attempt to retrieve from remote locations"
+
+
+      # Pull AIRNOW data from HPSS
+      cmd="
+      python3 -u ${USHdir}/retrieve_data.py \
+        --debug \
+        --file_set obs \
+        --config ${PARMdir}/data_locations.yml \
+        --cycle_date ${vyyyymmdd}${vhh} \
+        --data_stores hpss \
+        --data_type AIRNOW \
+        --output_path $airnow_proc/${vyyyymmdd} \
+        --summary_file ${logfile}"
+
+      echo "CALLING: ${cmd}"
+
+      $cmd || print_err_msg_exit "\
+      Could not retrieve AIRNOW data from HPSS
+
+      The following command exited with a non-zero exit status:
+      ${cmd}
+"
+
+
+
+    fi
+
   else
     print_err_msg_exit "\
-    Invalid OBTYPE specified for script; valid options are CCPA, MRMS, NDAS, and NOHRSC
+    Invalid OBTYPE specified for script; valid options are CCPA, MRMS, NDAS, NOHRSC, AERONET, AIRNOW
   "
   fi
   # Increment to next forecast hour
diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml
index 90651c1b7f..454ac5fc84 100644
--- a/ush/config_defaults.yaml
+++ b/ush/config_defaults.yaml
@@ -278,42 +278,26 @@ platform:
   #
   # Set METplus parameters. Definitions:
   #
-  # CCPA_OBS_DIR:
-  # User-specified location of the directory where CCPA hourly
-  # precipitation files used by METplus are located (or, if
-  # retrieved by the workflow, where they will be placed). See comments
-  # in file scripts/exregional_get_verif_obs.sh for more details about
-  # files and directory structure, as well as important caveats about
-  # errors in the metadata and file names.
-  # NOTE: Do not set this to the same path as other *_OBS_DIR variables;
-  # otherwise unexpected results and data loss may occur.
-  #
-  # NOHRSC_OBS_DIR:
-  # User-specified location of top-level directory where NOHRSC 6- and
-  # 24-hour snowfall accumulation files used by METplus are located (or,
-  # if retrieved by the workflow, where they will be placed). See comments
-  # in file scripts/exregional_get_verif_obs.sh for more details about
-  # files and directory structure
-  # NOTE: Do not set this to the same path as other *_OBS_DIR variables;
-  # otherwise unexpected results and data loss may occur.
-  #
-  # MRMS_OBS_DIR:
-  # User-specified location of the directory where MRMS composite
-  # reflectivity and echo top files used by METplus are located (or, if
-  # retrieved by the workflow, where they will be placed). See comments
-  # in the scripts/exregional_get_verif_obs.sh for more details about
-  # files and directory structure.
-  # NOTE: Do not set this to the same path as other *_OBS_DIR variables;
-  # otherwise unexpected results and data loss may occur.
-  #
-  # NDAS_OBS_DIR:
-  # User-specified location of top-level directory where NDAS prepbufr
-  # files used by METplus are located (or, if retrieved by the workflow,
-  # where they will be placed).
See + # comments in file scripts/exregional_get_verif_obs.sh for more details + # about files and directory structure, as well as important caveats about + # some observation types, including known errors in staged observations. + # + # NOTE: It is recommended that you set directories for different obs + # types to different locations; otherwise unexpected results including + # data loss may occur. + # + # The variables for currently supported observation directories are: + # CCPA_OBS_DIR: CCPA hourly precipitation + # NOHRSC_OBS_DIR: NOHRSC 6- and 24-hour snowfall accumulation + # MRMS_OBS_DIR: MRMS composite reflectivity and echo top + # NDAS_OBS_DIR: NDAS prepbufr + # AERONET_OBS_DIR: AERONET aerosol optical depth + # AIRNOW_OBS_DIR: AIRNOW air quality + # # #----------------------------------------------------------------------- # @@ -321,6 +305,8 @@ platform: NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc" MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms/proc" NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas/proc" + AERONET_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/aeronet/proc" + AIRNOW_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/airnow/proc" # #----------------------------------------------------------------------- # diff --git a/ush/set_vx_params.sh b/ush/set_vx_params.sh index 9b67e36d22..ef8a6dc8cd 100644 --- a/ush/set_vx_params.sh +++ b/ush/set_vx_params.sh @@ -230,6 +230,52 @@ this observation type (obtype) and field (field) combination: *) print_err_msg_exit "\ A method for setting verification parameters has not been specified for +this observation type (obtype) and field (field) combination: + obtype = \"${obtype}\" + field = \"${field}\"" + ;; + + esac + ;; + + "AERONET") + + _grid_or_point_="point" + case "${field}" in + + "AOD") + fieldname_in_obs_input="${field}" + fieldname_in_fcst_input="AOTK" + fieldname_in_MET_output="${field}" + fieldname_in_MET_filedir_names="${field}" + ;; + + *) + print_err_msg_exit "\ +A method for setting verification parameters has not been specified for +this observation type (obtype) and field (field) combination: + obtype = \"${obtype}\" + field = \"${field}\"" + ;; + + esac + ;; + + "AIRNOW") + + _grid_or_point_="point" + case "${field}" in + + "PM25") + fieldname_in_obs_input="${field}" + fieldname_in_fcst_input="MASSDEN" + fieldname_in_MET_output="${field}" + fieldname_in_MET_filedir_names="${field}" + ;; + + *) + print_err_msg_exit "\ +A method for setting verification parameters has not been specified for this observation type (obtype) and field (field) combination: obtype = \"${obtype}\" field = \"${field}\"" diff --git a/ush/setup.py b/ush/setup.py index 51d5b2a084..25ab947015 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -575,6 +575,12 @@ def remove_tag(tasks, tag): "metatask_GenEnsProd_EnsembleStat_NDAS", "metatask_PointStat_NDAS_ensmeanprob"] + vx_fields_all["AERONET"] = ["AOD"] + vx_metatasks_all["AERONET"] = ["task_get_obs_aeronet"] + + vx_fields_all["AIRNOW"] = ["PM25"] + vx_metatasks_all["AIRNOW"] = ["task_get_obs_airnow"] + # Get the vx fields specified in the experiment configuration. 
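+    # (Illustrative sketch, assuming the field-pruning logic below: a user
+    # config with VX_FIELDS: [ "AOD", "PM25" ] keeps only the AERONET and
+    # AIRNOW entries added above and deactivates the metatasks registered
+    # for the other obs types.)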
vx_fields_config = expt_config["verification"]["VX_FIELDS"] diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index 3530b51ae9..725d604267 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -76,6 +76,6 @@ valid_vals_DO_AQM_CHEM_LBCS: [True, False] valid_vals_DO_AQM_GEFS_LBCS: [True, False] valid_vals_DO_AQM_SAVE_AIRNOW_HIST: [True, False] valid_vals_COLDSTART: [True, False] -valid_vals_VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] +valid_vals_VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA", "AERONET", "AIRNOW" ] valid_vals_VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] From 03d11c07be5b5c75b30f589b5a2996653f6f6a0d Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Thu, 21 Mar 2024 19:42:17 +0000 Subject: [PATCH 033/260] Way more changes than I *wanted* to make, but these are long overdue: - There are lots of task-specific checks that always run regardless of task inclusion: add some checks there so that we don't have to include unnecessary variables like PREDEF_GRID_NAME in vx-only experiments - There were a few task-specific checks that DO check for task inclusion, but the checks were broken: fix those - Move dict_find from an inline function in setup.py to a proper external python function --- ush/python_utils/__init__.py | 2 +- ush/python_utils/misc.py | 24 +++++++++++++++ ush/setup.py | 58 +++++++++++++++++------------------- 3 files changed, 52 insertions(+), 32 deletions(-) diff --git a/ush/python_utils/__init__.py b/ush/python_utils/__init__.py index dabd8b3f08..90f61690e6 100644 --- a/ush/python_utils/__init__.py +++ b/ush/python_utils/__init__.py @@ -1,4 +1,4 @@ -from .misc import uppercase, lowercase, find_pattern_in_str, find_pattern_in_file +from .misc import uppercase, lowercase, find_pattern_in_str, find_pattern_in_file, dict_find from .check_for_preexist_dir_file import check_for_preexist_dir_file from .check_var_valid_value import check_var_valid_value from .create_symlink_to_file import create_symlink_to_file diff --git a/ush/python_utils/misc.py b/ush/python_utils/misc.py index e5e320ae43..38e387f565 100644 --- a/ush/python_utils/misc.py +++ b/ush/python_utils/misc.py @@ -57,3 +57,27 @@ def find_pattern_in_file(pattern, file_name): for match in re.finditer(pattern, line): return match.groups() return None + + +def dict_find(user_dict, substring): + """Find any keys in a dictionary that contain the provided substring + + Args: + user_dict: dictionary to search + substring: substring to search keys for + Return: + True if substring found, otherwise False + """ + + if not isinstance(user_dict, dict): + return False + + for key, value in user_dict.items(): + if substring in key: + return True + if isinstance(value, dict): + if dict_find(value, substring): + return True + + return False + diff --git a/ush/setup.py b/ush/setup.py index 25ab947015..980b2c458c 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -21,6 +21,7 @@ check_var_valid_value, lowercase, uppercase, + dict_find, list_to_str, check_for_preexist_dir_file, flatten_dict, @@ -482,12 +483,25 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): # ----------------------------------------------------------------------- # + # Before setting task flags, ensure we don't have any invalid rocoto tasks + # (e.g. 
metatasks with no tasks, tasks with no associated commands) + clean_rocoto_dict(expt_config["rocoto"]["tasks"]) + rocoto_config = expt_config.get('rocoto', {}) rocoto_tasks = rocoto_config.get("tasks") run_make_grid = rocoto_tasks.get('task_make_grid') is not None run_make_orog = rocoto_tasks.get('task_make_orog') is not None run_make_sfc_climo = rocoto_tasks.get('task_make_sfc_climo') is not None + # Also set some flags that will be needed later + run_make_ics = dict_find(rocoto_tasks, "task_make_ics") + run_make_lbcs = dict_find(rocoto_tasks, "task_make_lbcs") + run_run_fcst = dict_find(rocoto_tasks, "task_run_fcst") + run_any_coldstart_task = run_make_ics or \ + run_make_lbcs or \ + run_run_fcst + run_run_post = dict_find(rocoto_tasks, "task_run_post") + # Necessary tasks are turned on pregen_basedir = expt_config["platform"].get("DOMAIN_PREGEN_BASEDIR") if pregen_basedir is None and not ( @@ -838,6 +852,7 @@ def get_location(xcs, fmt, expt_cfg): # # ----------------------------------------------------------------------- # + grid_gen_method = workflow_config["GRID_GEN_METHOD"] if grid_gen_method == "GFDLgrid": grid_params = set_gridparams_GFDLgrid( @@ -854,6 +869,7 @@ def get_location(xcs, fmt, expt_cfg): nh4=expt_config["constants"]["NH4"], run_envir=run_envir, ) + expt_config["grid_params"] = grid_params elif grid_gen_method == "ESGgrid": grid_params = set_gridparams_ESGgrid( lon_ctr=grid_config["ESGgrid_LON_CTR"], @@ -866,8 +882,10 @@ def get_location(xcs, fmt, expt_cfg): dely=grid_config["ESGgrid_DELY"], constants=expt_config["constants"], ) + expt_config["grid_params"] = grid_params + elif not run_any_coldstart_task: + log_info("No coldstart tasks specified, not setting grid parameters") else: - errmsg = dedent( f""" Valid values of GRID_GEN_METHOD are GFDLgrid and ESGgrid. @@ -877,9 +895,6 @@ def get_location(xcs, fmt, expt_cfg): ) raise KeyError(errmsg) from None - # Add a grid parameter section to the experiment config - expt_config["grid_params"] = grid_params - # Check to make sure that mandatory forecast variables are set. vlist = [ "DT_ATMOS", @@ -887,9 +902,10 @@ def get_location(xcs, fmt, expt_cfg): "LAYOUT_Y", "BLOCKSIZE", ] - for val in vlist: - if not fcst_config.get(val): - raise Exception(f"\nMandatory variable '{val}' has not been set\n") + if run_any_coldstart_task: + for val in vlist: + if not fcst_config.get(val): + raise Exception(f"\nMandatory variable '{val}' has not been set\n") # # ----------------------------------------------------------------------- @@ -1143,7 +1159,7 @@ def get_location(xcs, fmt, expt_cfg): post_output_domain_name = post_config.get("POST_OUTPUT_DOMAIN_NAME") if not post_output_domain_name: - if not predef_grid_name: + if not predef_grid_name and run_run_post: raise Exception( f""" The domain name used in naming the run_post output files @@ -1151,7 +1167,7 @@ def get_location(xcs, fmt, expt_cfg): POST_OUTPUT_DOMAIN_NAME = \"{post_output_domain_name}\" If this experiment is not using a predefined grid (i.e. if PREDEF_GRID_NAME is set to a null string), POST_OUTPUT_DOMAIN_NAME - must be set in the configuration file (\"{user_config}\"). """ + must be set in the configuration file (\"{user_config_fn}\"). """ ) post_output_domain_name = predef_grid_name @@ -1293,26 +1309,6 @@ def get_location(xcs, fmt, expt_cfg): # the same resolution input. 
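+    # (Note: dict_find() is now imported from ush/python_utils/misc.py; e.g.
+    # dict_find(rocoto_tasks, "task_run_fcst") is True when any nested task
+    # key contains that substring.)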
# - def dict_find(user_dict, substring): - - if not isinstance(user_dict, dict): - return False - - for key, value in user_dict.items(): - if substring in key: - return True - if isinstance(value, dict): - if dict_find(value, substring): - return True - - return False - - run_make_ics = dict_find(rocoto_tasks, "task_make_ics") - run_make_lbcs = dict_find(rocoto_tasks, "task_make_lbcs") - run_run_fcst = dict_find(rocoto_tasks, "task_run_fcst") - run_any_coldstart_task = run_make_ics or \ - run_make_lbcs or \ - run_run_fcst # Flags for creating symlinks to pre-generated grid, orography, and sfc_climo files. # These consider dependencies of other tasks on each pre-processing task. create_symlinks_to_pregen_files = { @@ -1330,10 +1326,10 @@ def dict_find(user_dict, substring): res_in_fixlam_filenames = None for prep_task in prep_tasks: res_in_fns = "" - sect_key = f"task_make_{prep_task.lower()}" # If the user doesn't want to run the given task, link the fix # file from the staged files. - if not task_defs.get(sect_key): + if create_symlinks_to_pregen_files[prep_task]: + sect_key = f"task_make_{prep_task.lower()}" dir_key = f"{prep_task}_DIR" task_dir = expt_config[sect_key].get(dir_key) From 1c0b7214619bd014bb671c7824495cac92a9b7e2 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Fri, 22 Mar 2024 02:44:16 +0000 Subject: [PATCH 034/260] Some major cleanup to generate_FV3LAM_wflow.py to allow for more task-dependent logic checks - Break out all FV3 namelist logic out into a new function, setup_fv3_namelist - Only call this new function if the run_fcst task is active - Delay exporting of variables further down the page (need to completely eliminated this eventually) - Replace some *_vrfy commands with their proper versions - Eliminate some unnecessary variables and block comments --- ush/generate_FV3LAM_wflow.py | 588 +++++++++++++++++------------------ 1 file changed, 293 insertions(+), 295 deletions(-) diff --git a/ush/generate_FV3LAM_wflow.py b/ush/generate_FV3LAM_wflow.py index c671a69da8..c203bf4c68 100755 --- a/ush/generate_FV3LAM_wflow.py +++ b/ush/generate_FV3LAM_wflow.py @@ -24,13 +24,13 @@ import_vars, export_vars, cp_vrfy, - ln_vrfy, mkdir_vrfy, mv_vrfy, check_for_preexist_dir_file, cfg_to_yaml_str, find_pattern_in_str, flatten_dict, + dict_find, ) from setup import setup @@ -165,15 +165,8 @@ def generate_FV3LAM_wflow( # # ----------------------------------------------------------------------- # - # From here on out, going back to setting variables for everything - # in the flattened expt_config dictionary - # TODO: Reference all these variables in their respective - # dictionaries, instead. 
- import_vars(dictionary=flatten_dict(expt_config)) - export_vars(source_dict=flatten_dict(expt_config)) - # pylint: disable=undefined-variable - if USE_CRON_TO_RELAUNCH: + if expt_config["workflow"]["USE_CRON_TO_RELAUNCH"]: add_crontab_line(called_from_cron=False,machine=expt_config["user"]["MACHINE"], crontab_line=expt_config["workflow"]["CRONTAB_LINE"], exptdir=exptdir,debug=debug) @@ -181,34 +174,36 @@ def generate_FV3LAM_wflow( # # Copy or symlink fix files # - if SYMLINK_FIX_FILES: + fixgsm = expt_config["platform"]["FIXgsm"] + fixam = expt_config["workflow"]["FIXam"] + if expt_config["workflow"]["SYMLINK_FIX_FILES"]: log_info( f""" Symlinking fixed files from system directory (FIXgsm) to a subdirectory (FIXam): - FIXgsm = '{FIXgsm}' - FIXam = '{FIXam}'""", + FIXgsm = '{fixgsm}' + FIXam = '{fixam}'""", verbose=debug, ) - ln_vrfy(f"""-fsn '{FIXgsm}' '{FIXam}'""") + create_symlink_to_file(fixgsm,fixam,False) else: log_info( f""" Copying fixed files from system directory (FIXgsm) to a subdirectory (FIXam): - FIXgsm = '{FIXgsm}' - FIXam = '{FIXam}'""", + FIXgsm = '{fixgsm}' + FIXam = '{fixam}'""", verbose=debug, ) - check_for_preexist_dir_file(FIXam, "delete") - mkdir_vrfy("-p", FIXam) - mkdir_vrfy("-p", os.path.join(FIXam, "fix_co2_proj")) + check_for_preexist_dir_file(fixam, "delete") + mkdir_vrfy("-p", fixam) + mkdir_vrfy("-p", os.path.join(fixam, "fix_co2_proj")) - num_files = len(FIXgsm_FILES_TO_COPY_TO_FIXam) + num_files = len(expt_config["fixed_files"]["FIXgsm_FILES_TO_COPY_TO_FIXam"]) for i in range(num_files): - fn = f"{FIXgsm_FILES_TO_COPY_TO_FIXam[i]}" - cp_vrfy(os.path.join(FIXgsm, fn), os.path.join(FIXam, fn)) + fn = f"{expt_config['fixed_files']['FIXgsm_FILES_TO_COPY_TO_FIXam'][i]}" + cp_vrfy(os.path.join(fixgsm, fn), os.path.join(fixam, fn)) # # ----------------------------------------------------------------------- # @@ -216,26 +211,30 @@ def generate_FV3LAM_wflow( # # ----------------------------------------------------------------------- # - if USE_MERRA_CLIMO: + fixaer = expt_config["platform"]["FIXaer"] + fixlut = expt_config["platform"]["FIXlut"] + fixclim = expt_config["workflow"]["FIXclim"] + + if expt_config["task_run_fcst"]["USE_MERRA_CLIMO"]: log_info( f""" Copying MERRA2 aerosol climatology data files from system directory (FIXaer/FIXlut) to a subdirectory (FIXclim) in the experiment directory: - FIXaer = '{FIXaer}' - FIXlut = '{FIXlut}' - FIXclim = '{FIXclim}'""", + FIXaer = '{fixaer}' + FIXlut = '{fixlut}' + FIXclim = '{fixclim}'""", verbose=debug, ) - check_for_preexist_dir_file(FIXclim, "delete") - mkdir_vrfy("-p", FIXclim) + check_for_preexist_dir_file(fixclim, "delete") + mkdir_vrfy("-p", fixclim) - if SYMLINK_FIX_FILES: - ln_vrfy("-fsn", os.path.join(FIXaer, "merra2.aerclim*.nc"), FIXclim) - ln_vrfy("-fsn", os.path.join(FIXlut, "optics*.dat"), FIXclim) + if expt_config["workflow"]["SYMLINK_FIX_FILES"]: + create_symlink_to_file(os.path.join(fixaer, "merra2.aerclim*.nc"), fixclim) + create_symlink_to_file(os.path.join(fixlut, "optics*.dat"), fixclim) else: - cp_vrfy(os.path.join(FIXaer, "merra2.aerclim*.nc"), FIXclim) - cp_vrfy(os.path.join(FIXlut, "optics*.dat"), FIXclim) + cp_vrfy(os.path.join(fixaer, "merra2.aerclim*.nc"), fixclim) + cp_vrfy(os.path.join(fixlut, "optics*.dat"), fixclim) # # ----------------------------------------------------------------------- # @@ -243,58 +242,277 @@ def generate_FV3LAM_wflow( # # ----------------------------------------------------------------------- # - log_info( - """ - Copying templates of various input files to 
the experiment directory...""", - verbose=debug, - ) + if dict_find(expt_config["rocoto"]["tasks"], "task_run_fcst"): + log_info( + """ + Copying templates of various input files to the experiment directory...""", + verbose=debug, + ) - log_info( - """ - Copying the template data table file to the experiment directory...""", - verbose=debug, - ) - cp_vrfy(DATA_TABLE_TMPL_FP, DATA_TABLE_FP) + log_info( + """ + Copying the template data table file to the experiment directory...""", + verbose=debug, + ) + cp_vrfy(expt_config["workflow"]["DATA_TABLE_TMPL_FP"], expt_config["workflow"]["DATA_TABLE_FP"]) - log_info( - """ - Copying the template field table file to the experiment directory...""", - verbose=debug, - ) - cp_vrfy(FIELD_TABLE_TMPL_FP, FIELD_TABLE_FP) + log_info( + """ + Copying the template field table file to the experiment directory...""", + verbose=debug, + ) + cp_vrfy(expt_config["workflow"]["FIELD_TABLE_TMPL_FP"], expt_config["workflow"]["FIELD_TABLE_FP"]) + + # + # Copy the CCPP physics suite definition file from its location in the + # clone of the FV3 code repository to the experiment directory (EXPT- + # DIR). + # + log_info( + """ + Copying the CCPP physics suite definition XML file from its location in + the forecast model directory structure to the experiment directory...""", + verbose=debug, + ) + cp_vrfy(expt_config["workflow"]["CCPP_PHYS_SUITE_IN_CCPP_FP"], expt_config["workflow"]["CCPP_PHYS_SUITE_FP"]) + # + # Copy the field dictionary file from its location in the + # clone of the FV3 code repository to the experiment directory (EXPT- + # DIR). + # + log_info( + """ + Copying the field dictionary file from its location in the + forecast model directory structure to the experiment + directory...""", + verbose=debug, + ) + cp_vrfy(expt_config["workflow"]["FIELD_DICT_IN_UWM_FP"], expt_config["workflow"]["FIELD_DICT_FP"]) + + # + # ----------------------------------------------------------------------- + # + # Call function to write the FV3 namelist + # + # ----------------------------------------------------------------------- + # + setup_fv3_namelist(expt_config,debug) # - # Copy the CCPP physics suite definition file from its location in the - # clone of the FV3 code repository to the experiment directory (EXPT- - # DIR). + # If not running the TN_MAKE_GRID task (which implies the workflow will + # use pregenerated grid files), set the namelist variables specifying + # the paths to surface climatology files. These files are located in + # (or have symlinks that point to them) in the FIXlam directory. # - log_info( - """ - Copying the CCPP physics suite definition XML file from its location in - the forecast model directory structure to the experiment directory...""", - verbose=debug, - ) - cp_vrfy(CCPP_PHYS_SUITE_IN_CCPP_FP, CCPP_PHYS_SUITE_FP) + # Note that if running the TN_MAKE_GRID task, this action usually cannot + # be performed here but must be performed in that task because the names + # of the surface climatology files depend on the CRES parameter (which is + # the C-resolution of the grid), and this parameter is in most workflow + # configurations is not known until the grid is created. # - # Copy the field dictionary file from its location in the - # clone of the FV3 code repository to the experiment directory (EXPT- - # DIR). 
+ if not expt_config['rocoto']['tasks'].get('task_make_grid') and dict_find(expt_config["rocoto"]["tasks"], "task_run_fcst"): + set_fv3nml_sfc_climo_filenames(flatten_dict(expt_config), debug) + # - log_info( - """ - Copying the field dictionary file from its location in the - forecast model directory structure to the experiment - directory...""", - verbose=debug, - ) - cp_vrfy(FIELD_DICT_IN_UWM_FP, FIELD_DICT_FP) + # ----------------------------------------------------------------------- + # + # Add the relevant tendency-based stochastic physics namelist variables to + # "settings" when running with SPPT, SHUM, or SKEB turned on. If running + # with SPP or LSM SPP, set the "new_lscale" variable. Otherwise only + # include an empty "nam_stochy" stanza. # # ----------------------------------------------------------------------- # - # Set parameters in the FV3-LAM namelist file. + # From here on out, going back to setting variables for everything + # in the flattened expt_config dictionary + # TODO: Reference all these variables in their respective + # dictionaries, instead. + import_vars(dictionary=flatten_dict(expt_config)) + export_vars(source_dict=flatten_dict(expt_config)) + settings = {} + settings["gfs_physics_nml"] = { + "do_shum": DO_SHUM, + "do_sppt": DO_SPPT, + "do_skeb": DO_SKEB, + "do_spp": DO_SPP, + "n_var_spp": N_VAR_SPP, + "n_var_lndp": N_VAR_LNDP, + "lndp_type": LNDP_TYPE, + "fhcyc": FHCYC_LSM_SPP_OR_NOT, + } + nam_stochy_dict = {} + if DO_SPPT: + nam_stochy_dict.update( + { + "iseed_sppt": ISEED_SPPT, + "new_lscale": NEW_LSCALE, + "sppt": SPPT_MAG, + "sppt_logit": SPPT_LOGIT, + "sppt_lscale": SPPT_LSCALE, + "sppt_sfclimit": SPPT_SFCLIMIT, + "sppt_tau": SPPT_TSCALE, + "spptint": SPPT_INT, + "use_zmtnblck": USE_ZMTNBLCK, + } + ) + + if DO_SHUM: + nam_stochy_dict.update( + { + "iseed_shum": ISEED_SHUM, + "new_lscale": NEW_LSCALE, + "shum": SHUM_MAG, + "shum_lscale": SHUM_LSCALE, + "shum_tau": SHUM_TSCALE, + "shumint": SHUM_INT, + } + ) + + if DO_SKEB: + nam_stochy_dict.update( + { + "iseed_skeb": ISEED_SKEB, + "new_lscale": NEW_LSCALE, + "skeb": SKEB_MAG, + "skeb_lscale": SKEB_LSCALE, + "skebnorm": SKEBNORM, + "skeb_tau": SKEB_TSCALE, + "skebint": SKEB_INT, + "skeb_vdof": SKEB_VDOF, + } + ) + + if DO_SPP or DO_LSM_SPP: + nam_stochy_dict.update({"new_lscale": NEW_LSCALE}) + + settings["nam_stochy"] = nam_stochy_dict + # + # Add the relevant SPP namelist variables to "settings" when running with + # SPP turned on. Otherwise only include an empty "nam_sppperts" stanza. + # + nam_sppperts_dict = {} + if DO_SPP: + nam_sppperts_dict = { + "iseed_spp": ISEED_SPP, + "spp_lscale": SPP_LSCALE, + "spp_prt_list": SPP_MAG_LIST, + "spp_sigtop1": SPP_SIGTOP1, + "spp_sigtop2": SPP_SIGTOP2, + "spp_stddev_cutoff": SPP_STDDEV_CUTOFF, + "spp_tau": SPP_TSCALE, + "spp_var_list": SPP_VAR_LIST, + } + + settings["nam_sppperts"] = nam_sppperts_dict + # + # Add the relevant LSM SPP namelist variables to "settings" when running with + # LSM SPP turned on. 
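+    # (These settings populate the &nam_sfcperts namelist group; as a sketch,
+    # lndp_var_list and lndp_prt_list pair each perturbed land variable with
+    # its perturbation magnitude.)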
+ # + nam_sfcperts_dict = {} + if DO_LSM_SPP: + nam_sfcperts_dict = { + "lndp_type": LNDP_TYPE, + "lndp_model_type": LNDP_MODEL_TYPE, + "lndp_tau": LSM_SPP_TSCALE, + "lndp_lscale": LSM_SPP_LSCALE, + "iseed_lndp": ISEED_LSM_SPP, + "lndp_var_list": LSM_SPP_VAR_LIST, + "lndp_prt_list": LSM_SPP_MAG_LIST, + } + + settings["nam_sfcperts"] = nam_sfcperts_dict + + settings_str = cfg_to_yaml_str(settings) + # + #----------------------------------------------------------------------- + # + # Generate namelist files with stochastic physics if needed + # + #----------------------------------------------------------------------- + # + if any((DO_SPP, DO_SPPT, DO_SHUM, DO_SKEB, DO_LSM_SPP)): + realize( + input_config=FV3_NML_FP, + input_format="nml", + output_file=FV3_NML_STOCH_FP, + output_format="nml", + update_config=get_nml_config(settings), + ) + + # + # ----------------------------------------------------------------------- + # + # To have a record of how this experiment/workflow was generated, copy + # the experiment/workflow configuration file to the experiment directo- + # ry. + # + # ----------------------------------------------------------------------- + # + cp_vrfy(os.path.join(ushdir, EXPT_CONFIG_FN), EXPTDIR) + + # + # ----------------------------------------------------------------------- + # + # For convenience, print out the commands that need to be issued on the + # command line in order to launch the workflow and to check its status. + # Also, print out the line that should be placed in the user's cron table + # in order for the workflow to be continually resubmitted. # # ----------------------------------------------------------------------- # + if WORKFLOW_MANAGER == "rocoto": + wflow_db_fn = f"{os.path.splitext(WFLOW_XML_FN)[0]}.db" + rocotorun_cmd = f"rocotorun -w {WFLOW_XML_FN} -d {wflow_db_fn} -v 10" + rocotostat_cmd = f"rocotostat -w {WFLOW_XML_FN} -d {wflow_db_fn} -v 10" + + # pylint: disable=line-too-long + log_info( + f""" + To launch the workflow, change location to the experiment directory + (EXPTDIR) and issue the rocotrun command, as follows: + + > cd {EXPTDIR} + > {rocotorun_cmd} + + To check on the status of the workflow, issue the rocotostat command + (also from the experiment directory): + + > {rocotostat_cmd} + + Note that: + + 1) The rocotorun command must be issued after the completion of each + task in the workflow in order for the workflow to submit the next + task(s) to the queue. + + 2) In order for the output of the rocotostat command to be up-to-date, + the rocotorun command must be issued immediately before issuing the + rocotostat command. + + For automatic resubmission of the workflow (say every {CRON_RELAUNCH_INTVL_MNTS} minutes), the + following line can be added to the user's crontab (use 'crontab -e' to + edit the cron table): + + */{CRON_RELAUNCH_INTVL_MNTS} * * * * cd {EXPTDIR} && ./launch_FV3LAM_wflow.sh called_from_cron="TRUE" + """ + ) + # pylint: enable=line-too-long + + # If we got to this point everything was successful: move the log + # file to the experiment directory. + mv_vrfy(logfile, EXPTDIR) + + return EXPTDIR + +def setup_fv3_namelist(expt_config,debug): + + # From here on out, going back to setting variables for everything + # in the flattened expt_config dictionary + # TODO: Reference all these variables in their respective + # dictionaries, instead. 
+ import_vars(dictionary=flatten_dict(expt_config)) + export_vars(source_dict=flatten_dict(expt_config)) + log_info( f""" Setting parameters in weather model's namelist file (FV3_NML_FP): @@ -302,14 +520,6 @@ def generate_FV3LAM_wflow( verbose=debug, ) # - # Set npx and npy, which are just NX plus 1 and NY plus 1, respectively. - # These need to be set in the FV3-LAM Fortran namelist file. They represent - # the number of cell vertices in the x and y directions on the regional - # grid. - # - npx = NX + 1 - npy = NY + 1 - # # For the physics suites that use RUC LSM, set the parameter kice to 9, # Otherwise, leave it unspecified (which means it gets set to the default # value in the forecast model). @@ -317,7 +527,7 @@ def generate_FV3LAM_wflow( kice = None if SDF_USES_RUC_LSM: kice = 9 - # + # # Set lsoil, which is the number of input soil levels provided in the # chgres_cube output NetCDF file. This is the same as the parameter # nsoill_out in the namelist file for chgres_cube. [On the other hand, @@ -327,7 +537,7 @@ def generate_FV3LAM_wflow( # lsoil as the one used to set nsoill_out in exregional_make_ics.sh. # See that script for details. # - # NOTE: + # NOTE: # May want to remove lsoil from FV3.input.yml (and maybe input.nml.FV3). # Also, may want to set lsm here as well depending on SDF_USES_RUC_LSM. # @@ -343,17 +553,6 @@ def generate_FV3LAM_wflow( # passed to a python script that will in turn set the values of these # variables in the namelist file. # - # IMPORTANT: - # If we want a namelist variable to be removed from the namelist file, - # in the "settings" variable below, we need to set its value to the - # string "null". This is equivalent to setting its value to - # !!python/none - # in the base namelist file specified by FV3_NML_BASE_SUITE_FP or the - # suite-specific yaml settings file specified by FV3_NML_YAML_CONFIG_FP. - # - # It turns out that setting the variable to an empty string also works - # to remove it from the namelist! Which is better to use?? - # settings = {} settings["atmos_model_nml"] = { "blocksize": BLOCKSIZE, @@ -374,8 +573,8 @@ def generate_FV3LAM_wflow( # FV3 namelist file? # "stretch_fac": STRETCH_FAC, - "npx": npx, - "npy": npy, + "npx": NX + 1, + "npy": NY + 1, "layout": [LAYOUT_X, LAYOUT_Y], "bc_update_interval": LBC_SPEC_INTVL_HRS, }) @@ -417,7 +616,6 @@ def generate_FV3LAM_wflow( "lsoil": lsoil or None, "print_diff_pgr": PRINT_DIFF_PGR, }) - if CPL_AQM: gfs_physics_nml_dict.update({ "cplaqm": True, @@ -522,206 +720,6 @@ def generate_FV3LAM_wflow( if v is None: del base_namelist[sect][k] base_namelist.dump(FV3_NML_FP) - # - # If not running the TN_MAKE_GRID task (which implies the workflow will - # use pregenerated grid files), set the namelist variables specifying - # the paths to surface climatology files. These files are located in - # (or have symlinks that point to them) in the FIXlam directory. - # - # Note that if running the TN_MAKE_GRID task, this action usually cannot - # be performed here but must be performed in that task because the names - # of the surface climatology files depend on the CRES parameter (which is - # the C-resolution of the grid), and this parameter is in most workflow - # configurations is not known until the grid is created. 
- # - if not expt_config['rocoto']['tasks'].get('task_make_grid'): - - set_fv3nml_sfc_climo_filenames(flatten_dict(expt_config), debug) - - # - # ----------------------------------------------------------------------- - # - # Add the relevant tendency-based stochastic physics namelist variables to - # "settings" when running with SPPT, SHUM, or SKEB turned on. If running - # with SPP or LSM SPP, set the "new_lscale" variable. Otherwise only - # include an empty "nam_stochy" stanza. - # - # ----------------------------------------------------------------------- - # - settings = {} - settings["gfs_physics_nml"] = { - "do_shum": DO_SHUM, - "do_sppt": DO_SPPT, - "do_skeb": DO_SKEB, - "do_spp": DO_SPP, - "n_var_spp": N_VAR_SPP, - "n_var_lndp": N_VAR_LNDP, - "lndp_type": LNDP_TYPE, - "fhcyc": FHCYC_LSM_SPP_OR_NOT, - } - nam_stochy_dict = {} - if DO_SPPT: - nam_stochy_dict.update( - { - "iseed_sppt": ISEED_SPPT, - "new_lscale": NEW_LSCALE, - "sppt": SPPT_MAG, - "sppt_logit": SPPT_LOGIT, - "sppt_lscale": SPPT_LSCALE, - "sppt_sfclimit": SPPT_SFCLIMIT, - "sppt_tau": SPPT_TSCALE, - "spptint": SPPT_INT, - "use_zmtnblck": USE_ZMTNBLCK, - } - ) - - if DO_SHUM: - nam_stochy_dict.update( - { - "iseed_shum": ISEED_SHUM, - "new_lscale": NEW_LSCALE, - "shum": SHUM_MAG, - "shum_lscale": SHUM_LSCALE, - "shum_tau": SHUM_TSCALE, - "shumint": SHUM_INT, - } - ) - - if DO_SKEB: - nam_stochy_dict.update( - { - "iseed_skeb": ISEED_SKEB, - "new_lscale": NEW_LSCALE, - "skeb": SKEB_MAG, - "skeb_lscale": SKEB_LSCALE, - "skebnorm": SKEBNORM, - "skeb_tau": SKEB_TSCALE, - "skebint": SKEB_INT, - "skeb_vdof": SKEB_VDOF, - } - ) - - if DO_SPP or DO_LSM_SPP: - nam_stochy_dict.update({"new_lscale": NEW_LSCALE}) - - settings["nam_stochy"] = nam_stochy_dict - # - # Add the relevant SPP namelist variables to "settings" when running with - # SPP turned on. Otherwise only include an empty "nam_sppperts" stanza. - # - nam_sppperts_dict = {} - if DO_SPP: - nam_sppperts_dict = { - "iseed_spp": ISEED_SPP, - "spp_lscale": SPP_LSCALE, - "spp_prt_list": SPP_MAG_LIST, - "spp_sigtop1": SPP_SIGTOP1, - "spp_sigtop2": SPP_SIGTOP2, - "spp_stddev_cutoff": SPP_STDDEV_CUTOFF, - "spp_tau": SPP_TSCALE, - "spp_var_list": SPP_VAR_LIST, - } - - settings["nam_sppperts"] = nam_sppperts_dict - # - # Add the relevant LSM SPP namelist variables to "settings" when running with - # LSM SPP turned on. - # - nam_sfcperts_dict = {} - if DO_LSM_SPP: - nam_sfcperts_dict = { - "lndp_type": LNDP_TYPE, - "lndp_model_type": LNDP_MODEL_TYPE, - "lndp_tau": LSM_SPP_TSCALE, - "lndp_lscale": LSM_SPP_LSCALE, - "iseed_lndp": ISEED_LSM_SPP, - "lndp_var_list": LSM_SPP_VAR_LIST, - "lndp_prt_list": LSM_SPP_MAG_LIST, - } - - settings["nam_sfcperts"] = nam_sfcperts_dict - - settings_str = cfg_to_yaml_str(settings) - # - #----------------------------------------------------------------------- - # - # Generate namelist files with stochastic physics if needed - # - #----------------------------------------------------------------------- - # - if any((DO_SPP, DO_SPPT, DO_SHUM, DO_SKEB, DO_LSM_SPP)): - realize( - input_config=FV3_NML_FP, - input_format="nml", - output_file=FV3_NML_STOCH_FP, - output_format="nml", - update_config=get_nml_config(settings), - ) - - # - # ----------------------------------------------------------------------- - # - # To have a record of how this experiment/workflow was generated, copy - # the experiment/workflow configuration file to the experiment directo- - # ry. 
- # - # ----------------------------------------------------------------------- - # - cp_vrfy(os.path.join(ushdir, EXPT_CONFIG_FN), EXPTDIR) - - # - # ----------------------------------------------------------------------- - # - # For convenience, print out the commands that need to be issued on the - # command line in order to launch the workflow and to check its status. - # Also, print out the line that should be placed in the user's cron table - # in order for the workflow to be continually resubmitted. - # - # ----------------------------------------------------------------------- - # - if WORKFLOW_MANAGER == "rocoto": - wflow_db_fn = f"{os.path.splitext(WFLOW_XML_FN)[0]}.db" - rocotorun_cmd = f"rocotorun -w {WFLOW_XML_FN} -d {wflow_db_fn} -v 10" - rocotostat_cmd = f"rocotostat -w {WFLOW_XML_FN} -d {wflow_db_fn} -v 10" - - # pylint: disable=line-too-long - log_info( - f""" - To launch the workflow, change location to the experiment directory - (EXPTDIR) and issue the rocotrun command, as follows: - - > cd {EXPTDIR} - > {rocotorun_cmd} - - To check on the status of the workflow, issue the rocotostat command - (also from the experiment directory): - - > {rocotostat_cmd} - - Note that: - - 1) The rocotorun command must be issued after the completion of each - task in the workflow in order for the workflow to submit the next - task(s) to the queue. - - 2) In order for the output of the rocotostat command to be up-to-date, - the rocotorun command must be issued immediately before issuing the - rocotostat command. - - For automatic resubmission of the workflow (say every {CRON_RELAUNCH_INTVL_MNTS} minutes), the - following line can be added to the user's crontab (use 'crontab -e' to - edit the cron table): - - */{CRON_RELAUNCH_INTVL_MNTS} * * * * cd {EXPTDIR} && ./launch_FV3LAM_wflow.sh called_from_cron="TRUE" - """ - ) - # pylint: enable=line-too-long - - # If we got to this point everything was successful: move the log - # file to the experiment directory. - mv_vrfy(logfile, EXPTDIR) - - return EXPTDIR def setup_logging(logfile: str = "log.generate_FV3LAM_wflow", debug: bool = False) -> None: From 635565550e5c4275ee7d0f7726ce7bbfe7e2b079 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Fri, 22 Mar 2024 03:04:26 +0000 Subject: [PATCH 035/260] create_symlink_to_file() did not support wildcards. 
Now it does --- ush/python_utils/create_symlink_to_file.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/ush/python_utils/create_symlink_to_file.py b/ush/python_utils/create_symlink_to_file.py index 363a49fa40..ba6ce19147 100644 --- a/ush/python_utils/create_symlink_to_file.py +++ b/ush/python_utils/create_symlink_to_file.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import os +import glob from .print_input_args import print_input_args from .print_msg import print_err_msg_exit @@ -37,12 +38,16 @@ def create_symlink_to_file(target, symlink, relative=True): ) if not os.path.exists(target): - print_err_msg_exit( - f""" - Cannot create symlink to specified target file because the latter does - not exist or is not a file: - target = '{target}'""" - ) + if glob.glob(target): + for wildtarget in glob.glob(target): + create_symlink_to_file(wildtarget,symlink,relative) + else: + print_err_msg_exit( + f""" + Cannot create symlink to specified target file because the latter does + not exist or is not a file: + target = '{target}'""" + ) relative_flag = "" if relative: From 8353c5d016be56dd9d600bffc6455855bd268667 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Fri, 22 Mar 2024 03:05:40 +0000 Subject: [PATCH 036/260] Some fixes for new obs: need to define paths in default workflow task, need to create observation directories if they don't exist --- parm/wflow/default_workflow.yaml | 2 ++ scripts/exregional_get_verif_obs.sh | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/parm/wflow/default_workflow.yaml b/parm/wflow/default_workflow.yaml index e37fdae1ea..600fa3a9ce 100644 --- a/parm/wflow/default_workflow.yaml +++ b/parm/wflow/default_workflow.yaml @@ -4,6 +4,8 @@ rocoto: entities: ACCOUNT: '{{ user.ACCOUNT }}' + AERONET_OBS_DIR: '{{ platform.AERONET_OBS_DIR }}' + AIRNOW_OBS_DIR: '{{ platform.AIRNOW_OBS_DIR }}' CCPA_OBS_DIR: '{{ platform.CCPA_OBS_DIR }}' COLDSTART: '{{ workflow.COLDSTART }}' COMINgfs: '{{ platform.get("COMINgfs") }}' diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 0b439d356b..fb6f17184b 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -608,6 +608,9 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do echo "Will attempt to retrieve from remote locations" + if [[ ! -d "$aeronet_proc/${vyyyymmdd}" ]]; then + mkdir -p $aeronet_proc/${vyyyymmdd} + fi # Pull AERONET data from HPSS cmd=" python3 -u ${USHdir}/retrieve_data.py \ @@ -647,6 +650,9 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do echo "${airnow_file}" echo "Will attempt to retrieve from remote locations" + if [[ ! 
-d "$airnow_proc/${vyyyymmdd}" ]]; then + mkdir -p $airnow_proc/${vyyyymmdd} + fi # Pull AIRNOW data from HPSS cmd=" From 880cf63e73397ba48a594441de061a81a1952b52 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Fri, 22 Mar 2024 03:16:44 +0000 Subject: [PATCH 037/260] Add "get obs" tasks to list that gets deactivated if that ob type is not specified, include correct valid VX_FIELDS for new variables --- ush/setup.py | 6 +++--- ush/valid_param_vals.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ush/setup.py b/ush/setup.py index 980b2c458c..3834f12d84 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -565,7 +565,7 @@ def remove_tag(tasks, tag): vx_metatasks_all = {} vx_fields_all["CCPA"] = ["APCP"] - vx_metatasks_all["CCPA"] = ["metatask_PcpCombine_obs", + vx_metatasks_all["CCPA"] = ["task_get_obs_ccpa","metatask_PcpCombine_obs", "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", "metatask_GridStat_CCPA_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_CCPA", @@ -579,12 +579,12 @@ def remove_tag(tasks, tag): "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] vx_fields_all["MRMS"] = ["REFC", "RETOP"] - vx_metatasks_all["MRMS"] = ["metatask_GridStat_MRMS_all_mems", + vx_metatasks_all["MRMS"] = ["task_get_obs_mrms","metatask_GridStat_MRMS_all_mems", "metatask_GenEnsProd_EnsembleStat_MRMS", "metatask_GridStat_MRMS_ensprob"] vx_fields_all["NDAS"] = ["ADPSFC", "ADPUPA"] - vx_metatasks_all["NDAS"] = ["task_run_MET_Pb2nc_obs", + vx_metatasks_all["NDAS"] = ["task_get_obs_ndas","task_run_MET_Pb2nc_obs", "metatask_PointStat_NDAS_all_mems", "metatask_GenEnsProd_EnsembleStat_NDAS", "metatask_PointStat_NDAS_ensmeanprob"] diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index 725d604267..1e5908417f 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -76,6 +76,6 @@ valid_vals_DO_AQM_CHEM_LBCS: [True, False] valid_vals_DO_AQM_GEFS_LBCS: [True, False] valid_vals_DO_AQM_SAVE_AIRNOW_HIST: [True, False] valid_vals_COLDSTART: [True, False] -valid_vals_VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA", "AERONET", "AIRNOW" ] +valid_vals_VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA", "AOD", "PM25" ] valid_vals_VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] From e30316cf356a2bf2a0b3b7582a8387c92a134e26 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Fri, 22 Mar 2024 03:17:14 +0000 Subject: [PATCH 038/260] Adding new smoke test! Only retrieving obs and checking output for now; more tasks to come! --- ...config.MET_verification_smoke_only_vx.yaml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml diff --git a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml new file mode 100644 index 0000000000..2286728136 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml @@ -0,0 +1,32 @@ +metadata: + description: |- + This test checks the capability of the workflow to run verification tasks + on a user-specified grid. 
This also serves as a smoke-forecast case for
+    checking aerosol optical depth and PM2.5 verification statistics using
+    AERONET and AIRNOW observations retrieved from HPSS, with staged forecast
+    output from a custom domain
+user:
+  RUN_ENVIR: community
+workflow:
+  CCPP_PHYS_SUITE: FV3_RAP
+  DATE_FIRST_CYCL: '2023121700'
+  DATE_LAST_CYCL: '2023121700'
+  FCST_LEN_HRS: 24
+  PREEXISTING_DIR_METHOD: rename
+rocoto:
+  tasks:
+    taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}'
+    metatask_run_ensemble:
+      task_run_fcst_mem#mem#:
+        walltime: 01:00:00
+verification:
+  VX_FCST_MODEL_NAME: RRFS_smoke_test
+#  VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "AOD", "PM25" ]
+  VX_FIELDS: [ "AOD", "PM25" ]
+  VX_FCST_INPUT_BASEDIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/smoke_vx/fcst
+  FCST_FN_TEMPLATE: 'rrfs.t{init?fmt=%H?shift=-${time_lag}}z.prslev.f{lead?fmt=%HHH?shift=${time_lag}}.conus_3km.grib2'
+platform:
+  CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/CCPA_obs'
+  MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/MRMS_obs'
+  NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/NDAS_obs'
+  AERONET_OBS_DIR: '{{ workflow.EXPTDIR }}/AERONET_obs'
+  AIRNOW_OBS_DIR: '{{ workflow.EXPTDIR }}/AIRNOW_obs'

From 7dbfc3986285dd8e9adeb453d8f36393d7aa5262 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr"
Date: Tue, 18 Jun 2024 18:25:09 +0000
Subject: [PATCH 039/260] Fix location of aeronet data in HPSS tar files

---
 parm/data_locations.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parm/data_locations.yml b/parm/data_locations.yml
index 3168061f50..211d187dac 100644
--- a/parm/data_locations.yml
+++ b/parm/data_locations.yml
@@ -390,7 +390,7 @@ AERONET:
     obs:
     - "{yyyy}{mm}{dd}.lev15"
   archive_internal_dir:
-  - ./airnow/
+  - ./validation_data/aq/aeronet/
 
 AIRNOW:
   hpss:
     protocol: htar

From e5f0fab3a617013507fafd534e5994bace78241a Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr"
Date: Tue, 18 Jun 2024 18:30:31 +0000
Subject: [PATCH 040/260] Adding new task for airnow and AERONET obs: ASCII2NC

- New metplus conf file
- New J-job and exscript for new task
- New task entry in wflow/verify_pre.yaml
- New variables for obs filenames and ASCII2NC output filenames
- New entries in various scripts for new task
  - ush/get_metplus_tool_name.sh
  - ush/setup.py
  - ush/set_vx_fhr_list.sh
- Updating some comments
- Stage test observations on disk for faster testing

---
 jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS           |  77 ++++
 parm/metplus/Ascii2nc_obs.conf                | 113 ++++++
 parm/wflow/verify_pre.yaml                    |  33 ++
 scripts/exregional_get_verif_obs.sh           |   4 +
 scripts/exregional_run_met_ascii2nc_obs.sh    | 346 ++++++++++++++++++
 ...config.MET_verification_smoke_only_vx.yaml |   9 +-
 ush/config_defaults.yaml                      |  16 +
 ush/get_metplus_tool_name.sh                  |   6 +-
 ush/set_vx_fhr_list.sh                        |   8 +
 ush/setup.py                                  |   4 +-
 10 files changed, 608 insertions(+), 8 deletions(-)
 create mode 100755 jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS
 create mode 100644 parm/metplus/Ascii2nc_obs.conf
 create mode 100755 scripts/exregional_run_met_ascii2nc_obs.sh

diff --git a/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS b/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS
new file mode 100755
index 0000000000..f73848c51b
--- /dev/null
+++ b/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash

+#
+#-----------------------------------------------------------------------
+#
+# Source the variable definitions file and the bash utility functions.
+#
+#-----------------------------------------------------------------------
+#
+. $USHdir/source_util_funcs.sh
+source_config_for_task "task_run_met_ascii2nc_obs" ${GLOBAL_VAR_DEFNS_FP}
+. $USHdir/job_preamble.sh
+#
+#-----------------------------------------------------------------------
+#
+# Save current shell options (in a global array). Then set new options
+# for this script/function.
+#
+#-----------------------------------------------------------------------
+#
+{ save_shell_opts; . $USHdir/preamble.sh; } > /dev/null 2>&1
+#
+#-----------------------------------------------------------------------
+#
+# Get the full path to the file in which this script/function is located
+# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in
+# which the file is located (scrfunc_dir).
+#
+#-----------------------------------------------------------------------
+#
+scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" )
+scrfunc_fn=$( basename "${scrfunc_fp}" )
+scrfunc_dir=$( dirname "${scrfunc_fp}" )
+#
+#-----------------------------------------------------------------------
+#
+# Print message indicating entry into script.
+#
+#-----------------------------------------------------------------------
+#
+print_info_msg "
+========================================================================
+Entering script:  \"${scrfunc_fn}\"
+In directory:     \"${scrfunc_dir}\"
+
+This is the J-job script for the task that runs the METplus ASCII2NC tool
+on observation files by initialization time for all forecast hours.
+========================================================================"
+#
+#-----------------------------------------------------------------------
+#
+# Call the ex-script for this J-job and pass to it the necessary varia-
+# bles.
+#
+#-----------------------------------------------------------------------
+#
+$SCRIPTSdir/exregional_run_met_ascii2nc_obs.sh || \
+print_err_msg_exit "\
+Call to ex-script corresponding to J-job \"${scrfunc_fn}\" failed."
+#
+#-----------------------------------------------------------------------
+#
+# Run job postamble.
+#
+#-----------------------------------------------------------------------
+#
+job_postamble
+#
+#-----------------------------------------------------------------------
+#
+# Restore the shell options saved at the beginning of this script/func-
+# tion.
+#
+#-----------------------------------------------------------------------
+#
+{ restore_shell_opts; } > /dev/null 2>&1

diff --git a/parm/metplus/Ascii2nc_obs.conf b/parm/metplus/Ascii2nc_obs.conf
new file mode 100644
index 0000000000..fbcbaf6577
--- /dev/null
+++ b/parm/metplus/Ascii2nc_obs.conf
@@ -0,0 +1,113 @@
+# ASCII2nc METplus Configuration

+[config]

+# List of applications (tools) to run.
+PROCESS_LIST = ASCII2NC

+# time looping - options are INIT, VALID, RETRO, and REALTIME
+# If set to INIT or RETRO:
+#   INIT_TIME_FMT, INIT_BEG, INIT_END, and INIT_INCREMENT must also be set
+# If set to VALID or REALTIME:
+#   VALID_TIME_FMT, VALID_BEG, VALID_END, and VALID_INCREMENT must also be set
+LOOP_BY = INIT

+# Format of INIT_BEG and INIT_END using % items
+# %Y = 4 digit year, %m = 2 digit month, %d = 2 digit day, etc.
+# see www.strftime.org for more information
+# %Y%m%d%H expands to YYYYMMDDHH
+INIT_TIME_FMT = %Y%m%d%H

+# Start time for METplus run - must match INIT_TIME_FMT
+INIT_BEG = {{cdate}}

+# End time for METplus run - must match INIT_TIME_FMT
+INIT_END = {{cdate}}

+# Increment between METplus runs (in seconds if no units are specified).
+# Must be >= 60 seconds.
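+# (Here 3600 s = 1 h; since INIT_BEG = INIT_END above, only a single cycle is
+# processed and the LEAD_SEQ below drives the hourly looping.)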
+INIT_INCREMENT = 3600 + +# List of forecast leads to process for each run time (init or valid) +# In hours if units are not specified +# If unset, defaults to 0 (don't loop through forecast leads) +LEAD_SEQ = {{fhr_list}} + +# Order of loops to process data - Options are times, processes +# Not relevant if only one item is in the PROCESS_LIST +# times = run all wrappers in the PROCESS_LIST for a single run time, then +# increment the run time and run all wrappers again until all times have +# been evaluated. +# processes = run the first wrapper in the PROCESS_LIST for all times +# specified, then repeat for the next item in the PROCESS_LIST until all +# wrappers have been run +LOOP_ORDER = times +# +# Verbosity of MET logging output. 0 to 5; 0 is quiet, 5 is loud. +# +LOG_{{METPLUS_TOOL_NAME}}_VERBOSITY = {{metplus_verbosity_level}} +# +# Specify the name of the METplus log file. +# +LOG_METPLUS = {LOG_DIR}/{{metplus_log_fn}} +# +# Specify the location and name of the final METplus conf file. +# +METPLUS_CONF = {ASCII2NC_OUTPUT_DIR}/metplus_final.{{metplus_config_fn}} +# +# Location of MET configuration file to pass to Ascii2nc. +# +# References PARM_BASE, which is the location of the parm directory +# corresponding to the ush directory of the run_metplus.py script that +# is called or the value of the environment variable METPLUS_PARM_BASE +# if set. +# +ASCII2NC_CONFIG_FILE = {PARM_BASE}/met_config/Ascii2NcConfig_wrapped +# +# Name to identify observation data in output. +# +OBTYPE = {{obtype}} +# For AERONET and Airnow data, times in the file are always top of the hour. +ASCII2NC_FILE_WINDOW_BEGIN = 0 +ASCII2NC_FILE_WINDOW_END = 0 +# +# Observation data time window(s). +# +OBS_WINDOW_BEGIN = -1799 +OBS_WINDOW_END = 1800 + +ASCII2NC_WINDOW_BEGIN = {OBS_WINDOW_BEGIN} +ASCII2NC_WINDOW_END = {OBS_WINDOW_END} + +ASCII2NC_INPUT_FORMAT = {{input_format}} + +# If set to True, skip run if the output file determined by the output +# directory and filename template already exists. +ASCII2NC_SKIP_IF_OUTPUT_EXISTS = False + +# End of [config] section and start of [dir] section +[dir] +# +# Directory containing (observation) input to ASCII2nc. +# +ASCII2NC_INPUT_DIR = {{obs_input_dir}} +# +# Directory in which to write output from ASCII2nc. +# +# OUTPUT_BASE apparently has to be set to something; it cannot be left +# to its default value. But it is not explicitly used elsewhere in this +# configuration file. +# +OUTPUT_BASE = {{output_base}} +ASCII2NC_OUTPUT_DIR = {{output_dir}} + +# End of [dir] section and start of [filename_templates] section. +[filename_templates] +# +# Template for ascii input to ASCII2nc relative to ASCII2NC_INPUT_DIR. +# +ASCII2NC_INPUT_TEMPLATE = {{obs_input_fn_template}} +# +# Template for output from Ascii2nc relative to ASCII2NC_OUTPUT_DIR. 
+# +ASCII2NC_OUTPUT_TEMPLATE = {{output_fn_template}} diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index b35af30752..2f17216705 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -129,6 +129,39 @@ task_run_MET_Pb2nc_obs: attrs: task: get_obs_ndas +metatask_ASCII2nc_obs: + var: + METAVAR: '{% for var in verification.VX_FIELDS %}{% if var in ["AOD", "PM25"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + METAOBTYPE: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}AERONET {% elif var =="PM25" %}AIRNOW {% endif %}{% endfor %}' + METAOBS_DIR: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}&AERONET_OBS_DIR; {% elif var =="PM25" %}&AIRNOW_OBS_DIR; {% endif %}{% endfor %}' + TASKDEP: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}get_obs_aeronet {% elif var =="PM25" %}get_obs_airnow {% endif %}{% endfor %}' + task_run_MET_ASCIInc_obs_#METAVAR#: + <<: *default_task_verify_pre + attrs: + cycledefs: forecast + maxtries: '2' + command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_ASCII2NC_OBS"' + envars: + <<: *default_vars + VAR: '#METAVAR#' + ACCUM_HH: '01' + FCST_OR_OBS: OBS + OBTYPE: '#METAOBTYPE#' + OBS_DIR: '#METAOBS_DIR#' + METPLUSTOOLNAME: 'ASCII2NC' + dependency: + and: + datadep: + text: '#METAOBS_DIR#' + or: + not: + taskvalid: + attrs: + task: '#TASKDEP#' + taskdep: + attrs: + task: '#TASKDEP#' + metatask_PcpCombine_obs: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index fb6f17184b..9aa4665a77 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -150,6 +150,8 @@ set -x # # {AERONET_OBS_DIR}/{YYYYMMDD}/{YYYYMMDD}.lev15 # +# This naming scheme can be changed by the config variable OBS_AERONET_FN_TEMPLATE +# # If data is retrieved from HPSS, it will automatically staged by this # this script. # @@ -161,6 +163,8 @@ set -x # # {AIRNOW_OBS_DIR}/{YYYYMMDD}/HourlyAQObs_{YYYYMMDDHH}.dat # +# This naming scheme can be changed by the config variable OBS_AIRNOW_FN_TEMPLATE +# # In addition to the raw observation files, For each day there is an additional # required file that stores the locations of all observation stations: # diff --git a/scripts/exregional_run_met_ascii2nc_obs.sh b/scripts/exregional_run_met_ascii2nc_obs.sh new file mode 100755 index 0000000000..525cb4a33d --- /dev/null +++ b/scripts/exregional_run_met_ascii2nc_obs.sh @@ -0,0 +1,346 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +. $USHdir/source_util_funcs.sh +source_config_for_task "task_run_met_ascii2nc_obs" ${GLOBAL_VAR_DEFNS_FP} +# +#----------------------------------------------------------------------- +# +# Source files defining auxiliary functions for verification. +# +#----------------------------------------------------------------------- +# +. $USHdir/get_metplus_tool_name.sh +. $USHdir/set_vx_params.sh +. $USHdir/set_vx_fhr_list.sh +# +#----------------------------------------------------------------------- +# +# Save current shell options (in a global array). Then set new options +# for this script/function. 
+# +#----------------------------------------------------------------------- +# +{ save_shell_opts; . $USHdir/preamble.sh; } > /dev/null 2>&1 +# +#----------------------------------------------------------------------- +# +# Get the full path to the file in which this script/function is located +# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in +# which the file is located (scrfunc_dir). +# +#----------------------------------------------------------------------- +# +scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) +scrfunc_fn=$( basename "${scrfunc_fp}" ) +scrfunc_dir=$( dirname "${scrfunc_fp}" ) +# +#----------------------------------------------------------------------- +# +# Get the name of the MET/METplus tool in different formats that may be +# needed from the global variable METPLUSTOOLNAME. +# +#----------------------------------------------------------------------- +# +set -x +get_metplus_tool_name \ + METPLUSTOOLNAME="${METPLUSTOOLNAME}" \ + outvarname_metplus_tool_name="metplus_tool_name" \ + outvarname_MetplusToolName="MetplusToolName" \ + outvarname_METPLUS_TOOL_NAME="METPLUS_TOOL_NAME" +# +#----------------------------------------------------------------------- +# +print_info_msg " +======================================================================== +Entering script: \"${scrfunc_fn}\" +In directory: \"${scrfunc_dir}\" + +This is the ex-script for the task that runs the METplus tool ${MetplusToolName} +to convert ASCII format observation files to NetCDF format. +========================================================================" +# +CDATE="${PDY}${cyc}" +# +#----------------------------------------------------------------------- +# +# Set various verification parameters associated with the field to be +# verified. Not all of these are necessarily used later below but are +# set here for consistency with other verification ex-scripts. +# +#----------------------------------------------------------------------- +# +FIELDNAME_IN_OBS_INPUT="" +FIELDNAME_IN_FCST_INPUT="" +FIELDNAME_IN_MET_OUTPUT="" +FIELDNAME_IN_MET_FILEDIR_NAMES="" + +set_vx_params \ + obtype="${OBTYPE}" \ + field="$VAR" \ + accum_hh="${ACCUM_HH}" \ + outvarname_grid_or_point="grid_or_point" \ + outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ + outvarname_fieldname_in_fcst_input="FIELDNAME_IN_FCST_INPUT" \ + outvarname_fieldname_in_MET_output="FIELDNAME_IN_MET_OUTPUT" \ + outvarname_fieldname_in_MET_filedir_names="FIELDNAME_IN_MET_FILEDIR_NAMES" +# +#----------------------------------------------------------------------- +# +# Set paths and file templates for input to and output from the MET/ +# METplus tool to be run as well as other file/directory parameters. +# +#----------------------------------------------------------------------- +# +vx_output_basedir=$( eval echo "${VX_OUTPUT_BASEDIR}" ) + +OBS_INPUT_DIR="${OBS_DIR}" + +OUTPUT_BASE="${vx_output_basedir}" +OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" +STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs" +if [ "${OBTYPE}" = "AERONET" ]; then + OBS_INPUT_FN_TEMPLATE=${OBS_AERONET_FN_TEMPLATE} + OUTPUT_FN_TEMPLATE=${OBS_AERONET_FN_TEMPLATE_ASCII2NC_OUTPUT} + ASCII2NC_INPUT_FORMAT=aeronetv3 +elif [ "${OBTYPE}" = "AIRNOW" ]; then + OBS_INPUT_FN_TEMPLATE=${OBS_AIRNOW_FN_TEMPLATE} + OUTPUT_FN_TEMPLATE=${OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT} + ASCII2NC_INPUT_FORMAT=airnowhourlyaqobs +else + print_err_msg_exit "\nNo filename template set for OBTYPE \"${OBTYPE}\"!" 
+fi
+#-----------------------------------------------------------------------
+#
+# Set the array of forecast hours for which to run the MET/METplus tool.
+#
+#-----------------------------------------------------------------------
+#
+set_vx_fhr_list \
+  cdate="${CDATE}" \
+  fcst_len_hrs="${FCST_LEN_HRS}" \
+  field="$VAR" \
+  accum_hh="${ACCUM_HH}" \
+  base_dir="${OBS_INPUT_DIR}" \
+  fn_template="${OBS_INPUT_FN_TEMPLATE}" \
+  check_accum_contrib_files="FALSE" \
+  num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \
+  outvarname_fhr_list="FHR_LIST"
+#
+#-----------------------------------------------------------------------
+#
+# Make sure the MET/METplus output directory(ies) exists.
+#
+#-----------------------------------------------------------------------
+#
+mkdir -p "${OUTPUT_DIR}"
+#
+#-----------------------------------------------------------------------
+#
+# Check for existence of top-level OBS_DIR.
+#
+#-----------------------------------------------------------------------
+#
+if [ ! -d "${OBS_DIR}" ]; then
+  print_err_msg_exit "\
+OBS_DIR does not exist or is not a directory:
+  OBS_DIR = \"${OBS_DIR}\""
+fi
+#
+#-----------------------------------------------------------------------
+#
+# Export variables needed in the common METplus configuration file (at
+# ${METPLUS_CONF}/common.conf).
+#
+#-----------------------------------------------------------------------
+#
+export METPLUS_CONF
+export LOGDIR
+#
+#-----------------------------------------------------------------------
+#
+# Do not run METplus if there isn't at least one valid forecast hour for
+# which to run it.
+#
+#-----------------------------------------------------------------------
+#
+if [ -z "${FHR_LIST}" ]; then
+  print_err_msg_exit "\
+The list of forecast hours for which to run METplus is empty:
+  FHR_LIST = [${FHR_LIST}]"
+fi
+#
+#-----------------------------------------------------------------------
+#
+# Set the names of the template METplus configuration file, the METplus
+# configuration file generated from this template, and the METplus log
+# file.
+#
+#-----------------------------------------------------------------------
+#
+# First, set the base file names.
+#
+metplus_config_tmpl_fn="${MetplusToolName}_obs"
+#
+# Note that we append the cycle date to the name of the configuration
+# file because we are considering only observations when using ASCII2NC, so
+# the output files from METplus are not placed under cycle directories.
+# Thus, another method is necessary to associate the configuration file
+# with the cycle for which it is used.
+#
+# Note also that if considering an ensemble forecast, we include the
+# ensemble member name in the config file name. This is necessary in
+# NCO mode (i.e. when RUN_ENVIR = "nco") because in that mode, the
+# directory tree under which the configuration file is placed does not
+# contain member information, so the file names must include it. It is
+# not necessary in community mode (i.e. when RUN_ENVIR = "community")
+# because in that case, the directory structure does contain the member
+# information, but we still include that info in the file name so that
+# the behavior in the two modes is as similar as possible.
+#
+metplus_config_fn="${metplus_config_tmpl_fn}_${CDATE}"
+metplus_log_fn="${metplus_config_fn}"
+#
+# Add prefixes and suffixes (extensions) to the base file names.
+# +metplus_config_tmpl_fn="${metplus_config_tmpl_fn}.conf" +metplus_config_fn="${metplus_config_fn}.conf" +metplus_log_fn="metplus.log.${metplus_log_fn}" +# +#----------------------------------------------------------------------- +# +# Generate the METplus configuration file from its jinja template. +# +#----------------------------------------------------------------------- +# +# Set the full paths to the jinja template METplus configuration file +# (which already exists) and the METplus configuration file that will be +# generated from it. +# +metplus_config_tmpl_fp="${METPLUS_CONF}/${metplus_config_tmpl_fn}" +metplus_config_fp="${OUTPUT_DIR}/${metplus_config_fn}" +# +# Define variables that appear in the jinja template. +# +settings="\ +# +# MET/METplus information. +# + 'metplus_tool_name': '${metplus_tool_name}' + 'MetplusToolName': '${MetplusToolName}' + 'METPLUS_TOOL_NAME': '${METPLUS_TOOL_NAME}' + 'metplus_verbosity_level': '${METPLUS_VERBOSITY_LEVEL}' +# +# Date and forecast hour information. +# + 'cdate': '$CDATE' + 'fhr_list': '${FHR_LIST}' +# +# Input and output directory/file information. +# + 'metplus_config_fn': '${metplus_config_fn:-}' + 'metplus_log_fn': '${metplus_log_fn:-}' + 'obs_input_dir': '${OBS_INPUT_DIR:-}' + 'obs_input_fn_template': '${OBS_INPUT_FN_TEMPLATE:-}' + 'fcst_input_dir': '${FCST_INPUT_DIR:-}' + 'fcst_input_fn_template': '${FCST_INPUT_FN_TEMPLATE:-}' + 'output_base': '${OUTPUT_BASE}' + 'output_dir': '${OUTPUT_DIR}' + 'output_fn_template': '${OUTPUT_FN_TEMPLATE:-}' + 'staging_dir': '${STAGING_DIR}' + 'vx_fcst_model_name': '${VX_FCST_MODEL_NAME}' + 'input_format': '${ASCII2NC_INPUT_FORMAT}' +# +# Ensemble and member-specific information. +# + 'num_ens_members': '${NUM_ENS_MEMBERS}' + 'ensmem_name': '${ensmem_name:-}' + 'time_lag': '${time_lag:-}' +# +# Field information. +# + 'fieldname_in_obs_input': '${FIELDNAME_IN_OBS_INPUT}' + 'fieldname_in_fcst_input': '${FIELDNAME_IN_FCST_INPUT}' + 'fieldname_in_met_output': '${FIELDNAME_IN_MET_OUTPUT}' + 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' + 'obtype': '${OBTYPE}' + 'accum_hh': '${ACCUM_HH:-}' + 'accum_no_pad': '${ACCUM_NO_PAD:-}' +" + +# Render the template to create a METplus configuration file +tmpfile=$( $READLINK -f "$(mktemp ./met_plus_settings.XXXXXX.yaml)") +printf "%s" "$settings" > "$tmpfile" +uw template render \ + -i ${metplus_config_tmpl_fp} \ + -o ${metplus_config_fp} \ + --verbose \ + --values-file "${tmpfile}" \ + --search-path "/" + +err=$? +rm $tmpfile +if [ $err -ne 0 ]; then + message_txt="Error rendering template for METplus config. + Contents of input are: +$settings" + if [ "${RUN_ENVIR}" = "nco" ] && [ "${MACHINE}" = "WCOSS2" ]; then + err_exit "${message_txt}" + else + print_err_msg_exit "${message_txt}" + fi +fi +# +#----------------------------------------------------------------------- +# +# Call METplus. 
+#
+#-----------------------------------------------------------------------
+#
+#TEMPORARILY POINTING TO BETA RELEASE
+MET_ROOT=/contrib/met/12.0.0-beta3
+MET_INSTALL_DIR=${MET_ROOT}
+MET_BIN_EXEC=${MET_INSTALL_DIR}/bin
+MET_BASE=${MET_INSTALL_DIR}/share/met
+METPLUS_ROOT=/contrib/METplus/METplus-6.0.0-beta3/
+METPLUS_PATH=${METPLUS_ROOT}
+MET_ROOT=/contrib/met/12.0.0-beta3
+#TEMPORARILY POINTING TO BETA RELEASE
+print_info_msg "$VERBOSE" "
+Calling METplus to run MET's ${metplus_tool_name} tool on observations of type: ${OBTYPE}"
+${METPLUS_PATH}/ush/run_metplus.py \
+  -c ${METPLUS_CONF}/common.conf \
+  -c ${metplus_config_fp} || \
+print_err_msg_exit "
+Call to METplus failed with return code: $?
+METplus configuration file used is:
+  metplus_config_fp = \"${metplus_config_fp}\""
+#
+#-----------------------------------------------------------------------
+#
+# Print message indicating successful completion of script.
+#
+#-----------------------------------------------------------------------
+#
+print_info_msg "
+========================================================================
+METplus ${MetplusToolName} tool completed successfully.
+
+Exiting script: \"${scrfunc_fn}\"
+In directory: \"${scrfunc_dir}\"
+========================================================================"
+#
+#-----------------------------------------------------------------------
+#
+# Restore the shell options saved at the beginning of this script/func-
+# tion.
+#
+#-----------------------------------------------------------------------
+#
+{ restore_shell_opts; } > /dev/null 2>&1
diff --git a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml
index 2286728136..ce5829d6b3 100644
--- a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml
+++ b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml
@@ -1,9 +1,7 @@
 metadata:
   description: |-
     This test checks the capability of the workflow to run verification tasks
-    on a user-specified grid. This also serves as a winter snowstorm case for
-    checking snowfall verification statistics using observations retrieved from
-    HPSS, and a test for custom domains with RAP data retrieved from HPSS
+    for smoke data (AERONET AOD and AIRNOW PM).
 user:
   RUN_ENVIR: community
 workflow:
@@ -28,5 +26,6 @@ platform:
   CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/CCPA_obs'
   MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/MRMS_obs'
   NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/NDAS_obs'
-  AERONET_OBS_DIR: '{{ workflow.EXPTDIR }}/AERONET_obs'
-  AIRNOW_OBS_DIR: '{{ workflow.EXPTDIR }}/AIRNOW_obs'
+  AERONET_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/AERONET_obs/
+  AIRNOW_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/AIRNOW_obs/
+
diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml
index 454ac5fc84..82a699b849 100644
--- a/ush/config_defaults.yaml
+++ b/ush/config_defaults.yaml
@@ -2411,11 +2411,19 @@ verification:
   #   surface (ADPSFC) or ADP upper air (ADPUPA) fields and then generate
   #   NetCDF versions of these files.
   #
+  # OBS_AERONET_FN_TEMPLATE:
+  #   File name template for AERONET observation files.
+  #
+  # OBS_AIRNOW_FN_TEMPLATE:
+  #   File name template for AIRNOW observation files.
+  #
   OBS_CCPA_APCP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2'
   OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/sfav2_CONUS_${ACCUM_HH}h_{valid?fmt=%Y%m%d%H}_grid184.grb2'
   OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'
   OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'
   OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}'
+  OBS_AERONET_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/{valid?fmt=%Y%m%d}.lev15'
+  OBS_AIRNOW_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/HourlyAQObs_{valid?fmt=%Y%m%d%H}.dat'
   #
   # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
   #   Template used to specify the names of the output NetCDF observation
@@ -2433,6 +2441,14 @@ verification:
   OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${OBS_CCPA_APCP_FN_TEMPLATE}_a${ACCUM_HH}h.nc'
   OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc'
   #
+  # OBS_AERONET_FN_TEMPLATE_ASCII2NC_OUTPUT, OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT:
+  #   Templates used to specify the names of the output NetCDF observation
+  #   files generated by the workflow verification tasks that call the
+  #   METplus ASCII2NC tool on AERONET and AIRNOW observations, respectively.
+  #
+  OBS_AERONET_FN_TEMPLATE_ASCII2NC_OUTPUT: 'hourly_aeronet_obs_{valid?fmt=%Y%m%d%H}.nc'
+  OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT: 'hourly_airnow_obs_{valid?fmt=%Y%m%d%H}.nc'
+  #
   # VX_FCST_MODEL_NAME:
   #   String that specifies a descriptive name for the model being verified.
   #   This is used in forming the names of the verification output files as
diff --git a/ush/get_metplus_tool_name.sh b/ush/get_metplus_tool_name.sh
index 9ef37f5f13..366455e092 100644
--- a/ush/get_metplus_tool_name.sh
+++ b/ush/get_metplus_tool_name.sh
@@ -70,11 +70,15 @@ function get_metplus_tool_name() {
 #-----------------------------------------------------------------------
 #
   valid_vals_METPLUSTOOLNAME=( \
-    "PB2NC" "PCPCOMBINE" "GRIDSTAT" "POINTSTAT" "GENENSPROD" "ENSEMBLESTAT" \
+    "ASCII2NC" "PB2NC" "PCPCOMBINE" "GRIDSTAT" "POINTSTAT" "GENENSPROD" "ENSEMBLESTAT" \
     )
   check_var_valid_value "METPLUSTOOLNAME" "valid_vals_METPLUSTOOLNAME"

   case "${METPLUSTOOLNAME}" in
+    "ASCII2NC")
+      _metplus_tool_name_="ascii2nc"
+      _MetplusToolName_="Ascii2nc"
+      ;;
     "PB2NC")
       _metplus_tool_name_="pb2nc"
       _MetplusToolName_="Pb2nc"
diff --git a/ush/set_vx_fhr_list.sh b/ush/set_vx_fhr_list.sh
index 5cefc78365..500e75d5b3 100644
--- a/ush/set_vx_fhr_list.sh
+++ b/ush/set_vx_fhr_list.sh
@@ -101,6 +101,10 @@ function set_vx_fhr_list() {
 #-----------------------------------------------------------------------
 #
   case "${field}" in
+    "AOD")
+      fhr_min="00"
+      fhr_int="24"
+      ;;
     "APCP")
       fhr_min="${accum_hh}"
       fhr_int="${accum_hh}"
@@ -114,6 +118,10 @@ function set_vx_fhr_list() {
       fhr_int="${accum_hh}"
       fi
       ;;
+    "PM25")
+      fhr_min="00"
+      fhr_int="01"
+      ;;
     "REFC")
       fhr_min="00"
       fhr_int="01"
diff --git a/ush/setup.py b/ush/setup.py
index 3834f12d84..e7c38722cf 100644
--- a/ush/setup.py
+++ b/ush/setup.py
@@ -590,10 +590,10 @@ def remove_tag(tasks, tag):
         "metatask_PointStat_NDAS_ensmeanprob"]

     vx_fields_all["AERONET"] = ["AOD"]
-    vx_metatasks_all["AERONET"] = ["task_get_obs_aeronet"]
+    vx_metatasks_all["AERONET"] = ["task_get_obs_aeronet","task_run_MET_ASCII2nc_obs"]

     vx_fields_all["AIRNOW"] = ["PM25"]
-    vx_metatasks_all["AIRNOW"] = ["task_get_obs_airnow"]
+    vx_metatasks_all["AIRNOW"] = ["task_get_obs_airnow","task_run_MET_ASCII2nc_obs"]

     # Get the vx fields specified in the experiment configuration.
vx_fields_config = expt_config["verification"]["VX_FIELDS"] From 6b9aea95f4dce92b25988a9d9cd178c091007e3a Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Fri, 21 Jun 2024 19:29:14 +0000 Subject: [PATCH 041/260] Fix incorrect task list for smoke variables so we get a valid XML for non-smoke cases --- ush/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ush/setup.py b/ush/setup.py index e7c38722cf..7394031251 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -590,10 +590,10 @@ def remove_tag(tasks, tag): "metatask_PointStat_NDAS_ensmeanprob"] vx_fields_all["AERONET"] = ["AOD"] - vx_metatasks_all["AERONET"] = ["task_get_obs_aeronet","task_run_MET_ASCII2nc_obs"] + vx_metatasks_all["AERONET"] = ["task_get_obs_aeronet","metatask_ASCII2nc_obs"] vx_fields_all["AIRNOW"] = ["PM25"] - vx_metatasks_all["AIRNOW"] = ["task_get_obs_airnow","task_run_MET_ASCII2nc_obs"] + vx_metatasks_all["AIRNOW"] = ["task_get_obs_airnow","metatask_ASCII2nc_obs"] # Get the vx fields specified in the experiment configuration. vx_fields_config = expt_config["verification"]["VX_FIELDS"] From 3c04d412e9d0c328541efc5896e04baf457c15b6 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Wed, 26 Jun 2024 20:39:23 +0000 Subject: [PATCH 042/260] Add PM10 in addition to PM25, get workflow working through PCPCombine - Add PM10 as a valid ob type - Update PcpCombine.conf template to allow obs other than CCPA, USER_DEFINED command - Fix task name for ASCII2NC - Add PCPCombine tasks for PM - Fix check of airnow ob file name in exregional_get_verif_obs.sh - ASCII2NC doesn't need beta version of MET - Update some comments in config_defaults.yaml - Pythonize ush/set_vx_fhr_list.sh with help from ChatGPT; this results in an insane speedup (100 seconds to check forecast files --> ~ 1 second) --- parm/metplus/PcpCombine.conf | 11 +- parm/metplus/vx_config_det.yaml | 4 +- parm/wflow/verify_pre.yaml | 31 +++- scripts/exregional_get_verif_obs.sh | 2 +- scripts/exregional_run_met_ascii2nc_obs.sh | 9 - scripts/exregional_run_met_pcpcombine.sh | 49 ++++-- ...config.MET_verification_smoke_only_vx.yaml | 3 +- ush/config_defaults.yaml | 26 +-- ush/set_vx_fhr_list.py | 157 ++++++++++++++++++ ush/set_vx_fhr_list.sh | 4 + ush/set_vx_params.sh | 6 + ush/setup.py | 2 +- ush/valid_param_vals.yaml | 2 +- 13 files changed, 264 insertions(+), 42 deletions(-) create mode 100644 ush/set_vx_fhr_list.py diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index 3cee69df1d..ba7d0da0e2 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -75,10 +75,13 @@ OBS_PCP_COMBINE_RUN = True FCST_PCP_COMBINE_RUN = False {%- endif %} # -# Mode of PcpCombine to use (SUM, ADD, SUBTRACT, DERIVE, or CUSTOM). +# Mode of PcpCombine to use (SUM, ADD, SUBTRACT, DERIVE, or USER_DEFINED). # -{{FCST_OR_OBS}}_PCP_COMBINE_METHOD = ADD - +{{FCST_OR_OBS}}_PCP_COMBINE_METHOD = {{pcp_combine_method}} +# +# FCST_PCP_COMBINE_COMMAND is only used when FCST_PCP_COMBINE_METHOD = USER_DEFINED +# +FCST_PCP_COMBINE_COMMAND = {{pcp_combine_command}} {%- if (FCST_OR_OBS == 'FCST') and (input_field_group == 'ASNOW') %} # # Specify name of variable for Snowfall Accumulation. @@ -177,7 +180,7 @@ FCST_PCP_COMBINE_CONSTANT_INIT = True # # Name to identify observation data in output. # -OBTYPE = CCPA +OBTYPE = {{obtype}} {%- endif %} # # Specify file type of input data. 
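The two placeholders added to PcpCombine.conf above are expanded the same way as the rest of the conf file: the ex-script collects values into a YAML settings block and hands them to `uw template render`. Below is a minimal sketch of that rendering step, assuming only the jinja2 package; the variable values are illustrative, not taken from a real run.

```python
# Sketch of the template expansion performed by "uw template render";
# assumes the jinja2 package. Values below are illustrative only.
from jinja2 import Template

conf_tmpl = (
    "{{FCST_OR_OBS}}_PCP_COMBINE_METHOD = {{pcp_combine_method}}\n"
    "FCST_PCP_COMBINE_COMMAND = {{pcp_combine_command}}\n"
)

values = {
    "FCST_OR_OBS": "FCST",
    "pcp_combine_method": "USER_DEFINED",
    # Left unquoted in the real settings block so nested quotes survive:
    "pcp_combine_command": (
        "-add {FCST_PCP_COMBINE_INPUT_DIR}/{FCST_PCP_COMBINE_INPUT_TEMPLATE} "
        "'name=\"MASSDEN\"; level=\"Z8\"; convert(x)=x*1e9;'"
    ),
}

print(Template(conf_tmpl).render(values))
```

Passing the command value through without quoting (see the NOTE added to the settings block in the PcpCombine ex-script later in this patch) is what lets the nested single and double quotes survive into the rendered conf file.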
diff --git a/parm/metplus/vx_config_det.yaml b/parm/metplus/vx_config_det.yaml index 44a7fb2a2b..eaff56e8c3 100644 --- a/parm/metplus/vx_config_det.yaml +++ b/parm/metplus/vx_config_det.yaml @@ -211,4 +211,6 @@ AERONET: L0: [] AIRNOW: MASSDEN%%PM25: - R807|A1: [] + Z8|A1: [] + MASSDEN%%PM10: + Z8|A1: [] diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 2f17216705..f637f4538b 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -135,7 +135,7 @@ metatask_ASCII2nc_obs: METAOBTYPE: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}AERONET {% elif var =="PM25" %}AIRNOW {% endif %}{% endfor %}' METAOBS_DIR: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}&AERONET_OBS_DIR; {% elif var =="PM25" %}&AIRNOW_OBS_DIR; {% endif %}{% endfor %}' TASKDEP: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}get_obs_aeronet {% elif var =="PM25" %}get_obs_airnow {% endif %}{% endfor %}' - task_run_MET_ASCIInc_obs_#METAVAR#: + task_run_MET_ASCII2nc_obs_#METAVAR#: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -325,3 +325,32 @@ metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: age: 00:00:00:30 text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' walltime: 00:10:00 + +metatask_PcpCombine_fcst_PM_all_mems: + var: + PM: '{% for var in verification.VX_FIELDS %}{% if var in ["PM25", "PM10"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + metatask_PcpCombine_fcst_#PM#_all_mems: + var: + mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' + task_run_MET_PcpCombine_fcst_#PM#_mem#mem#: + <<: *default_task_verify_pre + attrs: + cycledefs: forecast + maxtries: '2' + command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' + envars: + <<: *default_vars + VAR: '#PM#' + ACCUM_HH: '01' + FCST_OR_OBS: FCST + OBTYPE: AIRNOW + OBS_DIR: '&AIRNOW_OBS_DIR;' + METPLUSTOOLNAME: 'PCPCOMBINE' + ENSMEM_INDX: "#mem#" + dependency: + datadep: + attrs: + age: 00:00:00:30 + text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' + walltime: 00:10:00 + diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 9aa4665a77..2c53dacbf2 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -645,7 +645,7 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do airnow_proc=${OBS_DIR} # Check if file exists on disk; if not, pull it. 
- airnow_file="$airnow_proc/${vyyyymmdd}/HourlyAQObs_{vyyyymmdd}${vhh}.dat" + airnow_file="$airnow_proc/${vyyyymmdd}/HourlyAQObs_${vyyyymmdd}${vhh}.dat" if [[ -f "${airnow_file}" ]]; then echo "${OBTYPE} file exists on disk:" echo "${airnow_file}" diff --git a/scripts/exregional_run_met_ascii2nc_obs.sh b/scripts/exregional_run_met_ascii2nc_obs.sh index 525cb4a33d..ea12769cdb 100755 --- a/scripts/exregional_run_met_ascii2nc_obs.sh +++ b/scripts/exregional_run_met_ascii2nc_obs.sh @@ -303,15 +303,6 @@ fi # #----------------------------------------------------------------------- # -#TEMPORARILY POINTING TO BETA RELEASE -MET_ROOT=/contrib/met/12.0.0-beta3 -MET_INSTALL_DIR=${MET_ROOT} -MET_BIN_EXEC=${MET_INSTALL_DIR}/bin -MET_BASE=${MET_INSTALL_DIR}/share/met -METPLUS_ROOT=/contrib/METplus/METplus-6.0.0-beta3/ -METPLUS_PATH=${METPLUS_ROOT} -MET_ROOT=/contrib/met/12.0.0-beta3 -#TEMPORARILY POINTING TO BETA RELEASE print_info_msg "$VERBOSE" " Calling METplus to run MET's ${metplus_tool_name} tool on observations of type: ${OBTYPE}" ${METPLUS_PATH}/ush/run_metplus.py \ diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 026afb4eb2..357d6f9e2c 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -173,7 +173,8 @@ OBS_INPUT_DIR="" OBS_INPUT_FN_TEMPLATE="" FCST_INPUT_DIR="" FCST_INPUT_FN_TEMPLATE="" - +PCP_COMBINE_METHOD="ADD" +PCP_COMBINE_COMMAND="" if [ "${FCST_OR_OBS}" = "FCST" ]; then FCST_INPUT_DIR="${vx_fcst_input_basedir}" @@ -183,7 +184,17 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_fcst" OUTPUT_FN_TEMPLATE=$( eval echo ${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" + if [ "${OBTYPE}" = "AIRNOW" ]; then + PCP_COMBINE_METHOD="USER_DEFINED" + if [ "${VAR}" = "PM25" ]; then + # Need to combine two fields (different PM types) and convert units from forecast files to create PM25 equivalent to obs + PCP_COMBINE_COMMAND="-add {FCST_PCP_COMBINE_INPUT_DIR}/{FCST_PCP_COMBINE_INPUT_TEMPLATE} 'name=\"MASSDEN\"; level=\"Z8\"; GRIB2_aerosol_type=62010; convert(x)=x*1e9;' {FCST_PCP_COMBINE_INPUT_DIR}/{FCST_PCP_COMBINE_INPUT_TEMPLATE} 'name=\"MASSDEN\"; level=\"Z8\"; GRIB2_aerosol_type=62001; GRIB2_aerosol_interval_type=0; convert(x)=x*1e9;'" + elif [ "${VAR}" = "PM10" ]; then + # for PM10, command is just a passthrough + PCP_COMBINE_COMMAND="-add {FCST_PCP_COMBINE_INPUT_DIR}/{FCST_PCP_COMBINE_INPUT_TEMPLATE} -field 'name=\"MASSDEN\"; level=\"Z8\"; GRIB2_aerosol_type=62001; GRIB2_aerosol_interval_type=2; convert(x)=x*1e9;'" + fi + fi elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" @@ -216,16 +227,16 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" fi -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${base_dir}" \ - fn_template="${fn_template}" \ - check_accum_contrib_files="TRUE" \ - num_missing_files_max="${num_missing_files_max}" \ - outvarname_fhr_list="FHR_LIST" +set -x +FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \ + --cdate="${CDATE}" \ + --fcst_len="${FCST_LEN_HRS}" \ + --field="$VAR" \ + --accum_hh="${ACCUM_HH}" \ + --base_dir="${base_dir}" \ + --filename_template="${fn_template}" \ + --num_missing_files_max="${num_missing_files_max}") || \ +print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?" 
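For reference, the arithmetic encoded by the two USER_DEFINED commands above: the `convert(x)=x*1e9` factor implies kg/m^3 forecast mass densities versus ug/m^3 AIRNOW obs (1 kg = 1e9 ug), the PM2.5 equivalent is the sum of two GRIB2 aerosol components, and PM10 is a single-field passthrough. A small numeric sketch, with made-up gridpoint values:

```python
# Numeric sketch of the USER_DEFINED PcpCombine commands above; values are
# made up. The unit factor is implied by convert(x)=x*1e9.
def kg_m3_to_ug_m3(massden: float) -> float:
    return massden * 1e9  # kg/m^3 -> ug/m^3

# PM2.5: sum of the two GRIB2 aerosol components (types 62010 and 62001):
pm25 = kg_m3_to_ug_m3(4.2e-9) + kg_m3_to_ug_m3(1.3e-9)

# PM10: single-field passthrough, unit conversion only:
pm10 = kg_m3_to_ug_m3(6.0e-9)

print(f"PM2.5 ~ {pm25:.1f} ug/m^3, PM10 ~ {pm10:.1f} ug/m^3")  # 5.5 and 6.0
```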
# #----------------------------------------------------------------------- # @@ -361,8 +372,13 @@ settings="\ 'input_field_group': '${VAR:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' +# +# Configuration information +# + 'pcp_combine_method': '${PCP_COMBINE_METHOD}' +# NOTE: this command must remain un-quoted for proper rendering of nested quotes in command + 'pcp_combine_command': ${PCP_COMBINE_COMMAND} " - # Render the template to create a METplus configuration file tmpfile=$( $READLINK -f "$(mktemp ./met_plus_settings.XXXXXX.yaml)") printf "%s" "$settings" > "$tmpfile" @@ -392,6 +408,15 @@ fi # #----------------------------------------------------------------------- # +#TEMPORARILY POINTING TO BETA RELEASE +MET_ROOT=/contrib/met/12.0.0-beta3 +MET_INSTALL_DIR=${MET_ROOT} +MET_BIN_EXEC=${MET_INSTALL_DIR}/bin +MET_BASE=${MET_INSTALL_DIR}/share/met +METPLUS_ROOT=/contrib/METplus/METplus-6.0.0-beta3/ +METPLUS_PATH=${METPLUS_ROOT} +MET_ROOT=/contrib/met/12.0.0-beta3 +#TEMPORARILY POINTING TO BETA RELEASE print_info_msg "$VERBOSE" " Calling METplus to run MET's ${metplus_tool_name} tool for field(s): ${FIELDNAME_IN_MET_FILEDIR_NAMES}" ${METPLUS_PATH}/ush/run_metplus.py \ diff --git a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml index ce5829d6b3..e575f8ea82 100644 --- a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml @@ -19,8 +19,9 @@ rocoto: verification: VX_FCST_MODEL_NAME: RRFS_smoke_test # VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "AOD", "PM25" ] - VX_FIELDS: [ "AOD", "PM25" ] + VX_FIELDS: [ "AOD", "PM25", "PM10" ] VX_FCST_INPUT_BASEDIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/smoke_vx/fcst + FCST_SUBDIR_TEMPLATE: '{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}' FCST_FN_TEMPLATE: 'rrfs.t{init?fmt=%H?shift=-${time_lag}}z.prslev.f{lead?fmt=%HHH?shift=${time_lag}}.conus_3km.grib2' platform: CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/CCPA_obs' diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 82a699b849..6646d3e393 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -359,11 +359,11 @@ platform: # mimics the stream in a more controlled way and tests the ability # to access LBCS. # - # TEST_CCPA_OBS_DIR, TEST_MRMS_OBS_DIR, TEST_NDAS_OBS_DIR: - # These parameters are used by the testing script to test the mechanism + # TEST_*_OBS_DIR: + # These parameters are used by the WE2E testing script to test the mechanism # that allows user to point to data streams on disk for observation data - # for verification tasks. They test the ability for users to set - # CCPA_OBS_DIR, MRMS_OBS_DIR, and NDAS_OBS_DIR respectively. + # for verification tasks. They test the ability for users to manually set + # these observation directories. # # TEST_VX_FCST_INPUT_BASEDIR: # The path to user-staged forecast files for WE2E testing of verificaton @@ -379,6 +379,9 @@ platform: TEST_CCPA_OBS_DIR: "" TEST_MRMS_OBS_DIR: "" TEST_NDAS_OBS_DIR: "" + TEST_NOHRSC_OBS_DIR: "" + TEST_AERONET_OBS_DIR: "" + TEST_AIRNOW_OBS_DIR: "" TEST_VX_FCST_INPUT_BASEDIR: "" # #----------------------------------------------------------------------- @@ -730,7 +733,7 @@ workflow: # the workflow launch script (WFLOW_LAUNCH_SCRIPT_FN). 
 #
 # GLOBAL_VAR_DEFNS_FP:
-#    Path to the global varibale definition file
+#    Path to the global variable definition file
 #    (GLOBAL_VAR_DEFNS_FN) in the experiment directory.
 #
 # ROCOTO_YAML_FP:
@@ -2455,12 +2458,13 @@ verification:
   #   well as in the contents of those files.
   #
   # VX_FIELDS:
-  #   The fields or groups of fields on which to run verification.  Because
-  #   accumulated snow (ASNOW) is often not of interest in non-winter cases
-  #   and because observation files for ASNOW are not available on NOAA
-  #   HPSS for retrospective cases before March 2020, by default ASNOW is
-  #   not included VX_FIELDS, but it may be added to this list in order to
-  #   include the verification tasks for ASNOW in the workflow.
+  #   The fields or groups of fields on which to run verification. By default,
+  #   only a subset of fields are verified (conventional surface/upper-air,
+  #   accumulated precipitation, and radar obs) because others (accumulated
+  #   snow, air quality) are more niche and have limited availability. For
+  #   example, observation files for ASNOW are not available on NOAA HPSS for
+  #   retrospective cases before March 2020. See valid_param_vals.yaml for a
+  #   complete list of valid verification fields.
   #
   # VX_APCP_ACCUMS_HRS:
   #   The 2-digit accumulation periods (in units of hours) to consider for
diff --git a/ush/set_vx_fhr_list.py b/ush/set_vx_fhr_list.py
new file mode 100644
index 0000000000..7cd462f3cf
--- /dev/null
+++ b/ush/set_vx_fhr_list.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+import argparse
+import os
+import re
+import subprocess
+import sys
+from datetime import datetime, timedelta
+
+def eval_METplus_timestr_tmpl(init_time, fhr, METplus_timestr_tmpl):
+    yyyymmdd_init = init_time[:8]
+    hh_init = init_time[8:10]
+
+    mn_init = "00"
+    if len(init_time) > 10:
+        mn_init = init_time[10:12]
+
+    ss_init = "00"
+    if len(init_time) > 12:
+        ss_init = init_time[12:14]
+
+    init_time_str = f"{yyyymmdd_init} {hh_init}:{mn_init}:{ss_init}"
+    init_time_dt = datetime.strptime(init_time_str, "%Y%m%d %H:%M:%S")
+    valid_time_dt = init_time_dt + timedelta(hours=fhr)
+
+    regex_search = r"^\{(init|valid|lead)(\?)(fmt=)([^\?]*)(\?)?(shift=)?([^\?]*)?\}"
+
+    METplus_time_type = re.search(regex_search, METplus_timestr_tmpl).group(1)
+    METplus_time_fmt = re.search(regex_search, METplus_timestr_tmpl).group(4)
+    METplus_time_shift = re.search(regex_search, METplus_timestr_tmpl).group(7)
+
+    if METplus_time_fmt in ["%Y%m%d%H", "%Y%m%d", "%H%M%S", "%H"]:
+        fmt = METplus_time_fmt
+    elif METplus_time_fmt == "%HHH":
+        fmt = "%03.0f"
+    else:
+        raise ValueError(f"Unsupported METplus time format:\n {METplus_time_fmt=}\n {METplus_timestr_tmpl=}")
+
+    time_shift_secs = int(float(METplus_time_shift or 0))
+    time_shift_td = timedelta(seconds=time_shift_secs)
+
+    if METplus_time_type == "init":
+        formatted_time = (init_time_dt + time_shift_td).strftime(fmt)
+    elif METplus_time_type == "valid":
+        formatted_time = (valid_time_dt + time_shift_td).strftime(fmt)
+    elif METplus_time_type == "lead":
+        lead_secs = (valid_time_dt + time_shift_td - init_time_dt).total_seconds()
+        lead_hrs = lead_secs / 3600
+
+        lead_hrs_trunc = int(lead_hrs)
+        lead_hrs_rem = lead_hrs - lead_hrs_trunc
+        if lead_hrs_rem != 0:
+            raise ValueError(f"The lead in hours ({lead_hrs=}) derived from seconds ({lead_secs=}) must be an integer")
+
+        formatted_time = f"{lead_hrs_trunc:03d}"
+    else:
+        raise ValueError(f"Unsupported METplus time type: {METplus_time_type=}")
+
+    return formatted_time
+
+def set_vx_fhr_list(cdate, fcst_len, field, accum_hh, base_dir, 
filename_template, num_missing_files_max, verbose): + if field == "AOD": + fhr_min = 0 + fhr_int = 24 + elif field == "APCP": + fhr_min = accum_hh + fhr_int = accum_hh + elif field == "ASNOW": + if accum_hh == 24: + fhr_min = 24 + fhr_int = 12 + else: + fhr_min = accum_hh + fhr_int = accum_hh + elif field in ["PM25", "PM10", "REFC", "RETOP", "ADPSFC"]: + fhr_min = 0 + fhr_int = 1 + elif field == "ADPUPA": + fhr_min = 0 + fhr_int = 6 + else: + raise ValueError(f"A method for setting verification parameters has not been specified for this field: {field=}") + + fhr_max = fcst_len + fhr_array = list(range(fhr_min, fhr_max + 1, fhr_int)) + if verbose: + print(f"Initial (i.e. before filtering for missing files) set of forecast hours is:\n fhr_array = {fhr_array}") + + fhr_list = [] + num_missing_files = 0 + + for fhr_orig in fhr_array: + fhr = fhr_orig - accum_hh + 1 + num_back_hrs = accum_hh + + skip_this_fhr = False + for _ in range(num_back_hrs): + fn = filename_template + regex_search_tmpl = r"(.*)(\{.*\})(.*)" + crnt_tmpl = re.search(regex_search_tmpl, filename_template).group(2) + remainder = re.sub(regex_search_tmpl, r"\1\3", filename_template) + + while crnt_tmpl: + actual_value = eval_METplus_timestr_tmpl(cdate, fhr, crnt_tmpl) + crnt_tmpl_esc = re.escape(crnt_tmpl) + fn = re.sub(crnt_tmpl_esc, actual_value, fn, 1) + match = re.search(regex_search_tmpl, remainder) + crnt_tmpl = match.group(2) if match else '' + remainder = re.sub(regex_search_tmpl, r"\1\3", remainder) + + fp = os.path.join(base_dir, fn) + + if os.path.isfile(fp): + if verbose: + print(f"Found file (fp) for the current forecast hour (fhr; relative to the cycle date cdate):\n fhr = \"{fhr}\"\n cdate = \"{cdate}\"\n fp = \"{fp}\"") + else: + skip_this_fhr = True + num_missing_files += 1 + if verbose: + print(f"The file (fp) for the current forecast hour (fhr; relative to the cycle date cdate) is missing:\n fhr = \"{fhr}\"\n cdate = \"{cdate}\"\n fp = \"{fp}\"\nExcluding the current forecast hour from the list of hours passed to the METplus configuration file.") + break + + fhr += 1 + + if not skip_this_fhr: + fhr_list.append(fhr_orig) + + fhr_list_str = ','.join(map(str, fhr_list)) + if verbose: + print(f"Final (i.e. after filtering for missing files) set of forecast hours is (written as a single string):\n fhr_list = \"{fhr_list_str}\"") + + if num_missing_files > num_missing_files_max: + raise Exception(f'The number of missing files {num_missing_files} is greater than the specified {num_missing_files_max=}') + + return fhr_list_str + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description="Return a list of forecast hours such that there is a corresponding file (can be observations or forecast files) for each list entry.", + ) + parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true") + parser.add_argument("-cd", "--cdate", help="Date in YYYYMMDDHH format", type=str, required=True) + parser.add_argument("-fl", "--fcst_len", help="Forecast length in hours", type=int, required=True) + parser.add_argument("-f", "--field", help="Field name", type=str, required=True, + choices=["PM25", "PM10", "REFC", "RETOP", "ADPSFC", "AOD", "APCP", "ASNOW", "ADPUPA"]) + parser.add_argument("-a", "--accum_hh", help="Accumulation length in hours for the specified field. 
For example, for 6-hour accumulated precipitation, field=APCP, accum_hh=6", type=int, default=1) + parser.add_argument("-bd", "--base_dir", help="Base directory for forecast/observation file", type=str, default='') + parser.add_argument("-ft", "--filename_template", help="Template for file names to search; see ??? for details on template settings", type=str, required=True) + parser.add_argument("-n", "--num_missing_files_max", help="Number of missing files to tolerate; if more files than this number can not be found, raise an exception", type=int, default=5) + + args = parser.parse_args() + +# vx_fhr_list = set_vx_fhr_list(args.cdate, args.fcst_len, args.field, args.accum_hh, args.base_dir, args.filename_template, args.base_dir)) + vx_fhr_list = set_vx_fhr_list(**vars(args)) + print(vx_fhr_list) diff --git a/ush/set_vx_fhr_list.sh b/ush/set_vx_fhr_list.sh index 500e75d5b3..e3fbfe5173 100644 --- a/ush/set_vx_fhr_list.sh +++ b/ush/set_vx_fhr_list.sh @@ -122,6 +122,10 @@ function set_vx_fhr_list() { fhr_min="00" fhr_int="01" ;; + "PM10") + fhr_min="00" + fhr_int="01" + ;; "REFC") fhr_min="00" fhr_int="01" diff --git a/ush/set_vx_params.sh b/ush/set_vx_params.sh index ef8a6dc8cd..e6266f4fcb 100644 --- a/ush/set_vx_params.sh +++ b/ush/set_vx_params.sh @@ -272,6 +272,12 @@ this observation type (obtype) and field (field) combination: fieldname_in_MET_output="${field}" fieldname_in_MET_filedir_names="${field}" ;; + "PM10") + fieldname_in_obs_input="${field}" + fieldname_in_fcst_input="MASSDEN" + fieldname_in_MET_output="${field}" + fieldname_in_MET_filedir_names="${field}" + ;; *) print_err_msg_exit "\ diff --git a/ush/setup.py b/ush/setup.py index 7394031251..fe636d5d97 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -593,7 +593,7 @@ def remove_tag(tasks, tag): vx_metatasks_all["AERONET"] = ["task_get_obs_aeronet","metatask_ASCII2nc_obs"] vx_fields_all["AIRNOW"] = ["PM25"] - vx_metatasks_all["AIRNOW"] = ["task_get_obs_airnow","metatask_ASCII2nc_obs"] + vx_metatasks_all["AIRNOW"] = ["task_get_obs_airnow","metatask_ASCII2nc_obs","metatask_PcpCombine_fcst_PM_all_mems"] # Get the vx fields specified in the experiment configuration. 
vx_fields_config = expt_config["verification"]["VX_FIELDS"] diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index 1e5908417f..4998ba735a 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -76,6 +76,6 @@ valid_vals_DO_AQM_CHEM_LBCS: [True, False] valid_vals_DO_AQM_GEFS_LBCS: [True, False] valid_vals_DO_AQM_SAVE_AIRNOW_HIST: [True, False] valid_vals_COLDSTART: [True, False] -valid_vals_VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA", "AOD", "PM25" ] +valid_vals_VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA", "AOD", "PM25", "PM10" ] valid_vals_VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] From f8c93c6ce09304ca17ac3f5c44343f7ab60afe32 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Tue, 9 Jul 2024 18:27:50 +0000 Subject: [PATCH 043/260] Implement pythonized set_vx_fhr_list for all scripts --- scripts/exregional_check_post_output.sh | 29 ++++------- scripts/exregional_run_met_ascii2nc_obs.sh | 21 ++++---- ...onal_run_met_genensprod_or_ensemblestat.sh | 21 ++++---- ...gional_run_met_gridstat_or_pointstat_vx.sh | 22 ++++---- ...un_met_gridstat_or_pointstat_vx_ensmean.sh | 20 ++++---- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 21 ++++---- scripts/exregional_run_met_pb2nc_obs.sh | 20 ++++---- scripts/exregional_run_met_pcpcombine.sh | 1 - ush/config_defaults.yaml | 2 +- ush/set_vx_fhr_list.py | 51 ++++++++++++++++--- 10 files changed, 116 insertions(+), 92 deletions(-) diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index 320311cc94..2f672d1f23 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -52,14 +52,6 @@ done # #----------------------------------------------------------------------- # -# Source files defining auxiliary functions for verification. -# -#----------------------------------------------------------------------- -# -. $USHdir/set_vx_fhr_list.sh -# -#----------------------------------------------------------------------- -# # Save current shell options (in a global array). Then set new options # for this script/function. # @@ -103,6 +95,7 @@ user-staged. # #----------------------------------------------------------------------- # +set -x i="0" if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then i=$( bc -l <<< "${ENSMEM_INDX}-1" ) @@ -126,16 +119,16 @@ time_lag=$( bc -l <<< "${ENS_TIME_LAG_HRS[$i]}*${SECS_PER_HOUR}" ) ensmem_indx=$(printf "%0${VX_NDIGITS_ENSMEM_NAMES}d" $(( 10#${ENSMEM_INDX}))) ensmem_name="mem${ensmem_indx}" FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE} ) -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${VX_FCST_INPUT_BASEDIR}" \ - fn_template="${FCST_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + +FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \ + --cdate="${CDATE}" \ + --fcst_len="${FCST_LEN_HRS}" \ + --field="$VAR" \ + --accum_hh="${ACCUM_HH}" \ + --base_dir="${VX_FCST_INPUT_BASEDIR}" \ + --filename_template="${FCST_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" --verbose) || \ +print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?" 
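The remaining ex-scripts below get the same treatment. Because the new module is importable, the bash invocation above is equivalent to a direct Python call; the following is a hypothetical sketch (all argument values are made up, and the filename template is the CCPA default from config_defaults.yaml) against the function signature introduced in the previous commit:

```python
# Hypothetical direct use of ush/set_vx_fhr_list.py (run from the ush
# directory); mirrors the bash call above. Paths and dates are made up.
from set_vx_fhr_list import set_vx_fhr_list

fhr_list = set_vx_fhr_list(
    cdate="2023121700",       # cycle date (YYYYMMDDHH)
    fcst_len=24,              # forecast length in hours
    field="APCP",             # accumulated precipitation
    accum_hh=6,               # 6-h accumulation -> candidate hours 6,12,18,24
    base_dir="/path/to/obs",  # made-up location of staged obs
    filename_template="{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2",
    num_missing_files_max=0,
    verbose=True,
)
print(fhr_list)  # e.g. "6,12,18,24" when all files are present
```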
# #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_ascii2nc_obs.sh b/scripts/exregional_run_met_ascii2nc_obs.sh index ea12769cdb..d46ce34d04 100755 --- a/scripts/exregional_run_met_ascii2nc_obs.sh +++ b/scripts/exregional_run_met_ascii2nc_obs.sh @@ -18,7 +18,6 @@ source_config_for_task "task_run_met_ascii2nc_obs" ${GLOBAL_VAR_DEFNS_FP} # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # @@ -122,16 +121,16 @@ fi # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \ + --cdate="${CDATE}" \ + --fcst_len="${FCST_LEN_HRS}" \ + --field="$VAR" \ + --accum_hh="${ACCUM_HH}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --filename_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}") || \ +print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?" + # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 1c09dc09c6..130da9a085 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # @@ -218,16 +217,16 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \ + --cdate="${CDATE}" \ + --fcst_len="${FCST_LEN_HRS}" \ + --field="$VAR" \ + --accum_hh="${ACCUM_HH}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --filename_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}") || \ +print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?" + # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index abe5e3dd31..b7f67e28e0 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. 
$USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # @@ -215,16 +214,17 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +set -x +FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \ + --cdate="${CDATE}" \ + --fcst_len="${FCST_LEN_HRS}" \ + --field="$VAR" \ + --accum_hh="${ACCUM_HH}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --filename_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}") || \ +print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?" + # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 2c8378c128..aa5c84ba4c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # @@ -164,16 +163,15 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensmean" # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \ + --cdate="${CDATE}" \ + --fcst_len="${FCST_LEN_HRS}" \ + --field="$VAR" \ + --accum_hh="${ACCUM_HH}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --filename_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}") || \ +print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index eae1850ad8..7beb702374 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. 
$USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # @@ -163,16 +162,16 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensprob" # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \ + --cdate="${CDATE}" \ + --fcst_len="${FCST_LEN_HRS}" \ + --field="$VAR" \ + --accum_hh="${ACCUM_HH}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --filename_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}") || \ +print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?" + # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 7e79fb4efb..388a6f12fd 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -21,7 +21,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # @@ -126,16 +125,15 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs" # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \ + --cdate="${CDATE}" \ + --fcst_len="${FCST_LEN_HRS}" \ + --field="$VAR" \ + --accum_hh="${ACCUM_HH}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --filename_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}") || \ +print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 357d6f9e2c..1559b57308 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 6646d3e393..f5c10ba3cd 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2533,7 +2533,7 @@ verification: # maximum allowed fraction of obs files that can be missing (i.e. the # number missing divided by the number that are expected to exist). # - NUM_MISSING_OBS_FILES_MAX: 2 + NUM_MISSING_OBS_FILES_MAX: 0 # # For verification tasks that need forecast data, this specifies the # maximum number of post-processed forecast files that may be missing. 
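For a concrete sense of what the custom template evaluator in ush/set_vx_fhr_list.py (added in the previous commit) produces, here is a quick check run from the ush directory; the templates are ones that appear in config_defaults.yaml:

```python
# How eval_METplus_timestr_tmpl expands METplus time-string templates for
# cycle 2023121700 at forecast hour 6 (expected outputs in comments):
from set_vx_fhr_list import eval_METplus_timestr_tmpl

print(eval_METplus_timestr_tmpl("2023121700", 6, "{valid?fmt=%Y%m%d}"))  # 20231217
print(eval_METplus_timestr_tmpl("2023121700", 6, "{valid?fmt=%H}"))      # 06
print(eval_METplus_timestr_tmpl("2023121700", 6, "{lead?fmt=%HHH}"))     # 006
# A shift (in seconds) is applied before formatting, e.g. one hour back:
print(eval_METplus_timestr_tmpl("2023121700", 6, "{valid?fmt=%H?shift=-3600}"))  # 05
```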
diff --git a/ush/set_vx_fhr_list.py b/ush/set_vx_fhr_list.py
index 7cd462f3cf..5cab5202a0 100644
--- a/ush/set_vx_fhr_list.py
+++ b/ush/set_vx_fhr_list.py
@@ -59,7 +59,36 @@ def eval_METplus_timestr_tmpl(init_time, fhr, METplus_timestr_tmpl):
 
     return formatted_time
 
-def set_vx_fhr_list(cdate, fcst_len, field, accum_hh, base_dir, filename_template, num_missing_files_max, verbose):
+def set_vx_fhr_list(cdate, fcst_len, field, accum_hh, base_dir, filename_template, num_missing_files_max, verbose=False, check_accum_contrib_files=False):
+    """Generates a list of forecast hours such that for each hour there exists a corresponding file
+    according to the filename pattern (filename_template) and other variables provided.
+
+    Args:
+        cdate (str): Date string in YYYYMMDDHH[MM[SS]] format, where minutes and seconds are
+                     optional.
+        fcst_len (int): Length of forecast in hours
+        field (str): Field name; see the first if block for valid values
+        accum_hh (int): Accumulation period for the specified field. For instantaneous fields,
+                        set to 1.
+        base_dir (str): Directory to find the paths to files specified by filename_template
+        filename_template (str): The METplus filename template for finding the files
+        num_missing_files_max (int): If more than this number of files are not found, raise an exception
+        verbose (bool): By default this script only outputs the list of forecast hours
+                        (for easier parsing from bash contexts). Set the verbose flag
+                        to True for additional debugging output.
+        check_accum_contrib_files (bool): If true, check all files contributing to the accumulation
+                                          period, not just forecast hours.
+    Returns:
+        fhr_list_str (str): A comma-separated list of forecast hours where files were found
+    """
+    # Set the interval (fhr_int) and minimum (fhr_min) hours for observation files for a given
+    # observation type and accumulation period/interval. For most observations and forecast fields
+    # this is 1 and 0, respectively (indicating instantaneous obs available every hour). For
+    # accumulation fields, you need a forecast length at least as long as the accumulation period,
+    # so fhr_min=accum_hh. For files that are not hourly, but instead contain data for multiple
+    # hours, they will need to have fhr_int specified accordingly.
+    # Every valid verification field (valid_vals_VX_FIELDS in valid_param_vals.yaml) should have
+    # an entry in this if block
     if field == "AOD":
         fhr_min = 0
         fhr_int = 24
@@ -91,11 +120,19 @@ def set_vx_fhr_list(cdate, fcst_len, field, accum_hh, base_dir, filename_templat
     num_missing_files = 0
     for fhr_orig in fhr_array:
 
-        fhr = fhr_orig - accum_hh + 1
-        num_back_hrs = accum_hh
-
+        if check_accum_contrib_files:
+            fhr = fhr_orig - accum_hh + 1
+            num_back_hrs = accum_hh
+        else:
+            fhr = fhr_orig
+            num_back_hrs = 1
         skip_this_fhr = False
 
+        for _ in range(num_back_hrs):
+            # Use the provided template to set the name of/relative path to the file
+            # Note that the while-loop below is over all METplus time string templates
+            # of the form {...} in the template fn_template; it continues until all
+            # such templates have been evaluated to actual time strings.
             fn = filename_template
             regex_search_tmpl = r"(.*)(\{.*\})(.*)"
             crnt_tmpl = re.search(regex_search_tmpl, filename_template).group(2)
             remainder = re.sub(regex_search_tmpl, r"\1\3", filename_template)
@@ -148,10 +185,12 @@ def set_vx_fhr_list(cdate, fcst_len, field, accum_hh, base_dir, filename_templat
     parser.add_argument("-a", "--accum_hh", help="Accumulation length in hours for the specified field. For example, for 6-hour accumulated precipitation, field=APCP, accum_hh=6", type=int, default=1)
     parser.add_argument("-bd", "--base_dir", help="Base directory for forecast/observation file", type=str, default='')
     parser.add_argument("-ft", "--filename_template", help="Template for file names to search; see ??? for details on template settings", type=str, required=True)
-    parser.add_argument("-n", "--num_missing_files_max", help="Number of missing files to tolerate; if more files than this number can not be found, raise an exception", type=int, default=5)
+    parser.add_argument("-n", "--num_missing_files_max", type=int, default=5,
+                        help="Number of missing files to tolerate; if more files than this number cannot be found, raise an exception")
+    parser.add_argument("--check_accum_contrib_files", action="store_true",
+                        help="Flag that determines whether we check the initial time of the accumulation period or not")
 
     args = parser.parse_args()
 
-#    vx_fhr_list = set_vx_fhr_list(args.cdate, args.fcst_len, args.field, args.accum_hh, args.base_dir, args.filename_template, args.base_dir))
     vx_fhr_list = set_vx_fhr_list(**vars(args))
     print(vx_fhr_list)

From 9615dfd3992e8530f3ed96707bfef73c771390ef Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr"
Date: Fri, 16 Aug 2024 16:33:05 +0000
Subject: [PATCH 044/260] Fully deprecate all our custom regex for METplus
 filename templates; now importing the necessary METplus functions directly.

This will need some attention before merging to ensure it is
platform-independent; it only works on Hera for now. But the smoke
verification is Hera-specific for now regardless.

---
 scripts/exregional_check_post_output.sh       |   3 +-
 ...onal_run_met_genensprod_or_ensemblestat.sh |   3 +-
 ...gional_run_met_gridstat_or_pointstat_vx.sh |   3 +-
 scripts/exregional_run_met_pcpcombine.sh      |   3 +-
 ush/bash_utils/eval_METplus_timestr_tmpl.sh   | 281 ----------------
 ush/set_vx_fhr_list.py                        | 118 ++-----
 ush/set_vx_fhr_list.sh                        | 307 ------------------
 7 files changed, 40 insertions(+), 678 deletions(-)
 delete mode 100644 ush/bash_utils/eval_METplus_timestr_tmpl.sh
 delete mode 100644 ush/set_vx_fhr_list.sh

diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh
index 2f672d1f23..16020de3bc 100755
--- a/scripts/exregional_check_post_output.sh
+++ b/scripts/exregional_check_post_output.sh
@@ -127,7 +127,8 @@ FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \
   --accum_hh="${ACCUM_HH}" \
   --base_dir="${VX_FCST_INPUT_BASEDIR}" \
   --filename_template="${FCST_INPUT_FN_TEMPLATE}" \
-  --num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" --verbose) || \
+  --num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \
+  --time_lag="${time_lag}" --verbose) || \
 print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?"
 #
 #-----------------------------------------------------------------------
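As a quick illustration of the replacement approach (a minimal sketch, not part of the patch; it assumes METplus 6.x is importable and uses a hypothetical template and dates), this is the same substitution call the rewritten set_vx_fhr_list.py makes in place of the custom regex parsing:

    # Evaluate a METplus filename template directly via METplus's own
    # string_template_substitution utility.
    from datetime import datetime, timedelta
    from metplus.util import string_template_substitution as sts

    tmpl = "prepbufr.ndas.{valid?fmt=%Y%m%d%H}"   # hypothetical template
    initdate = datetime(2024, 7, 9, 12)
    fhr = 6
    fn = sts.do_string_sub(tmpl=tmpl,
                           init=initdate,
                           valid=initdate + timedelta(hours=fhr),
                           lead=fhr * 3600)
    print(fn)  # should print: prepbufr.ndas.2024070918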
diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh
index 130da9a085..8ec035a40f 100755
--- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh
+++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh
@@ -224,7 +224,8 @@ FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \
   --accum_hh="${ACCUM_HH}" \
   --base_dir="${OBS_INPUT_DIR}" \
   --filename_template="${OBS_INPUT_FN_TEMPLATE}" \
-  --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}") || \
+  --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \
+  --time_lag="${time_lag}") || \
 print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?"
 
 #
diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
index b7f67e28e0..de61eb5718 100755
--- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
+++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
@@ -222,7 +222,8 @@ FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \
   --accum_hh="${ACCUM_HH}" \
   --base_dir="${OBS_INPUT_DIR}" \
   --filename_template="${OBS_INPUT_FN_TEMPLATE}" \
-  --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}") || \
+  --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \
+  --time_lag="${time_lag}") || \
 print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?"
 
 #
diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh
index 1559b57308..078ee580c4 100755
--- a/scripts/exregional_run_met_pcpcombine.sh
+++ b/scripts/exregional_run_met_pcpcombine.sh
@@ -234,7 +234,8 @@ FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \
   --accum_hh="${ACCUM_HH}" \
   --base_dir="${base_dir}" \
   --filename_template="${fn_template}" \
-  --num_missing_files_max="${num_missing_files_max}") || \
+  --num_missing_files_max="${num_missing_files_max}" \
+  --time_lag="${time_lag}") || \
 print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?"
 #
 #-----------------------------------------------------------------------
diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh
deleted file mode 100644
index 572f7c68c4..0000000000
--- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh
+++ /dev/null
@@ -1,281 +0,0 @@
-#
-#-----------------------------------------------------------------------
-#
-# This file defines a function that evaluates a METplus time-string
-# template.
-#
-#-----------------------------------------------------------------------
-#
-function eval_METplus_timestr_tmpl() {
-#
-#-----------------------------------------------------------------------
-#
-# Save current shell options (in a global array). Then set new options
-# for this script/function.
-#
-#-----------------------------------------------------------------------
-#
-  { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1
-#
-#-----------------------------------------------------------------------
-#
-# Get the full path to the file in which this script/function is located
-# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in
-# which the file is located (scrfunc_dir).
-#
-#-----------------------------------------------------------------------
-#
-  local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" )
-  local scrfunc_fn=$( basename "${scrfunc_fp}" )
-  local scrfunc_dir=$( dirname "${scrfunc_fp}" )
-#
-#-----------------------------------------------------------------------
-#
-# Get the name of this function.
-#
-#-----------------------------------------------------------------------
-#
-  local func_name="${FUNCNAME[0]}"
-#
-#-----------------------------------------------------------------------
-#
-# Specify the set of valid argument names for this script/function. Then
-# process the arguments provided to this script/function (which should
-# consist of a set of name-value pairs of the form arg1="value1", etc).
-#
-#-----------------------------------------------------------------------
-#
-  local valid_args=( \
-    "init_time" \
-    "fhr" \
-    "METplus_timestr_tmpl" \
-    "outvarname_formatted_time" \
-    )
-  process_args valid_args "$@"
-#
-#-----------------------------------------------------------------------
-#
-# For debugging purposes, print out values of arguments passed to this
-# script. Note that these will be printed out only if VERBOSE is set to
-# TRUE.
-#
-#-----------------------------------------------------------------------
-#
-  print_input_args "valid_args"
-#
-#-----------------------------------------------------------------------
-#
-# Declare local variables.
-#
-#-----------------------------------------------------------------------
-#
-  local fmt \
-        formatted_time \
-        hh_init \
-        init_time_str \
-        lead_hrs \
-        len \
-        mn_init \
-        METplus_time_fmt \
-        METplus_time_shift \
-        METplus_time_type \
-        regex_search \
-        ss_init \
-        valid_time_str \
-        yyyymmdd_init
-#
-#-----------------------------------------------------------------------
-#
-# Run checks on input arguments.
-#
-#-----------------------------------------------------------------------
-#
-  if [ -z "${METplus_timestr_tmpl}" ]; then
-    print_err_msg_exit "\
-The specified METplus time string template (METplus_timestr_tmpl) cannot be empty:
-  METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\""
-  fi
-
-  len=${#init_time}
-  if [[ ${init_time} =~ ^[0-9]+$ ]]; then
-    if [ "$len" -ne 10 ] && [ "$len" -ne 12 ] && [ "$len" -ne 14 ]; then
-      print_err_msg_exit "\
-The specified initial time string (init_time) must contain exactly 10,
-12, or 14 integers (but contains $len):
-  init_time = \"${init_time}\""
-    fi
-  else
-    print_err_msg_exit "\
-The specified initial time string (init_time) must consist of only
-integers and cannot be empty:
-  init_time = \"${init_time}\""
-  fi
-
-  if ! [[ $fhr =~ ^[0-9]+$ ]]; then
-    print_err_msg_exit "\
-The specified forecast hour (fhr) must consist of only integers and
-cannot be empty:
-  fhr = \"${fhr}\""
-  fi
-#
-#-----------------------------------------------------------------------
-#
-# Set strings for the initial and valid times that can be passed to the
-# "date" utility for evaluation.
-#
-#-----------------------------------------------------------------------
-#
-  yyyymmdd_init=${init_time:0:8}
-  hh_init=${init_time:8:2}
-
-  mn_init="00"
-  if [ "$len" -gt "10" ]; then
-    mn_init=${init_time:10:2}
-  fi
-
-  ss_init="00"
-  if [ "$len" -gt "12" ]; then
-    ss_init=${init_time:12:2}
-  fi
-
-  init_time_str=$( printf "%s" "${yyyymmdd_init} + ${hh_init} hours + ${mn_init} minutes + ${ss_init} seconds" )
-  valid_time_str=$( printf "%s" "${init_time_str} + ${fhr} hours" )
-#
-#-----------------------------------------------------------------------
-#
-# Parse the input METplus time string template.
-#
-#-----------------------------------------------------------------------
-#
-  regex_search="^\{(init|valid|lead)(\?)(fmt=)([^\?]*)(\?)?(shift=)?([^\?]*)?\}"
-  METplus_time_type=$( \
-    printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\1/p" )
-  METplus_time_fmt=$( \
-    printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\4/p" )
-  METplus_time_shift=$( \
-    printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\7/p" )
-#
-#-----------------------------------------------------------------------
-#
-# Get strings for the time format and time shift that can be passed to
-# the "date" utility or the "printf" command.
-#
-#-----------------------------------------------------------------------
-#
-  case "${METplus_time_fmt}" in
-    "%Y%m%d%H"|"%Y%m%d"|"%H%M%S")
-      fmt="${METplus_time_fmt}"
-      ;;
-    "%H")
-#
-# The "%H" format needs to be treated differently depending on if it's
-# formatting a "lead" time type or another (e.g. "init" or "vald") because
-# for "lead", the printf function is used below (which doesn't understand
-# the "%H" format) whereas for the others, the date utility is used (which
-# does understand "%H").
-#
-      if [ "${METplus_time_type}" = "lead" ]; then
-        fmt="%02.0f"
-      else
-        fmt="${METplus_time_fmt}"
-      fi
-      ;;
-    "%HHH")
-#
-# Print format assumes that the argument to printf (i.e. the number to
-# print out) may be a float.  If we instead assume an integer and use
-# "%03d" as the format, the printf function below will fail if the argument
-# happens to be a float.  The "%03.0f" format will work for both a float
-# and an integer argument (and will truncate the float and print out a
-# 3-digit integer).
-#
-      fmt="%03.0f"
-      ;;
-    *)
-      print_err_msg_exit "\
-Unsupported METplus time format:
-  METplus_time_fmt = \"${METplus_time_fmt}\"
-METplus time string template passed to this function is:
-  METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\""
-      ;;
-  esac
-#
-# Calculate the time shift as an integer in units of seconds.
-#
-  time_shift_str=$(( $(printf "%.0f" "${METplus_time_shift}") + 0 ))" seconds"
-#
-#-----------------------------------------------------------------------
-#
-# Set the formatted time string.
-#
-#-----------------------------------------------------------------------
-#
-  case "${METplus_time_type}" in
-    "init")
-      formatted_time=$( ${DATE_UTIL} --date="${init_time_str} + ${time_shift_str}" +"${fmt}" )
-      ;;
-    "valid")
-      formatted_time=$( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"${fmt}" )
-      ;;
-    "lead")
-      lead_secs=$(( $( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"%s" ) \
-                  - $( ${DATE_UTIL} --date="${init_time_str}" +"%s" ) ))
-      lead_hrs=$( bc -l <<< "${lead_secs}/${SECS_PER_HOUR}" )
-#
-# Check to make sure lead_hrs is an integer.
-#
-      lead_hrs_trunc=$( bc <<< "${lead_secs}/${SECS_PER_HOUR}" )
-      lead_hrs_rem=$( bc -l <<< "${lead_hrs} - ${lead_hrs_trunc}" )
-      if [ "${lead_hrs_rem}" != "0" ]; then
-        print_err_msg_exit "\
-The lead in hours (lead_hrs) must be an integer but isn't:
-  lead_hrs = ${lead_hrs}
-The lead in seconds (lead_secs) is:
-  lead_secs = ${lead_secs}
-The remainder (lead_hrs_rem) after dividing the lead_secs by SECS_PER_HOUR
-= ${SECS_PER_HOUR} is:
-  lead_hrs_rem = ${lead_hrs_rem}"
-      fi
-#
-# Get the lead in the proper format.
-#
-      formatted_time=$( printf "${fmt}" "${lead_hrs}" )
-      ;;
-    *)
-      print_err_msg_exit "\
-Unsupported METplus time type:
-  METplus_time_type = \"${METplus_time_type}\"
-METplus time string template passed to this function is:
-  METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\""
-      ;;
-  esac
-
-  if [ -z "${formatted_time}" ]; then
-    print_err_msg_exit "\
-The specified METplus time string template (METplus_timestr_tmpl) could
-not be evaluated for the given initial time (init_time) and forecast
-hour (fhr):
-  METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"
-  init_time = \"${init_time}\"
-  fhr = \"${fhr}\""
-  fi
-#
-#-----------------------------------------------------------------------
-#
-# Set output variables.
-#
-#-----------------------------------------------------------------------
-#
-  if [ ! -z "${outvarname_formatted_time}" ]; then
-    printf -v ${outvarname_formatted_time} "%s" "${formatted_time}"
-  fi
-#
-#-----------------------------------------------------------------------
-#
-# Restore the shell options saved at the beginning of this script/function.
-#
-#-----------------------------------------------------------------------
-#
-  { restore_shell_opts; } > /dev/null 2>&1
-
-}
diff --git a/ush/set_vx_fhr_list.py b/ush/set_vx_fhr_list.py
index 5cab5202a0..960094b24b 100644
--- a/ush/set_vx_fhr_list.py
+++ b/ush/set_vx_fhr_list.py
@@ -6,70 +6,21 @@ import sys
 from datetime import datetime, timedelta
 
-def eval_METplus_timestr_tmpl(init_time, fhr, METplus_timestr_tmpl):
-    yyyymmdd_init = init_time[:8]
-    hh_init = init_time[8:10]
+sys.path.append(os.environ['METPLUS_ROOT'])
+from metplus.util import string_template_substitution as sts
 
-    mn_init = "00"
-    if len(init_time) > 10:
-        mn_init = init_time[10:12]
-
-    ss_init = "00"
-    if len(init_time) > 12:
-        ss_init = init_time[12:14]
-
-    init_time_str = f"{yyyymmdd_init} {hh_init}:{mn_init}:{ss_init}"
-    init_time_dt = datetime.strptime(init_time_str, "%Y%m%d %H:%M:%S")
-    valid_time_dt = init_time_dt + timedelta(hours=fhr)
-
-    regex_search = r"^\{(init|valid|lead)(\?)(fmt=)([^\?]*)(\?)?(shift=)?([^\?]*)?\}"
-
-    METplus_time_type = re.search(regex_search, METplus_timestr_tmpl).group(1)
-    METplus_time_fmt = re.search(regex_search, METplus_timestr_tmpl).group(4)
-    METplus_time_shift = re.search(regex_search, METplus_timestr_tmpl).group(7)
-
-    if METplus_time_fmt in ["%Y%m%d%H", "%Y%m%d", "%H%M%S", "%H"]:
-        fmt = METplus_time_fmt
-    elif METplus_time_fmt == "%HHH":
-        fmt = "%03.0f"
-    else:
-        raise ValueError(f"Unsupported METplus time format:\n {METplus_time_fmt=}\n {METplus_timestr_tmpl=}")
-
-    time_shift_secs = int(float(METplus_time_shift or 0))
-    time_shift_td = timedelta(seconds=time_shift_secs)
-
-    if METplus_time_type == "init":
-        formatted_time = (init_time_dt + time_shift_td).strftime(fmt)
-    elif METplus_time_type == "valid":
-        formatted_time = (valid_time_dt + time_shift_td).strftime(fmt)
-    elif METplus_time_type == "lead":
-        lead_secs = (valid_time_dt + time_shift_td - init_time_dt).total_seconds()
-        lead_hrs = lead_secs / 3600
-
-        lead_hrs_trunc = int(lead_hrs)
-        lead_hrs_rem = lead_hrs - lead_hrs_trunc
-        if lead_hrs_rem != 0:
-            raise ValueError(f"The lead in hours ({lead_hrs=}) derived from seconds ({lead_secs=}) must be an integer")
-
-        formatted_time = f"{lead_hrs_trunc:03d}"
-    else:
-        raise ValueError(f"Unsupported METplus time type: {METplus_time_type=}")
-
-
-    return formatted_time
-
-
-def set_vx_fhr_list(cdate, fcst_len, field, accum_hh, base_dir, filename_template, num_missing_files_max, verbose=False, check_accum_contrib_files=False):
+def set_vx_fhr_list(cdate, fcst_len, field, accum_hh, time_lag, base_dir, filename_template, num_missing_files_max, verbose=False, check_accum_contrib_files=False):
     """Generates a list of forecast hours such that for each hour there exists a corresponding file
     according to the filename pattern (filename_template) and other variables provided.
 
     Args:
-        cdate (str): Date string in YYYYMMDDHH[MM[SS]] format, where minutes and seconds are
-                     optional.
+        cdate (str): Date string of forecast initialization in YYYYMMDDHH[MM[SS]] format, where
+                     minutes and seconds are optional.
         fcst_len (int): Length of forecast in hours
         field (str): Field name; see the first if block for valid values
         accum_hh (int): Accumulation period for the specified field. For instantaneous fields,
                         set to 1.
+        time_lag (int): Hours of time lag for a time-lagged ensemble member
         base_dir (str): Directory to find the paths to files specified by filename_template
         filename_template (str): The METplus filename template for finding the files
         num_missing_files_max (int): If more than this number of files are not found, raise an exception
@@ -79,31 +30,31 @@ def set_vx_fhr_list(cdate, fcst_len, field, accum_hh, time_lag, base_dir, filena
             num_back_hrs = 1
         skip_this_fhr = False
 
-        for _ in range(num_back_hrs):
-            # Use the provided template to set the name of/relative path to the file
-            # Note that the while-loop below is over all METplus time string templates
-            # of the form {...} in the template fn_template; it continues until all
-            # such templates have been evaluated to actual time strings.
-            fn = filename_template
-            regex_search_tmpl = r"(.*)(\{.*\})(.*)"
-            crnt_tmpl = re.search(regex_search_tmpl, filename_template).group(2)
-            remainder = re.sub(regex_search_tmpl, r"\1\3", filename_template)
-
-            while crnt_tmpl:
-                actual_value = eval_METplus_timestr_tmpl(cdate, fhr, crnt_tmpl)
-                crnt_tmpl_esc = re.escape(crnt_tmpl)
-                fn = re.sub(crnt_tmpl_esc, actual_value, fn, 1)
-                match = re.search(regex_search_tmpl, remainder)
-                crnt_tmpl = match.group(2) if match else ''
-                remainder = re.sub(regex_search_tmpl, r"\1\3", remainder)
-
-            fp = os.path.join(base_dir, fn)
-
-            if os.path.isfile(fp):
-                if verbose:
-                    print(f"Found file (fp) for the current forecast hour (fhr; relative to the cycle date cdate):\n  fhr = \"{fhr}\"\n  cdate = \"{cdate}\"\n  fp = \"{fp}\"")
-            else:
-                skip_this_fhr = True
-                num_missing_files += 1
-                if verbose:
-                    print(f"The file (fp) for the current forecast hour (fhr; relative to the cycle date cdate) is missing:\n  fhr = \"{fhr}\"\n  cdate = \"{cdate}\"\n  fp = \"{fp}\"\nExcluding the current forecast hour from the list of hours passed to the METplus configuration file.")
-                break
-
-            fhr += 1
+        if len(cdate) == 10:
+            initdate=datetime.strptime(cdate, '%Y%m%d%H')
+        elif len(cdate) == 12:
+            initdate=datetime.strptime(cdate, '%Y%m%d%H%M')
+        elif len(cdate) == 14:
+            initdate=datetime.strptime(cdate, '%Y%m%d%H%M%S')
+        else:
+            raise ValueError(f"Invalid {cdate=}; cdate must be 10, 12, or 14 characters in length")
+
+        validdate=initdate + timedelta(hours=fhr)
+        leadsec=fhr*3600
+        fn = sts.do_string_sub(tmpl=filename_template,init=initdate,lead=leadsec,valid=validdate,shift=time_lag)
+        fp = os.path.join(base_dir, fn)
+
+        if os.path.isfile(fp):
+            if verbose:
+                print(f"Found file (fp) for the current forecast hour (fhr; relative to the cycle date cdate):\n  fhr = \"{fhr}\"\n  cdate = \"{cdate}\"\n  fp = \"{fp}\"")
+        else:
+            skip_this_fhr = True
+            num_missing_files += 1
+            if verbose:
+                print(f"The file (fp) for the current forecast hour (fhr; relative to the cycle date cdate) is missing:\n  fhr = \"{fhr}\"\n  cdate = \"{cdate}\"\n  fp = \"{fp}\"\nExcluding the current forecast hour from the list of hours passed to the METplus configuration file.")
+            break
+
+        fhr += 1
 
         if not skip_this_fhr:
             fhr_list.append(fhr_orig)
@@ -183,6 +128,7 @@ def set_vx_fhr_list(cdate, fcst_len, field, accum_hh, time_lag, base_dir, filena
     parser.add_argument("-f", "--field", help="Field name", type=str, required=True,
                         choices=["PM25", "PM10", "REFC", "RETOP", "ADPSFC", "AOD", "APCP", "ASNOW", "ADPUPA"])
     parser.add_argument("-a", "--accum_hh", help="Accumulation length in hours for the specified field. For example, for 6-hour accumulated precipitation, field=APCP, accum_hh=6", type=int, default=1)
+    parser.add_argument("-tl", "--time_lag", help="Hours of time lag for a time-lagged ensemble member", type=float, default=0)
     parser.add_argument("-bd", "--base_dir", help="Base directory for forecast/observation file", type=str, default='')
     parser.add_argument("-ft", "--filename_template", help="Template for file names to search; see ??? for details on template settings", type=str, required=True)
     parser.add_argument("-n", "--num_missing_files_max", type=int, default=5,
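The length-based cdate handling added above can be exercised in isolation; a minimal sketch (dates are hypothetical):

    # The strptime format is chosen from the string length:
    # 10 -> YYYYMMDDHH, 12 adds minutes, 14 adds seconds.
    from datetime import datetime

    fmts = {10: "%Y%m%d%H", 12: "%Y%m%d%H%M", 14: "%Y%m%d%H%M%S"}
    for cdate in ("2024070912", "202407091230", "20240709123045"):
        print(datetime.strptime(cdate, fmts[len(cdate)]))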
diff --git a/ush/set_vx_fhr_list.sh b/ush/set_vx_fhr_list.sh
deleted file mode 100644
index e3fbfe5173..0000000000
--- a/ush/set_vx_fhr_list.sh
+++ /dev/null
@@ -1,307 +0,0 @@
-#
-#-----------------------------------------------------------------------
-#
-# This file defines a function that generates a list of forecast hours
-# such that for each hour there exists a corresponding obs file.  It does
-# this by first generating a generic sequence of forecast hours and then
-# removing from that sequence any hour for which there is no obs file.
-#
-#-----------------------------------------------------------------------
-#
-function set_vx_fhr_list() {
-#
-#-----------------------------------------------------------------------
-#
-# Save current shell options (in a global array). Then set new options
-# for this script/function.
-#
-#-----------------------------------------------------------------------
-#
-  { save_shell_opts; set -u +x; } > /dev/null 2>&1
-#
-#-----------------------------------------------------------------------
-#
-# Get the full path to the file in which this script/function is located
-# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in
-# which the file is located (scrfunc_dir).
-#
-#-----------------------------------------------------------------------
-#
-  local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" )
-  local scrfunc_fn=$( basename "${scrfunc_fp}" )
-  local scrfunc_dir=$( dirname "${scrfunc_fp}" )
-#
-#-----------------------------------------------------------------------
-#
-# Get the name of this function.
-#
-#-----------------------------------------------------------------------
-#
-  local func_name="${FUNCNAME[0]}"
-#
-#-----------------------------------------------------------------------
-#
-# Specify the set of valid argument names for this script/function. Then
-# process the arguments provided to this script/function (which should
-# consist of a set of name-value pairs of the form arg1="value1", etc).
-#
-#-----------------------------------------------------------------------
-#
-  local valid_args=( \
-    "cdate" \
-    "fcst_len_hrs" \
-    "field" \
-    "accum_hh" \
-    "base_dir" \
-    "fn_template" \
-    "check_accum_contrib_files" \
-    "num_missing_files_max" \
-    "outvarname_fhr_list" \
-    )
-  process_args valid_args "$@"
-#
-#-----------------------------------------------------------------------
-#
-# For debugging purposes, print out values of arguments passed to this
-# script.  Note that these will be printed out only if VERBOSE is set to
-# TRUE.
-#
-#-----------------------------------------------------------------------
-#
-  print_input_args valid_args
-#
-#-----------------------------------------------------------------------
-#
-# Declare local variables.
-#
-#-----------------------------------------------------------------------
-#
-  local crnt_tmpl \
-        crnt_tmpl_esc \
-        fhr \
-        fhr_array \
-        fhr_int \
-        fhr_list \
-        fhr_min \
-        fhr_max \
-        fn \
-        fp \
-        i \
-        num_fcst_hrs \
-        num_missing_files \
-        regex_search_tmpl \
-        remainder \
-        skip_this_fhr
-#
-#-----------------------------------------------------------------------
-#
-# Create array containing set of forecast hours for which we will check
-# for the existence of corresponding observation or forecast file.
-#
-#-----------------------------------------------------------------------
-#
-  case "${field}" in
-    "AOD")
-      fhr_min="00"
-      fhr_int="24"
-      ;;
-    "APCP")
-      fhr_min="${accum_hh}"
-      fhr_int="${accum_hh}"
-      ;;
-    "ASNOW")
-      if [ "${accum_hh}" = "24" ]; then
-        fhr_min="24"
-        fhr_int="12"
-      else
-        fhr_min="${accum_hh}"
-        fhr_int="${accum_hh}"
-      fi
-      ;;
-    "PM25")
-      fhr_min="00"
-      fhr_int="01"
-      ;;
-    "PM10")
-      fhr_min="00"
-      fhr_int="01"
-      ;;
-    "REFC")
-      fhr_min="00"
-      fhr_int="01"
-      ;;
-    "RETOP")
-      fhr_min="00"
-      fhr_int="01"
-      ;;
-    "ADPSFC")
-      fhr_min="00"
-      fhr_int="01"
-      ;;
-    "ADPUPA")
-      fhr_min="00"
-      fhr_int="06"
-      ;;
-    *)
-      print_err_msg_exit "\
-A method for setting verification parameters has not been specified for
-this field (field):
-  field = \"${field}\""
-      ;;
-  esac
-  fhr_max="${fcst_len_hrs}"
-
-  fhr_array=($( seq ${fhr_min} ${fhr_int} ${fhr_max} ))
-  print_info_msg "$VERBOSE" "\
-Initial (i.e. before filtering for missing files) set of forecast hours
-is:
-  fhr_array = ( $( printf "\"%s\" " "${fhr_array[@]}" ))
-"
-#
-#-----------------------------------------------------------------------
-#
-# Loop through all forecast hours.  For each one for which a corresponding
-# file exists, add the forecast hour to fhr_list.  fhr_list will be a
-# scalar containing a comma-separated list of forecast hours for which
-# corresponding files exist.  Also, use the variable num_missing_files
-# to keep track of the number of files that are missing.
-#
-#-----------------------------------------------------------------------
-#
-  fhr_list=""
-  num_missing_files="0"
-  num_fcst_hrs=${#fhr_array[@]}
-  for (( i=0; i<${num_fcst_hrs}; i++ )); do
-
-    fhr_orig="${fhr_array[$i]}"
-
-    if [ "${check_accum_contrib_files}" = "TRUE" ]; then
-      fhr=$(( ${fhr_orig} - ${accum_hh} + 1 ))
-      num_back_hrs=${accum_hh}
-    else
-      fhr=${fhr_orig}
-      num_back_hrs=1
-    fi
-
-    skip_this_fhr="FALSE"
-    for (( j=0; j<${num_back_hrs}; j++ )); do
-#
-# Use the provided template to set the name of/relative path to the file
-# Note that the while-loop below is over all METplus time string templates
-# of the form {...} in the template fn_template; it continues until all
-# such templates have been evaluated to actual time strings.
-#
-      fn="${fn_template}"
-      regex_search_tmpl="(.*)(\{.*\})(.*)"
-      crnt_tmpl=$( printf "%s" "${fn_template}" | \
-                   $SED -n -r -e "s|${regex_search_tmpl}|\2|p" )
-      remainder=$( printf "%s" "${fn_template}" | \
-                   $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" )
-      while [ ! -z "${crnt_tmpl}" ]; do
-
-        eval_METplus_timestr_tmpl \
-          init_time="$cdate" \
-          fhr="$fhr" \
-          METplus_timestr_tmpl="${crnt_tmpl}" \
-          outvarname_formatted_time="actual_value"
-#
-# Replace METplus time templates in fn with actual times.  Note that
-# when using sed, we need to escape various characters (question mark,
-# closing and opening curly braces, etc) in the METplus template in
-# order for the sed command below to work properly.
-#
-        crnt_tmpl_esc=$( echo "${crnt_tmpl}" | \
-                         $SED -r -e "s/\?/\\\?/g" -e "s/\{/\\\{/g" -e "s/\}/\\\}/g" )
-        fn=$( echo "${fn}" | \
-              $SED -n -r "s|(.*)(${crnt_tmpl_esc})(.*)|\1${actual_value}\3|p" )
-#
-# Set up values for the next iteration of the while-loop.
-#
-        crnt_tmpl=$( printf "%s" "${remainder}" | \
-                     $SED -n -r -e "s|${regex_search_tmpl}|\2|p" )
-        remainder=$( printf "%s" "${remainder}" | \
-                     $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" )
-
-      done
-#
-# Get the full path to the file and check if it exists.
-#
-      fp="${base_dir}/${fn}"
-
-      if [ -f "${fp}" ]; then
-        print_info_msg "\
-Found file (fp) for the current forecast hour (fhr; relative to the cycle
-date cdate):
-  fhr = \"$fhr\"
-  cdate = \"$cdate\"
-  fp = \"${fp}\"
-"
-      else
-        skip_this_fhr="TRUE"
-        num_missing_files=$(( ${num_missing_files} + 1 ))
-        print_info_msg "\
-The file (fp) for the current forecast hour (fhr; relative to the cycle
-date cdate) is missing:
-  fhr = \"$fhr\"
-  cdate = \"$cdate\"
-  fp = \"${fp}\"
-Excluding the current forecast hour from the list of hours passed to the
-METplus configuration file.
-"
-        break
-      fi
-
-      fhr=$(( $fhr + 1 ))
-
-    done
-
-    if [ "${skip_this_fhr}" != "TRUE" ]; then
-      fhr_list="${fhr_list},${fhr_orig}"
-    fi
-
-  done
-#
-# Remove leading comma from fhr_list.
-#
-  fhr_list=$( echo "${fhr_list}" | $SED "s/^,//g" )
-  print_info_msg "$VERBOSE" "\
-Final (i.e. after filtering for missing files) set of forecast hours is
-(written as a single string):
-  fhr_list = \"${fhr_list}\"
-"
-#
-#-----------------------------------------------------------------------
-#
-# If the number of missing files is greater than the maximum allowed
-# (specified by num_missing_files_max), print out an error message and
-# exit.
-#
-#-----------------------------------------------------------------------
-#
-  if [ "${num_missing_files}" -gt "${num_missing_files_max}" ]; then
-    print_err_msg_exit "\
-The number of missing files (num_missing_files) is greater than the
-maximum allowed number (num_missing_files_max):
-  num_missing_files = ${num_missing_files}
-  num_missing_files_max = ${num_missing_files_max}"
-  fi
-#
-#-----------------------------------------------------------------------
-#
-# Set output variables.
-#
-#-----------------------------------------------------------------------
-#
-  if [ ! -z "${outvarname_fhr_list}" ]; then
-    printf -v ${outvarname_fhr_list} "%s" "${fhr_list}"
-  fi
-#
-#-----------------------------------------------------------------------
-#
-# Restore the shell options saved at the beginning of this script/function.
-#
-#-----------------------------------------------------------------------
-#
-  { restore_shell_opts; } > /dev/null 2>&1
-
-}

From b93459e2f7ee9061043576846a27f7b59e2e4b00 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr"
Date: Fri, 16 Aug 2024 16:39:43 +0000
Subject: [PATCH 045/260] Everything is now running through PointStat!

The only issue is that I don't get any matched pairs. However, it seems
as if the example case has the same issue, so I'll need to figure out
what's going on there.
- Update vx_config_det.yaml for correct obs names
- Update verify_det.yaml to make the PointStat metatask loop over
  obtypes, so we can combine NDAS with smoke vx
- Add PM10 to ASCII2nc_obs
- Remove verbose flag from set_vx_fhr_list.py call in
  exregional_check_post_output.sh so we get correct FHR results
- Update exregional_run_met_gridstat_or_pointstat_vx.sh: uses beta
  release, can handle smoke vx obtypes for PointStat
- Remove deleted script from ush/source_util_funcs.sh

---
 parm/metplus/vx_config_det.yaml               |  9 +++----
 parm/wflow/verify_det.yaml                    | 21 +++++++++------
 parm/wflow/verify_pre.yaml                    |  8 +++----
 scripts/exregional_check_post_output.sh       |  2 +-
 ...gional_run_met_gridstat_or_pointstat_vx.sh | 24 +++++++++++++++++--
 ush/setup.py                                  |  2 +-
 ush/source_util_funcs.sh                      | 10 --------
 7 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/parm/metplus/vx_config_det.yaml b/parm/metplus/vx_config_det.yaml
index eaff56e8c3..40a83c6377 100644
--- a/parm/metplus/vx_config_det.yaml
+++ b/parm/metplus/vx_config_det.yaml
@@ -206,11 +206,12 @@ ADPUPA:
     L0: []
   CAPE%%MLCAPE:
     L0-90%%L0: ['gt500', 'gt1000', 'gt1500', 'gt2000', 'gt3000', 'gt4000']
-AERONET:
+AOD:
   AOTK%%AOD:
     L0: []
-AIRNOW:
+PM25:
   MASSDEN%%PM25:
-    Z8|A1: []
+    Z8%%A1: []
+PM10:
   MASSDEN%%PM10:
-    Z8|A1: []
+    Z8%%A1: []
diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml
index a62adb4481..305632dbc0 100644
--- a/parm/wflow/verify_det.yaml
+++ b/parm/wflow/verify_det.yaml
@@ -116,21 +116,24 @@ metatask_GridStat_MRMS_all_mems:
           age: 00:00:00:30
           text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt'
 
-metatask_PointStat_NDAS_all_mems:
+metatask_PointStat:
   var:
-    mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}'
-  metatask_PointStat_NDAS_mem#mem#:
+    METAVAR: '{% for var in verification.VX_FIELDS %}{% if var in ["ADPSFC", "ADPUPA", "AOD", "PM25", "PM10"] %}{{ "%s " % var }}{% endif %}{% endfor %}'
+    METAOBTYPE: '{% for var in verification.VX_FIELDS %}{% if var =="ADPSFC" or var =="ADPUPA" %}NDAS {% elif var =="AOD" %}AERONET {% elif var =="PM25" or var =="PM10" %}AIRNOW {% endif %}{% endfor %}'
+    METAOBS_DIR: '{% for var in verification.VX_FIELDS %}{% if var =="ADPSFC" or var =="ADPUPA" %}&NDAS_OBS_DIR; {% elif var =="AOD" %}&AERONET_OBS_DIR; {% elif var =="PM25" or var =="PM10" %}&AIRNOW_OBS_DIR; {% endif %}{% endfor %}'
+    TASKDEP: '{% for var in verification.VX_FIELDS %}{% if var =="ADPSFC" or var =="ADPUPA" %}run_MET_Pb2nc_obs {% elif var =="AOD" %}run_MET_ASCII2nc_obs_AOD {% elif var =="PM25" or var =="PM10" %}run_MET_ASCII2nc_obs_{{"%s" % var}} {% endif %}{% endfor %}'
+  metatask_PointStat_#METAVAR#_all_mems:
     var:
-      VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}'
-    task_run_MET_PointStat_vx_#VAR#_mem#mem#:
+      mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}'
+    task_run_MET_PointStat_vx_#METAVAR#_mem#mem#:
       <<: *default_task_verify_det
       command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"'
       envars:
         <<: *default_vars
-        OBS_DIR: '&NDAS_OBS_DIR;'
-        VAR: '#VAR#'
+        OBS_DIR: '#METAOBS_DIR#'
+        VAR: '#METAVAR#'
         METPLUSTOOLNAME: 'POINTSTAT'
-        OBTYPE: 'NDAS'
+        OBTYPE: '#METAOBTYPE#'
         ACCUM_HH: '01'
         ENSMEM_INDX: "#mem#"
         SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}'
@@ -141,7 +144,7 @@ metatask_PointStat:
         and:
           taskdep_pb2nc:
             attrs:
-              task: run_MET_Pb2nc_obs
+              task: '#TASKDEP#'
          datadep_post_files_exist:
            attrs:
              age: 00:00:00:30
diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml
index f637f4538b..3c2dc2038f 100644
--- a/parm/wflow/verify_pre.yaml
+++ b/parm/wflow/verify_pre.yaml
@@ -131,10 +131,10 @@ task_run_MET_Pb2nc_obs:
 
 metatask_ASCII2nc_obs:
   var:
-    METAVAR: '{% for var in verification.VX_FIELDS %}{% if var in ["AOD", "PM25"] %}{{ "%s " % var }}{% endif %}{% endfor %}'
-    METAOBTYPE: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}AERONET {% elif var =="PM25" %}AIRNOW {% endif %}{% endfor %}'
-    METAOBS_DIR: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}&AERONET_OBS_DIR; {% elif var =="PM25" %}&AIRNOW_OBS_DIR; {% endif %}{% endfor %}'
-    TASKDEP: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}get_obs_aeronet {% elif var =="PM25" %}get_obs_airnow {% endif %}{% endfor %}'
+    METAVAR: '{% for var in verification.VX_FIELDS %}{% if var in ["AOD", "PM25", "PM10"] %}{{ "%s " % var }}{% endif %}{% endfor %}'
+    METAOBTYPE: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}AERONET {% elif var =="PM25" or var=="PM10" %}AIRNOW {% endif %}{% endfor %}'
+    METAOBS_DIR: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}&AERONET_OBS_DIR; {% elif var =="PM25" or var=="PM10" %}&AIRNOW_OBS_DIR; {% endif %}{% endfor %}'
+    TASKDEP: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}get_obs_aeronet {% elif var =="PM25" or var=="PM10" %}get_obs_airnow {% endif %}{% endfor %}'
   task_run_MET_ASCII2nc_obs_#METAVAR#:
     <<: *default_task_verify_pre
     attrs:
diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh
index 16020de3bc..95f4c68711 100755
--- a/scripts/exregional_check_post_output.sh
+++ b/scripts/exregional_check_post_output.sh
@@ -128,7 +128,7 @@ FHR_LIST=$( python3 $USHdir/set_vx_fhr_list.py \
   --base_dir="${VX_FCST_INPUT_BASEDIR}" \
   --filename_template="${FCST_INPUT_FN_TEMPLATE}" \
   --num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \
-  --time_lag="${time_lag}" --verbose) || \
+  --time_lag="${time_lag}") || \
 print_err_msg_exit "Call to set_vx_fhr_list.py failed with return code: $?"
 #
 #-----------------------------------------------------------------------
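A note on the metatask rewrite above: METAVAR, METAOBTYPE, METAOBS_DIR, and TASKDEP stay in lockstep because each Jinja expression iterates VX_FIELDS in the same order, so the Nth word of each rendered string describes the same field. A minimal sketch of this pairing (assumes jinja2 is installed; the VX_FIELDS values are hypothetical):

    from jinja2 import Template

    ctx = {"verification": {"VX_FIELDS": ["APCP", "ADPSFC", "AOD", "PM25"]}}
    metavar = Template(
        '{% for var in verification.VX_FIELDS %}'
        '{% if var in ["ADPSFC", "ADPUPA", "AOD", "PM25", "PM10"] %}'
        '{{ "%s " % var }}{% endif %}{% endfor %}')
    metaobtype = Template(
        '{% for var in verification.VX_FIELDS %}'
        '{% if var == "ADPSFC" or var == "ADPUPA" %}NDAS '
        '{% elif var == "AOD" %}AERONET '
        '{% elif var == "PM25" or var == "PM10" %}AIRNOW '
        '{% endif %}{% endfor %}')
    print(metavar.render(ctx))     # -> "ADPSFC AOD PM25 "
    print(metaobtype.render(ctx))  # -> "NDAS AERONET AIRNOW "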
diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
index de61eb5718..00d594b622 100755
--- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
+++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
@@ -192,8 +192,19 @@ if [ "${grid_or_point}" = "grid" ]; then
 
 elif [ "${grid_or_point}" = "point" ]; then
 
-  OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs"
-  OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}"
+  if [ "${OBTYPE}" = "NDAS" ]; then
+    OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs"
+    OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}"
+  elif [ "${OBTYPE}" = "AERONET" ]; then
+    OBS_INPUT_DIR="${vx_output_basedir}/metprd/Ascii2nc_obs"
+    OBS_INPUT_FN_TEMPLATE="${OBS_AERONET_FN_TEMPLATE_ASCII2NC_OUTPUT}"
+  elif [ "${OBTYPE}" = "AIRNOW" ]; then
+    OBS_INPUT_DIR="${vx_output_basedir}/metprd/Ascii2nc_obs"
+    OBS_INPUT_FN_TEMPLATE="${OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT}"
+  else
+    print_err_msg_exit "Invalid OBTYPE for PointStat: ${OBTYPE}"
+  fi
+
   FCST_INPUT_DIR="${vx_fcst_input_basedir}"
   FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}"
 
@@ -403,6 +414,15 @@ fi
 #
 #-----------------------------------------------------------------------
 #
+#TEMPORARILY POINTING TO BETA RELEASE
+MET_ROOT=/contrib/met/12.0.0-beta3
+MET_INSTALL_DIR=${MET_ROOT}
+MET_BIN_EXEC=${MET_INSTALL_DIR}/bin
+MET_BASE=${MET_INSTALL_DIR}/share/met
+METPLUS_ROOT=/contrib/METplus/METplus-6.0.0-beta3/
+METPLUS_PATH=${METPLUS_ROOT}
+MET_ROOT=/contrib/met/12.0.0-beta3
+#TEMPORARILY POINTING TO BETA RELEASE
 print_info_msg "$VERBOSE" "
 Calling METplus to run MET's ${metplus_tool_name} tool for field(s): ${FIELDNAME_IN_MET_FILEDIR_NAMES}"
 ${METPLUS_PATH}/ush/run_metplus.py \
diff --git a/ush/setup.py b/ush/setup.py
index fe636d5d97..79bdf2ba8d 100644
--- a/ush/setup.py
+++ b/ush/setup.py
@@ -592,7 +592,7 @@ def remove_tag(tasks, tag):
     vx_fields_all["AERONET"] = ["AOD"]
     vx_metatasks_all["AERONET"] = ["task_get_obs_aeronet","metatask_ASCII2nc_obs"]
 
-    vx_fields_all["AIRNOW"] = ["PM25"]
+    vx_fields_all["AIRNOW"] = ["PM25","PM10"]
     vx_metatasks_all["AIRNOW"] = ["task_get_obs_airnow","metatask_ASCII2nc_obs","metatask_PcpCombine_fcst_PM_all_mems"]
 
     # Get the vx fields specified in the experiment configuration.
diff --git a/ush/source_util_funcs.sh b/ush/source_util_funcs.sh
index 9feceaf68e..266975e97d 100644
--- a/ush/source_util_funcs.sh
+++ b/ush/source_util_funcs.sh
@@ -214,16 +214,6 @@ function source_util_funcs() {
 #
 #-----------------------------------------------------------------------
 #
-# Source the file containing the function that evaluates a METplus time
-# string template.
-#
-#-----------------------------------------------------------------------
-#
-  . ${bashutils_dir}/eval_METplus_timestr_tmpl.sh
-
-#
-#-----------------------------------------------------------------------
-#
 # Source the file that sources YAML files as if they were bash
 #
 #-----------------------------------------------------------------------

From 01f0080c9024c7038a66de91a40ce461a09f4880 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr"
Date: Wed, 10 Jul 2024 22:06:11 +0000
Subject: [PATCH 046/260] Fix the problem with AOD; turns out we just need to
 ensure METplus conf files are unique!

Also, make the metatask rules for ASCII2nc simpler.

---
 parm/wflow/verify_pre.yaml                 |  6 +++---
 scripts/exregional_run_met_ascii2nc_obs.sh | 22 +++++-----------------
 2 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml
index 3c2dc2038f..0737210396 100644
--- a/parm/wflow/verify_pre.yaml
+++ b/parm/wflow/verify_pre.yaml
@@ -132,9 +132,9 @@ task_run_MET_Pb2nc_obs:
 metatask_ASCII2nc_obs:
   var:
     METAVAR: '{% for var in verification.VX_FIELDS %}{% if var in ["AOD", "PM25", "PM10"] %}{{ "%s " % var }}{% endif %}{% endfor %}'
-    METAOBTYPE: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}AERONET {% elif var =="PM25" or var=="PM10" %}AIRNOW {% endif %}{% endfor %}'
-    METAOBS_DIR: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}&AERONET_OBS_DIR; {% elif var =="PM25" or var=="PM10" %}&AIRNOW_OBS_DIR; {% endif %}{% endfor %}'
-    TASKDEP: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}get_obs_aeronet {% elif var =="PM25" or var=="PM10" %}get_obs_airnow {% endif %}{% endfor %}'
+    METAOBTYPE: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}AERONET {% elif var in ["PM25", "PM10"] %}AIRNOW {% endif %}{% endfor %}'
+    METAOBS_DIR: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}&AERONET_OBS_DIR; {% elif var in ["PM25", "PM10"] %}&AIRNOW_OBS_DIR; {% endif %}{% endfor %}'
+    TASKDEP: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}get_obs_aeronet {% elif var in ["PM25", "PM10"] %}get_obs_airnow {% endif %}{% endfor %}'
   task_run_MET_ASCII2nc_obs_#METAVAR#:
     <<: *default_task_verify_pre
     attrs:
diff --git a/scripts/exregional_run_met_ascii2nc_obs.sh b/scripts/exregional_run_met_ascii2nc_obs.sh
index d46ce34d04..f467597c6b 100755
--- a/scripts/exregional_run_met_ascii2nc_obs.sh
+++ b/scripts/exregional_run_met_ascii2nc_obs.sh
@@ -187,23 +187,11 @@ fi
 #
 metplus_config_tmpl_fn="${MetplusToolName}_obs"
 #
-# Note that we append the cycle date to the name of the configuration
-# file because we are considering only observations when using ASCII2NC, so
-# the output files from METplus are not placed under cycle directories.
-# Thus, another method is necessary to associate the configuration file
-# with the cycle for which it is used.
-#
-# Note also that if considering an ensemble forecast, we include the
-# ensemble member name to the config file name.  This is necessary in
-# NCO mode (i.e. when RUN_ENVIR = "nco") because in that mode, the
-# directory tree under which the configuration file is placed does not
-# contain member information, so the file names must include it.  It is
-# not necessary in community mode (i.e. when RUN_ENVIR = "community")
-# because in that case, the directory structure does contain the member
-# information, but we still include that info in the file name so that
-# the behavior in the two modes is as similar as possible.
-#
-metplus_config_fn="${metplus_config_tmpl_fn}_${CDATE}"
+# Set the name of the final conf file that will be used for this task.  We
+# append the OBTYPE and cycle date to ensure that different tasks in the same
+# workflow won't overwrite each other's conf files.
+#
+metplus_config_fn="${metplus_config_tmpl_fn}_${OBTYPE}_${CDATE}"
 metplus_log_fn="${metplus_config_fn}"
 #
 # Add prefixes and suffixes (extensions) to the base file names.
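To see why appending OBTYPE resolves the collision, here is an illustrative sketch of the naming scheme above (the CDATE value is hypothetical): two ASCII2NC tasks in the same cycle now produce distinct conf file names instead of overwriting one shared file.

    metplus_config_tmpl_fn = "ASCII2nc_obs"
    cdate = "2024071000"
    for obtype in ("AERONET", "AIRNOW"):
        print(f"{metplus_config_tmpl_fn}_{obtype}_{cdate}.conf")
    # -> ASCII2nc_obs_AERONET_2024071000.conf
    # -> ASCII2nc_obs_AIRNOW_2024071000.conf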
From 01e87b8ea2d233eece6abcbd08086dec393f1ba3 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr"
Date: Fri, 19 Jul 2024 20:11:11 +0000
Subject: [PATCH 047/260] Comment and AOD/PM verification fixes

- Update some comments in config_defaults.yaml
- Produce hourly nc obs files for AOD
- Probably doesn't make a difference, but explicitly reference AOD as
  "AERONET_AOD" in POINT_STAT_MESSAGE_TYPE

---
 scripts/exregional_run_met_gridstat_or_pointstat_vx.sh |  1 +
 ush/config_defaults.yaml                               | 10 ++++------
 ush/set_vx_fhr_list.py                                 |  7 ++-----
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
index 00d594b622..62842cb52a 100755
--- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
+++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
@@ -196,6 +196,7 @@ elif [ "${grid_or_point}" = "point" ]; then
     OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs"
     OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}"
   elif [ "${OBTYPE}" = "AERONET" ]; then
+    FIELDNAME_IN_MET_FILEDIR_NAMES="AERONET_AOD"
     OBS_INPUT_DIR="${vx_output_basedir}/metprd/Ascii2nc_obs"
     OBS_INPUT_FN_TEMPLATE="${OBS_AERONET_FN_TEMPLATE_ASCII2NC_OUTPUT}"
   elif [ "${OBTYPE}" = "AIRNOW" ]; then
diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml
index f5c10ba3cd..9bbd36e8f4 100644
--- a/ush/config_defaults.yaml
+++ b/ush/config_defaults.yaml
@@ -942,13 +942,11 @@ workflow:
   #
   # DATE_FIRST_CYCL:
   # Starting cycle date of the FIRST forecast in the set of forecasts to
-  # run.  Format is "YYYYMMDDHH".  Note: This has recently changed to
-  # include the first cycle hour.
+  # run.  Format is "YYYYMMDDHH".
   #
   # DATE_LAST_CYCL:
   # Starting cycle date of the LAST forecast in the set of forecasts to run.
-  # Format is "YYYYMMDDHH".  Note: This has recently changed to include
-  # the last cycle hour.
+  # Format is "YYYYMMDDHH".
   #
   # INCR_CYCL_FREQ:
   # Increment in hours for Rocoto cycle frequency.
@@ -999,8 +997,8 @@ workflow:
   # The preexisting directory is renamed and a new directory (having the
   # same name as the original preexisting directory) is created.  The new
   # name of the preexisting directory consists of its original name and
-  # the suffix "_oldNNN", where NNN is a 3-digit integer chosen to make
-  # the new name unique.
+  # the suffix "_old_YYYYMMDD_HHmmss", where YYYYMMDD_HHmmss is the full
+  # date and time of the rename.
   #
   # * "reuse":
   # This method will keep preexisting directory intact except that
diff --git a/ush/set_vx_fhr_list.py b/ush/set_vx_fhr_list.py
index 960094b24b..c223c598e0 100644
--- a/ush/set_vx_fhr_list.py
+++ b/ush/set_vx_fhr_list.py
@@ -40,10 +40,7 @@ def set_vx_fhr_list(cdate, fcst_len, field, accum_hh, time_lag, base_dir, filena
     # hours, they will need to have fhr_int specified accordingly.
     # Every valid verification field (valid_vals_VX_FIELDS in valid_param_vals.yaml) should have
     # an entry in this if block
-    if field == "AOD":
-        fhr_min = 0
-        fhr_int = 24
-    elif field == "APCP":
+    if field == "APCP":
         fhr_min = accum_hh
         fhr_int = accum_hh
     elif field == "ASNOW":
@@ -53,7 +50,7 @@ def set_vx_fhr_list(cdate, fcst_len, field, accum_hh, time_lag, base_dir, filena
         else:
             fhr_min = accum_hh
             fhr_int = accum_hh
-    elif field in ["PM25", "PM10", "REFC", "RETOP", "ADPSFC"]:
+    elif field in ["AOD", "PM25", "PM10", "REFC", "RETOP", "ADPSFC"]:
         fhr_min = 0
         fhr_int = 1
     elif field == "ADPUPA":

From 51f31f94c9047efa5383b83ee04fb67fc647337d Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr"
Date: Fri, 19 Jul 2024 21:11:43 +0000
Subject: [PATCH 048/260] Setting AOD ob level to 550; per Partha Bhattacharjee
 the AOD output from RRFS is 550 nm. This gets us matched pairs!

---
 parm/metplus/vx_config_det.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parm/metplus/vx_config_det.yaml b/parm/metplus/vx_config_det.yaml
index 40a83c6377..7cccae2fb3 100644
--- a/parm/metplus/vx_config_det.yaml
+++ b/parm/metplus/vx_config_det.yaml
@@ -208,7 +208,7 @@ ADPUPA:
     L0-90%%L0: ['gt500', 'gt1000', 'gt1500', 'gt2000', 'gt3000', 'gt4000']
 AOD:
   AOTK%%AOD:
-    L0: []
+    L0%%L550: []
 PM25:
   MASSDEN%%PM25:
     Z8%%A1: []

From 6f65ac5b88ffcbc899ca4cb4186fb221cba69488 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr"
Date: Fri, 19 Jul 2024 22:18:12 +0000
Subject: [PATCH 049/260] We have matched pairs for PM as well!

---
 scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
index 62842cb52a..2f4ee00875 100755
--- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
+++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
@@ -200,6 +200,7 @@ elif [ "${grid_or_point}" = "point" ]; then
     OBS_INPUT_DIR="${vx_output_basedir}/metprd/Ascii2nc_obs"
     OBS_INPUT_FN_TEMPLATE="${OBS_AERONET_FN_TEMPLATE_ASCII2NC_OUTPUT}"
   elif [ "${OBTYPE}" = "AIRNOW" ]; then
+    FIELDNAME_IN_MET_FILEDIR_NAMES="AIRNOW_HOURLY_AQOBS"
     OBS_INPUT_DIR="${vx_output_basedir}/metprd/Ascii2nc_obs"
     OBS_INPUT_FN_TEMPLATE="${OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT}"
   else
@@ -416,13 +417,13 @@ fi
 #-----------------------------------------------------------------------
 #
 #TEMPORARILY POINTING TO BETA RELEASE
-MET_ROOT=/contrib/met/12.0.0-beta3
+MET_ROOT=/contrib/met/12.0.0-beta5
 MET_INSTALL_DIR=${MET_ROOT}
 MET_BIN_EXEC=${MET_INSTALL_DIR}/bin
 MET_BASE=${MET_INSTALL_DIR}/share/met
-METPLUS_ROOT=/contrib/METplus/METplus-6.0.0-beta3/
+METPLUS_ROOT=/contrib/METplus/METplus-6.0.0-beta5/
 METPLUS_PATH=${METPLUS_ROOT}
-MET_ROOT=/contrib/met/12.0.0-beta3
+MET_ROOT=/contrib/met/12.0.0-beta5
 #TEMPORARILY POINTING TO BETA RELEASE
 print_info_msg "$VERBOSE" "
 Calling METplus to run MET's ${metplus_tool_name} tool for field(s): ${FIELDNAME_IN_MET_FILEDIR_NAMES}"

From 01e87b8c9024c7038a66de91a40ce461a09f4880 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Fri, 16 Aug 2024 11:41:50 -0600
Subject: [PATCH 050/260] Make the names of the deterministic and ensemble vx
 configuration files user-specifiable.
---
 scripts/exregional_run_met_genensprod_or_ensemblestat.sh         | 4 +---
 scripts/exregional_run_met_gridstat_or_pointstat_vx.sh           | 4 +---
 .../exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh       | 4 +---
 .../exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh       | 4 +---
 ush/config_defaults.yaml                                         | 7 +++++++
 5 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh
index 5003047f4f..5034369851 100755
--- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh
+++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh
@@ -305,9 +305,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}"
 #
 #-----------------------------------------------------------------------
 #
-det_or_ens="ens"
-vx_config_fn="vx_config_${det_or_ens}.yaml"
-vx_config_fp="${METPLUS_CONF}/${vx_config_fn}"
+vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_ENS_FN}"
 vx_config_dict=$(<"${vx_config_fp}")
 # Indent each line of vx_config_dict so that it is aligned properly when
 # included in the yaml-formatted variable "settings" below.
diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
index 91c5a7896b..e54dd7b553 100755
--- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
+++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh
@@ -292,9 +292,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}"
 #
 #-----------------------------------------------------------------------
 #
-det_or_ens="det"
-vx_config_fn="vx_config_${det_or_ens}.yaml"
-vx_config_fp="${METPLUS_CONF}/${vx_config_fn}"
+vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_DET_FN}"
 vx_config_dict=$(<"${vx_config_fp}")
 # Indent each line of vx_config_dict so that it is aligned properly when
 # included in the yaml-formatted variable "settings" below.
diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh
index 6e4a4ff33f..f08c002d5f 100755
--- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh
+++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh
@@ -250,9 +250,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}"
 #
 #-----------------------------------------------------------------------
 #
-det_or_ens="ens"
-vx_config_fn="vx_config_${det_or_ens}.yaml"
-vx_config_fp="${METPLUS_CONF}/${vx_config_fn}"
+vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_ENS_FN}"
 vx_config_dict=$(<"${vx_config_fp}")
 # Indent each line of vx_config_dict so that it is aligned properly when
 # included in the yaml-formatted variable "settings" below.
diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh
index 924d321ec3..5952ed3785 100755
--- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh
+++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh
@@ -249,9 +249,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}"
 #
 #-----------------------------------------------------------------------
 #
-det_or_ens="ens"
-vx_config_fn="vx_config_${det_or_ens}.yaml"
-vx_config_fp="${METPLUS_CONF}/${vx_config_fn}"
+vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_ENS_FN}"
 vx_config_dict=$(<"${vx_config_fp}")
 # Indent each line of vx_config_dict so that it is aligned properly when
 # included in the yaml-formatted variable "settings" below.
diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml
index ceccd71277..e564444b49 100644
--- a/ush/config_defaults.yaml
+++ b/ush/config_defaults.yaml
@@ -2555,6 +2555,13 @@ verification:
   # be run.
   #
   NUM_MISSING_FCST_FILES_MAX: 0
+  #
+  # Names of configuration files for deterministic and ensemble vx that
+  # specify the field groups, field names, levels, and (if applicable)
+  # thresholds for which to run verification.
+  #
+  VX_CONFIG_DET_FN: 'vx_config_det.yaml'
+  VX_CONFIG_ENS_FN: 'vx_config_ens.yaml'
 
 #----------------------------
 # CPL_AQM config parameters

From 54446d275fcdd14f3b656cb1a9dfe00357a1910a Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr"
Date: Fri, 16 Aug 2024 20:44:58 +0000
Subject: [PATCH 051/260] Modifications to get things working after rebase

- Replace references to old source_config_for_task function with new
  yaml-based stuff
- Rename old LOAD_MODULES_RUN_TASK_FP --> LOAD_MODULES_RUN_TASK in rocoto
- Remove "grid_params" from sections to reference in verification tasks,
  since this section may not be set and the variables are not needed anyway
- Add back the create_symlink_to_file import
- Remove references to beta release: we're going for real this time!

---
 jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS           |  4 +++-
 parm/wflow/verify_pre.yaml                    |  8 ++++---
 scripts/exregional_run_met_ascii2nc_obs.sh    |  6 +++++-
 ...onal_run_met_genensprod_or_ensemblestat.sh |  2 +-
 ...gional_run_met_gridstat_or_pointstat_vx.sh | 11 +----------
 ...un_met_gridstat_or_pointstat_vx_ensmean.sh |  2 +-
 ...un_met_gridstat_or_pointstat_vx_ensprob.sh |  2 +-
 scripts/exregional_run_met_pb2nc_obs.sh       |  2 +-
 scripts/exregional_run_met_pcpcombine.sh      | 11 +----------
 ush/generate_FV3LAM_wflow.py                  | 19 ++++++++++---------
 10 files changed, 28 insertions(+), 39 deletions(-)

diff --git a/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS b/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS
index f73848c51b..151eb503cd 100755
--- a/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS
+++ b/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS
@@ -8,7 +8,9 @@
 #-----------------------------------------------------------------------
 #
 . $USHdir/source_util_funcs.sh
-source_config_for_task "task_run_met_ascii2nc_obs" ${GLOBAL_VAR_DEFNS_FP}
+for sect in user nco workflow ; do
+  source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect}
+done
$USHdir/job_preamble.sh # #----------------------------------------------------------------------- diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 0737210396..250ec2604b 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -23,7 +23,7 @@ default_task_verify_pre: &default_task_verify_pre task_get_obs_aeronet: <<: *default_task_verify_pre - command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' + command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars OBS_DIR: '&AERONET_OBS_DIR;' @@ -36,7 +36,7 @@ task_get_obs_aeronet: task_get_obs_airnow: <<: *default_task_verify_pre - command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' + command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars OBS_DIR: '&AIRNOW_OBS_DIR;' @@ -140,7 +140,7 @@ metatask_ASCII2nc_obs: attrs: cycledefs: forecast maxtries: '2' - command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_ASCII2NC_OBS"' + command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_ASCII2NC_OBS"' envars: <<: *default_vars VAR: '#METAVAR#' @@ -337,7 +337,7 @@ metatask_PcpCombine_fcst_PM_all_mems: attrs: cycledefs: forecast maxtries: '2' - command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' + command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' envars: <<: *default_vars VAR: '#PM#' diff --git a/scripts/exregional_run_met_ascii2nc_obs.sh b/scripts/exregional_run_met_ascii2nc_obs.sh index f467597c6b..2c3f141eb0 100755 --- a/scripts/exregional_run_met_ascii2nc_obs.sh +++ b/scripts/exregional_run_met_ascii2nc_obs.sh @@ -8,7 +8,11 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -source_config_for_task "task_run_met_ascii2nc_obs" ${GLOBAL_VAR_DEFNS_FP} +for sect in user nco platform workflow nco global verification cpl_aqm_parm \ + constants fixed_files \ + task_run_met_ascii2nc_obs ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} +done # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 8ec035a40f..4b76e62faa 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -9,7 +9,7 @@ # . $USHdir/source_util_funcs.sh for sect in user nco platform workflow nco global verification cpl_aqm_parm \ - constants fixed_files grid_params \ + constants fixed_files \ task_run_post ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 2f4ee00875..ed97317a98 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -9,7 +9,7 @@ # . 
$USHdir/source_util_funcs.sh for sect in user nco platform workflow nco global verification cpl_aqm_parm \ - constants fixed_files grid_params \ + constants fixed_files \ task_run_post ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -416,15 +416,6 @@ fi # #----------------------------------------------------------------------- # -#TEMPORARILY POINTING TO BETA RELEASE -MET_ROOT=/contrib/met/12.0.0-beta5 -MET_INSTALL_DIR=${MET_ROOT} -MET_BIN_EXEC=${MET_INSTALL_DIR}/bin -MET_BASE=${MET_INSTALL_DIR}/share/met -METPLUS_ROOT=/contrib/METplus/METplus-6.0.0-beta5/ -METPLUS_PATH=${METPLUS_ROOT} -MET_ROOT=/contrib/met/12.0.0-beta5 -#TEMPORARILY POINTING TO BETA RELEASE print_info_msg "$VERBOSE" " Calling METplus to run MET's ${metplus_tool_name} tool for field(s): ${FIELDNAME_IN_MET_FILEDIR_NAMES}" ${METPLUS_PATH}/ush/run_metplus.py \ diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index aa5c84ba4c..e5103b230c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -9,7 +9,7 @@ # . $USHdir/source_util_funcs.sh for sect in user nco platform workflow nco global verification cpl_aqm_parm \ - constants fixed_files grid_params \ + constants fixed_files \ task_run_post ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 7beb702374..1c717b0032 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -9,7 +9,7 @@ # . $USHdir/source_util_funcs.sh for sect in user nco platform workflow nco global verification cpl_aqm_parm \ - constants fixed_files grid_params \ + constants fixed_files \ task_run_post ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 388a6f12fd..ce07f82942 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -9,7 +9,7 @@ # . $USHdir/source_util_funcs.sh for sect in user nco platform workflow nco global verification cpl_aqm_parm \ - constants fixed_files grid_params ; do + constants fixed_files ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 078ee580c4..c1611c9a48 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -9,7 +9,7 @@ # . 
$USHdir/source_util_funcs.sh for sect in user nco platform workflow nco global verification cpl_aqm_parm \ - constants fixed_files grid_params \ + constants fixed_files \ task_run_post ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -408,15 +408,6 @@ fi # #----------------------------------------------------------------------- # -#TEMPORARILY POINTING TO BETA RELEASE -MET_ROOT=/contrib/met/12.0.0-beta3 -MET_INSTALL_DIR=${MET_ROOT} -MET_BIN_EXEC=${MET_INSTALL_DIR}/bin -MET_BASE=${MET_INSTALL_DIR}/share/met -METPLUS_ROOT=/contrib/METplus/METplus-6.0.0-beta3/ -METPLUS_PATH=${METPLUS_ROOT} -MET_ROOT=/contrib/met/12.0.0-beta3 -#TEMPORARILY POINTING TO BETA RELEASE print_info_msg "$VERBOSE" " Calling METplus to run MET's ${metplus_tool_name} tool for field(s): ${FIELDNAME_IN_MET_FILEDIR_NAMES}" ${METPLUS_PATH}/ush/run_metplus.py \ diff --git a/ush/generate_FV3LAM_wflow.py b/ush/generate_FV3LAM_wflow.py index c203bf4c68..fb15afce0d 100755 --- a/ush/generate_FV3LAM_wflow.py +++ b/ush/generate_FV3LAM_wflow.py @@ -19,18 +19,19 @@ from uwtools.api.template import render from python_utils import ( - list_to_str, - log_info, - import_vars, - export_vars, - cp_vrfy, - mkdir_vrfy, - mv_vrfy, - check_for_preexist_dir_file, cfg_to_yaml_str, + check_for_preexist_dir_file, + cp_vrfy, + create_symlink_to_file, + dict_find, + export_vars, find_pattern_in_str, flatten_dict, - dict_find, + import_vars, + list_to_str, + log_info, + mkdir_vrfy, + mv_vrfy ) from setup import setup From df13ef1a1573aa88a7fe0287e84baa608079d90d Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Fri, 16 Aug 2024 21:00:32 +0000 Subject: [PATCH 052/260] Spack-stack for Hera/GNU now supports MET version 11.1.1, which we need for the smoke VX. Now we don't have to hard-code to the beta version to get smoke working, but the downside is we can only use it on Hera for GNU compilers --- modulefiles/build_hera_gnu.lua | 4 ++-- modulefiles/tasks/hera/run_vx.local.lua | 2 +- .../config.MET_verification_smoke_only_vx.yaml | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modulefiles/build_hera_gnu.lua b/modulefiles/build_hera_gnu.lua index 8854108966..b7e44b4de8 100644 --- a/modulefiles/build_hera_gnu.lua +++ b/modulefiles/build_hera_gnu.lua @@ -7,7 +7,7 @@ whatis([===[Loads libraries needed for building the UFS SRW App on Hera using GN prepend_path("MODULEPATH", "/scratch2/NCEPDEV/stmp1/role.epic/installs/gnu/modulefiles") prepend_path("MODULEPATH", "/scratch2/NCEPDEV/stmp1/role.epic/installs/openmpi/modulefiles") -prepend_path("MODULEPATH", "/scratch2/NCEPDEV/stmp1/role.epic/spack-stack/spack-stack-1.6.0_gnu13/envs/ufs-wm-srw-rocky8/install/modulefiles/Core") +prepend_path("MODULEPATH", "/scratch2/NCEPDEV/stmp1/role.epic/spack-stack/spack-stack-1.6.0_gnu13/envs/met-unified-env/install/modulefiles/Core") load("stack-gcc/13.3.0") load("stack-openmpi/4.1.6") @@ -16,7 +16,7 @@ load("cmake/3.23.1") load("srw_common") -load(pathJoin("nccmp", os.getenv("nccmp_ver") or "1.9.0.1")) +load(pathJoin("nccmp", os.getenv("nccmp_ver") or "1.9.1")) load(pathJoin("nco", os.getenv("nco_ver") or "5.1.6")) load(pathJoin("openblas", os.getenv("openblas_ver") or "0.3.24")) diff --git a/modulefiles/tasks/hera/run_vx.local.lua b/modulefiles/tasks/hera/run_vx.local.lua index 737fc4f7cc..920a62587f 100644 --- a/modulefiles/tasks/hera/run_vx.local.lua +++ b/modulefiles/tasks/hera/run_vx.local.lua @@ -1,7 +1,7 @@ --[[ Compiler-specific modules are used for met and metplus libraries --]] -local met_ver = (os.getenv("met_ver") or 
"11.1.0") +local met_ver = (os.getenv("met_ver") or "11.1.1") local metplus_ver = (os.getenv("metplus_ver") or "5.1.0") if (mode() == "load") then load(pathJoin("met", met_ver)) diff --git a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml index e575f8ea82..3920697e90 100644 --- a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml @@ -18,15 +18,15 @@ rocoto: walltime: 01:00:00 verification: VX_FCST_MODEL_NAME: RRFS_smoke_test -# VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "AOD", "PM25" ] - VX_FIELDS: [ "AOD", "PM25", "PM10" ] + VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "AOD", "PM25"] +# VX_FIELDS: [ "AOD", "PM25", "PM10" ] VX_FCST_INPUT_BASEDIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/smoke_vx/fcst FCST_SUBDIR_TEMPLATE: '{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}' FCST_FN_TEMPLATE: 'rrfs.t{init?fmt=%H?shift=-${time_lag}}z.prslev.f{lead?fmt=%HHH?shift=${time_lag}}.conus_3km.grib2' platform: - CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/CCPA_obs' - MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/MRMS_obs' - NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/NDAS_obs' + CCPA_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/CCPA_obs + MRMS_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/MRMS_obs + NDAS_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/NDAS_obs AERONET_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/AERONET_obs/ AIRNOW_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/AIRNOW_obs/ From 21f7b691af429623c51d001e6de91085bec9672f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 26 Aug 2024 10:45:09 -0600 Subject: [PATCH 053/260] Add new files. --- ush/bash_utils/ceil.sh | 122 +++++++++++ ush/get_obs_ccpa.sh | 454 +++++++++++++++++++++++++++++++++++++++++ ush/get_obs_mrms.sh | 260 +++++++++++++++++++++++ ush/get_obs_ndas.sh | 305 +++++++++++++++++++++++++++ 4 files changed, 1141 insertions(+) create mode 100644 ush/bash_utils/ceil.sh create mode 100755 ush/get_obs_ccpa.sh create mode 100755 ush/get_obs_mrms.sh create mode 100755 ush/get_obs_ndas.sh diff --git a/ush/bash_utils/ceil.sh b/ush/bash_utils/ceil.sh new file mode 100644 index 0000000000..dc8a21c90d --- /dev/null +++ b/ush/bash_utils/ceil.sh @@ -0,0 +1,122 @@ +# +#----------------------------------------------------------------------- +# +# This function returns the ceiling of the quotient of two numbers. The +# ceiling of a number is the number rounded up to the nearest integer. +# +#----------------------------------------------------------------------- +# +function ceil() { +# +#----------------------------------------------------------------------- +# +# Save current shell options (in a global array). Then set new options +# for this script/function. +# +#----------------------------------------------------------------------- +# + { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 +# +#----------------------------------------------------------------------- +# +# Get the full path to the file in which this script/function is located +# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in +# which the file is located (scrfunc_dir). 
+#
+#-----------------------------------------------------------------------
+#
+  local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" )
+  local scrfunc_fn=$( basename "${scrfunc_fp}" )
+  local scrfunc_dir=$( dirname "${scrfunc_fp}" )
+#
+#-----------------------------------------------------------------------
+#
+# Get the name of this function.
+#
+#-----------------------------------------------------------------------
+#
+  local func_name="${FUNCNAME[0]}"
+#
+#-----------------------------------------------------------------------
+#
+# Check number of arguments.
+#
+#-----------------------------------------------------------------------
+#
+  if [ "$#" -ne 2 ]; then
+
+    print_err_msg_exit "
+Incorrect number of arguments specified:
+
+  Function name: \"${func_name}\"
+  Number of arguments specified: $#
+
+Usage:
+
+  ${func_name} numer denom
+
+where numer is a nonnegative integer and denom is a positive integer.
+"
+
+  fi
+#
+#-----------------------------------------------------------------------
+#
+# Make sure arguments are of the right form.
+#
+#-----------------------------------------------------------------------
+#
+  local numer="$1"
+  local denom="$2"
+
+  if ! [[ "${numer}" =~ ^[0-9]+$ ]]; then
+    print_err_msg_exit "
+The first argument to the \"${func_name}\" function (numer) must be a nonnegative
+integer but isn't:
+  numer = ${numer}
+"
+  fi
+
+  if ! [[ "${denom}" =~ ^[0-9]+$ ]]; then
+    print_err_msg_exit "
+The second argument to the \"${func_name}\" function (denom) must be a positive
+integer but isn't:
+  denom = ${denom}
+"
+  fi
+
+  if [[ "${denom}" -eq 0 ]]; then
+    print_err_msg_exit "
+The second argument to the \"${func_name}\" function (denom) cannot be zero:
+  denom = ${denom}
+"
+  fi
+#
+#-----------------------------------------------------------------------
+#
+# Let ceil(a,b) denote the ceiling of the quotient of a and b. It can be
+# shown that for two positive integers a and b, we have:
+#
+#   ceil(a,b) = floor((a+b-1)/b)
+#
+# where floor(a,b) is the integer obtained by rounding the quotient of
+# a and b (i.e. a/b) down to the nearest integer. Since in bash a
+# division returns only the integer part of the result, it is effectively
+# the floor function. Thus the following.
+#
+#-----------------------------------------------------------------------
+#
+  result=$(( (numer+denom-1)/denom ))
+  print_info_msg "${result}"
+#
+#-----------------------------------------------------------------------
+#
+# Restore the shell options saved at the beginning of this script/func-
+# tion.
+#
+#-----------------------------------------------------------------------
+#
+  { restore_shell_opts; } > /dev/null 2>&1
+
+}
+
diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh
new file mode 100755
index 0000000000..5cbf6638c6
--- /dev/null
+++ b/ush/get_obs_ccpa.sh
@@ -0,0 +1,454 @@
+#!/usr/bin/env bash
+
+#
+#-----------------------------------------------------------------------
+#
+# Source the variable definitions file and the bash utility functions.
+#
+#-----------------------------------------------------------------------
+#
+. $USHdir/source_util_funcs.sh
+source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP}
+
+set -u
+set -x
+#
+#-----------------------------------------------------------------------
+#
+# This script performs several important tasks for preparing data for
+# verification tasks. Depending on the value of the environment variable
+# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data
+# set.
+#
+# If data is not available on disk (in the location specified by
+# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively),
+# the script attempts to retrieve the data from HPSS using the retrieve_data.py
+# script. Depending on the data set, there are a few strange quirks and/or
+# bugs in the way data is organized; see in-line comments for details.
+#
+#
+# CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs
+# ----------
+# If data is available on disk, it must be in the following
+# directory structure and file name conventions expected by verification
+# tasks:
+#
+# {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2
+#
+# If data is retrieved from HPSS, it will be automatically staged by this
+# script.
+#
+# Notes about the data and how it's used for verification:
+#
+# 1. Accumulation is currently hardcoded to 01h. The verification will
+# use MET/pcp-combine to sum 01h files into desired accumulations.
+#
+# 2. There is a problem with the valid time in the metadata for files
+# valid from 19 - 00 UTC (or files under the '00' directory). This is
+# accounted for in this script for data retrieved from HPSS, but if you
+# have manually staged data on disk you should be sure this is accounted
+# for. See in-line comments below for details.
+#
+#-----------------------------------------------------------------------
+#

+#
+#-----------------------------------------------------------------------
+#
+# Below, we will use the retrieve_data.py script to retrieve the CCPA
+# grib2 file from a data store (e.g. HPSS). Before doing so, note the
+# following:
+#
+# * The daily archive (tar) file containing CCPA obs has a name of the
+#   form
+#
+#     [PREFIX].YYYYMMDD.tar
+#
+#   where YYYYMMDD is a given year, month, and day combination, and
+#   [PREFIX] is a string that is not relevant to the discussion here
+#   (the value it can take on depends on which of several time periods
+#   YYYYMMDD falls in, and the retrieve_data.py script tries various
+#   values until it finds one for which a tar file exists). Unintuitively,
+#   this archive file contains accumulation data for valid times starting
+#   at hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current
+#   day (YYYYMMDD). In other words, the valid times of the contents of
+#   this archive file are shifted back by 6 hours relative to the time
+#   string appearing in the name of the file. See section "DETAILS..."
+#   for a detailed description of the directory structure in the CCPA
+#   archive files.
+#
+# * We call retrieve_data.py in a temporary cycle-specific subdirectory
+#   in order to prevent get_obs_ccpa tasks for different cycles from
+#   clobbering each other's output. We refer to this as the "raw" CCPA
+#   base directory because it contains files as they are found in the
+#   archives before any processing by this script.
+#
+# * In each (cycle-specific) raw base directory, the data is arranged in
+#   daily subdirectories with the same timing as in the archive (tar)
+#   files (which are described in the section "DETAILS..." below). In
+#   particular, each daily subdirectory has the form YYYYMMDD, and it may
+#   contain CCPA grib2 files for accumulations valid at hour 19 of the
+#   previous day (YYYYMM[DD-1]) to hour 18 of the current day (YYYYMMDD).
+#   (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the
+#   daily subdirectory for the next day, i.e. YYYYMM[DD+1].) 
We refer
+#   to these as raw daily (sub)directories to distinguish them from the
+#   processed daily subdirectories under the processed (final) CCPA base
+#   directory (basedir_proc).
+#
+# * For a given cycle, some of the valid times at which there is forecast
+#   output may not have a corresponding file under the raw base directory
+#   for that cycle. This is because another cycle that overlaps this cycle
+#   has already obtained the grib2 CCPA file for that valid time and placed
+#   it in its processed location; as a result, the retrieval of that grib2
+#   file for this cycle is skipped.
+#
+# * To obtain a more intuitive temporal arrangement of the data in the
+#   processed CCPA directory structure than the temporal arrangement used
+#   in the archives and raw directories, we process the raw files such
+#   that the data in the processed directory structure is shifted forward
+#   in time 6 hours relative to the data in the archives and raw directories.
+#   This results in a processed base directory that, like the raw base
+#   directory, also contains daily subdirectories of the form YYYYMMDD,
+#   but each such subdirectory may only contain CCPA data at valid hours
+#   within that day, i.e. at valid times YYYYMMDD[00, 01, ..., 23] (but
+#   may not contain data that is valid on the previous, next, or any other
+#   day).
+#
+# * For data between 20180718 and 20210504, the 01h accumulation data
+#   (which is the only accumulation we are retrieving) have incorrect
+#   metadata under the "00" directory in the archive files (meaning for
+#   hour 00 and hours 19-23, which are the ones in the "00" directory).
+#   Below, we use wgrib2 to make a correction for this when transferring
+#   (moving or copying) grib2 files from the raw daily directories to
+#   the processed daily directories.
+#
+#
+# DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES
+# ----------------------------------------------------------
+#
+# The daily archive file containing CCPA obs is named
+#
+#   [PREFIX].YYYYMMDD.tar
+#
+# This file contains accumulation data for valid times starting at hour
+# 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current day
+# (YYYYMMDD). In particular, when untarred, the daily archive file
+# expands into four subdirectories: 00, 06, 12, and 18. The 06, 12, and
+# 18 subdirectories contain grib2 files for accumulations valid at or
+# below the hour-of-day given by the subdirectory name (and on YYYYMMDD).
+# For example, the 06 directory contains data valid at:
+#
+# * YYYYMMDD[01, 02, 03, 04, 05, 06] for 01h accumulations;
+# * YYYYMMDD[03, 06] for 03h accumulations;
+# * YYYYMMDD[06] for 06h accumulations.
+#
+# The valid times for the data in the 12 and 18 subdirectories are
+# analogous. However, the 00 subdirectory is different in that it
+# contains accumulations at hour 00 on YYYYMMDD as well as ones BEFORE
+# this time, i.e. the data for valid times other than YYYYMMDD00 are on
+# the PREVIOUS day. Thus, the 00 subdirectory contains data valid at
+# (note the DD-1, meaning one day prior):
+#
+# * YYYYMM[DD-1][19, 20, 21, 22, 23] and YYYYMMDD00 for 01h accumulations;
+# * YYYYMM[DD-1][19] and YYYYMMDD00 for 03h accumulations;
+# * YYYYMMDD00 for 06h accumulations.
+#
+#-----------------------------------------------------------------------
+#

+# CCPA accumulation period to consider. Here, we only retrieve data for
+# 1-hour accumulations. Other accumulations (03h, 06h, 24h) are obtained
+# by other tasks in the workflow that add up these hourly values. 
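+# (Illustration only, not code used by this script: the vx tasks build
+# the larger accumulations from these hourly files by running MET's
+# pcp_combine in sum mode, along the lines of
+#
+#   pcp_combine -sum 00000000_000000 01 20240815_120000 06 apcp_06h.nc \
+#               -pcpdir ${OBS_DIR}/20240815 -pcprx "ccpa.*gb2"
+#
+# where the date and paths are invented for the example; see the MET
+# User's Guide for the authoritative pcp_combine usage.)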
+accum="01"
+
+# The day (in the form YYYYMMDD) associated with the current task via the
+# task's cycledefs attribute in the ROCOTO xml.
+yyyymmdd_task=${PDY}
+
+# Base directory in which the daily subdirectories containing the CCPA
+# grib2 files will appear after this script is done. We refer to this as
+# the "processed" base directory because it contains the files after all
+# processing by this script is complete.
+basedir_proc=${OBS_DIR}
+
+# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a
+# scalar string containing all relevant forecast output times (each in
+# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings
+# because in ROCOTO, there doesn't seem to be a way to pass a bash array
+# from the XML to the task's script. To have an array-valued variable to
+# work with, here, we create the new variable output_times_all that is
+# the array-valued counterpart of OUTPUT_TIMES_ALL.
+output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}"))
+
+# List of times (each of the form YYYYMMDDHH) for which there is forecast
+# APCP (accumulated precipitation) output for the current day. We start
+# constructing this by extracting from the full list of all forecast APCP
+# output times (i.e. from all cycles) all elements that contain the current
+# task's day (in the form YYYYMMDD).
+output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}"))
+# If the 0th hour of the current day is in this list (and if it is, it
+# will be the first element), remove it because for APCP, that time is
+# considered part of the previous day (because it represents precipitation
+# that occurred during the last hour of the previous day).
+if [[ ${output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then
+  output_times_crnt_day=(${output_times_crnt_day[@]:1})
+fi
+# If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is
+# one of the output times in the list of all APCP output times, we include
+# it in the list for the current day because for APCP, that time is
+# considered part of the current day (because it represents precipitation
+# that occurred during the last hour of the current day).
+yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H)
+if [[ ${output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then
+  output_times_crnt_day+=(${yyyymmdd00_task_p1d})
+fi
+
+# If there are no forecast APCP output times on the day of the current
+# task, exit the script.
+num_output_times_crnt_day=${#output_times_crnt_day[@]}
+if [[ ${num_output_times_crnt_day} -eq 0 ]]; then
+  print_info_msg "
+None of the forecast APCP output times fall in the current day (including
+the 0th hour of the next day). Thus, there is no need to retrieve any
+obs files."
+  exit
+fi
+
+# Obs files will be obtained by extracting them from the relevant 6-hourly
+# archives. Thus, we need the sequence of archive hours over which to
+# loop. In the simplest case, this sequence will be "6 12 18 24". This
+# will be the case if the forecast output times include all hours of the
+# task's day and if none of the obs files for this day already exist on
+# disk. In other cases, the sequence we loop over will be a subset of
+# "6 12 18 24".
+#
+# To generate this sequence, we first set its starting and ending values
+# as well as the interval.

+# Sequence interval must be 6 hours because the archives are 6-hourly.
+arcv_hr_incr=6

+# Initial guess for starting archive hour. 
This is set to the hour
+# corresponding to the first forecast output time of the day.
+hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10)
+hr_first=$((10#${hh_first}))
+arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr})
+arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr ))

+# Ending archive hour. This is set to the hour corresponding to the last
+# forecast output time of the day.
+hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10)
+hr_last=$((10#${hh_last}))
+if [[ ${hr_last} -eq 0 ]]; then
+  arcv_hr_end=24
+else
+  arcv_hr_end=$(ceil ${hr_last} ${arcv_hr_incr})
+  arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr ))
+fi

+# Check whether any obs files already exist on disk. If so, adjust the
+# starting archive hour. In the process, keep a count of the number of
+# files that already exist on disk.
+num_existing_files=0
+for yyyymmddhh in ${output_times_crnt_day[@]}; do
+  yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8)
+  hh=$(echo ${yyyymmddhh} | cut -c9-10)
+  day_dir_proc="${basedir_proc}/${yyyymmdd}"
+  fn_proc="ccpa.t${hh}z.${accum}h.hrap.conus.gb2"
+  fp_proc="${day_dir_proc}/${fn_proc}"
+  if [[ -f ${fp_proc} ]]; then
+    num_existing_files=$((num_existing_files+1))
+    print_info_msg "
+File already exists on disk:
+  fp_proc = \"${fp_proc}\""
+  else
+    hr=$((10#${hh}))
+    arcv_hr_start=$(ceil ${hr} ${arcv_hr_incr})
+    arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr ))
+    print_info_msg "
+File does not exist on disk:
+  fp_proc = \"${fp_proc}\"
+Setting the hour (since 00) of the first archive to retrieve to:
+  arcv_hr_start = \"${arcv_hr_start}\""
+    break
+  fi
+done

+# If the number of obs files that already exist on disk is equal to the
+# number of files needed, then there is no need to retrieve any files.
+num_needed_files=$((num_output_times_crnt_day))
+if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then
+  print_info_msg "
+All obs files needed for the current day (yyyymmdd_task) already exist
+on disk:
+  yyyymmdd_task = \"${yyyymmdd_task}\"
+Thus, there is no need to retrieve any files."
+  exit
+# Otherwise, will need to retrieve files. In this case, set the sequence
+# of hours corresponding to the archives from which files will be retrieved.
+else
+  arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end}))
+  arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")"
+  print_info_msg "
+At least some obs files needed for the current day (yyyymmdd_task)
+do not exist on disk:
+  yyyymmdd_task = \"${yyyymmdd_task}\"
+The number of obs files needed is:
+  num_needed_files = ${num_needed_files}
+The number of obs files that already exist on disk is:
+  num_existing_files = ${num_existing_files}
+Will retrieve remaining files by looping over archives corresponding to
+the following hours (since 00 of this day):
+  arcv_hrs = ${arcv_hrs_str}
+"
+fi
+#
+#-----------------------------------------------------------------------
+#
+# At this point, at least some obs files for the current day need to be
+# retrieved. Thus, loop over the relevant archives that contain obs for
+# the day given by yyyymmdd_task and retrieve files as needed.
+#
+#-----------------------------------------------------------------------
+#

+# Whether to move or copy files from raw to processed directories.
+#mv_or_cp="mv"
+mv_or_cp="cp"
+# If the raw directories and files are to be removed at the end of this
+# script, no need to copy the files since the raw directories are going
+# to be removed anyway. 
+if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + mv_or_cp="mv" +fi + +# Base directory that will contain the daily subdirectories in which the +# CCPA grib2 files retrieved from archive (tar) files will be placed. +# We refer to this as the "raw" base directory because it contains files +# as they are found in the archives before any processing by this script. +basedir_raw="${basedir_proc}/${yyyymmdd_task}/raw" + +for arcv_hr in ${arcv_hrs[@]}; do + + print_info_msg " +arcv_hr = ${arcv_hr}" + + # Calculate the time information for the current archive. + yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) + yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) + hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) + + # Directory that will contain the CCPA grib2 files retrieved from the + # current 6-hourly archive file. We refer to this as the "raw" quarter- + # daily directory because it will contain the files as they are in the + # archive before any processing by this script. + qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" + + # Check whether any of the forecast APCP output times for the day associated + # with this task fall in the time interval spanned by the current archive. + # If so, set the flag (do_retrieve) to retrieve the files in the current + # archive. + yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 5 hours ago" +%Y%m%d%H) + yyyymmddhh_qrtrday_end=${yyyymmddhh_arcv} + do_retrieve="FALSE" + nout=${#output_times_crnt_day[@]} + for (( i=0; i<${nout}; i++ )); do + output_time=${output_times_crnt_day[i]} + if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ + [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + do_retrieve="TRUE" + break + fi + done + + if [[ ${do_retrieve} == "TRUE" ]]; then + + # Make sure the raw quarter-daily directory exists because it is used + # below as the output directory of the retrieve_data.py script (so if + # this directory doesn't already exist, that script will fail). Creating + # this directory also ensures that the raw base directory (basedir_raw) + # exists before we change location to it below. + mkdir -p ${qrtrday_dir_raw} + + # The retrieve_data.py script first extracts the contents of the archive + # file into the directory it was called from and then moves them to the + # specified output location (via the --output_path option). In order to + # avoid other get_obs_ccpa tasks (i.e. those associated with other days) + # from interfering with (clobbering) these files (because extracted files + # from different get_obs_ccpa tasks to have the same names or relative + # paths), we change location to the base raw directory so that files with + # same names are extracted into different directories. + cd ${basedir_raw} + + # Pull CCPA data from HPSS. This will get all 6 obs files in the current + # archive and place them in the raw quarter-daily directory. + cmd=" + python3 -u ${USHdir}/retrieve_data.py \ + --debug \ + --file_set obs \ + --config ${PARMdir}/data_locations.yml \ + --cycle_date ${yyyymmddhh_arcv} \ + --data_stores hpss \ + --data_type CCPA_obs \ + --output_path ${qrtrday_dir_raw} \ + --summary_file retrieve_data.log" + + print_info_msg "CALLING: ${cmd}" + $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." + + # Create the processed CCPA grib2 files. 
This usually consists of just + # moving or copying the raw files to their processed location, but for + # times between 20180718 and 20210504 and hours-of-day 19 through the + # end of the day (i.e. hour 0 of the next day), it involves using wgrib2 + # to correct an error in the metadata of the raw file and writing the + # corrected data to a new grib2 file in the processed location. + for hrs_ago in $(seq 5 -1 0); do + yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + fn_raw="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" + fp_raw="${qrtrday_dir_raw}/${fn_raw}" + day_dir_proc="${basedir_proc}/${yyyymmdd}" + mkdir -p ${day_dir_proc} + fn_proc="${fn_raw}" + fp_proc="${day_dir_proc}/${fn_proc}" + hh_noZero=$((10#${hh})) + # CCPA files for 1-hour accumulation have incorrect metadata in the files + # under the "00" directory from 20180718 to 20210504. After the data is + # pulled, reorganize into correct yyyymmdd structure. + if [[ ${yyyymmdd} -ge 20180718 && ${yyyymmdd} -le 20210504 ]] && \ + [[ (${hh_noZero} -ge 19 && ${hh_noZero} -le 23) || (${hh_noZero} -eq 0) ]]; then + wgrib2 ${fp_raw} -set_date -24hr -grib ${fp_proc} -s + else + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi + fi + done + + else + + print_info_msg " +None of the current day's forecast APCP output times fall in the range +spanned by the current 6-hourly archive file. The bounds of the current +archive are: + yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" + yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" +The forecast output times for APCP are: + output_times_crnt_day = ($(printf "\"%s\" " ${output_times_crnt_day[@]}))" + + fi + +done +# +#----------------------------------------------------------------------- +# +# Clean up raw directories. +# +#----------------------------------------------------------------------- +# +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + print_info_msg "Removing raw directories and files..." + rm -rf ${basedir_raw} || print_err_msg_exit "\ +Failed to remove raw directories and files." +fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh new file mode 100755 index 0000000000..23896bbf38 --- /dev/null +++ b/ush/get_obs_mrms.sh @@ -0,0 +1,260 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +. $USHdir/source_util_funcs.sh +source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} + +set -u +#set -x +# +#----------------------------------------------------------------------- +# +# This script performs several important tasks for preparing data for +# verification tasks. Depending on the value of the environment variable +# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data +# set. +# +# If data is not available on disk (in the location specified by +# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), +# the script attempts to retrieve the data from HPSS using the retrieve_data.py +# script. Depending on the data set, there are a few strange quirks and/or +# bugs in the way data is organized; see in-line comments for details. 
+#
+#
+# MRMS (Multi-Radar Multi-Sensor) radar observations
+# ----------
+# If data is available on disk, it must be in the following
+# directory structure and file name conventions expected by verification
+# tasks:
+#
+# {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2,
+#
+# Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity
+# data and EchoTop_18_00.50_ for echo top data. If data is not available
+# at the top of the hour, you should rename the file closest in time to
+# your hour(s) of interest to the above naming format. A script
+# "ush/mrms_pull_topofhour.py" is provided for this purpose.
+#
+# If data is retrieved from HPSS, it will be automatically staged by
+# this script.
+#
+#-----------------------------------------------------------------------
+#

+# Create an array-valued counterpart of MRMS_FIELDS. MRMS_FIELDS is an
+# environment variable created in the ROCOTO XML. It is a scalar variable
+# because there doesn't seem to be a way to pass a bash array from the
+# XML to the task's script.
+mrms_fields=($(printf "%s" "${MRMS_FIELDS}"))

+# Loop over the fields (REFC and RETOP) and set the file base name
+# corresponding to each.
+fields_in_filenames=()
+levels_in_filenames=()
+for field in ${mrms_fields[@]}; do
+  # Set field-dependent parameters needed in forming grib2 file names.
+  if [ "${field}" = "REFC" ]; then
+    fields_in_filenames+=("MergedReflectivityQCComposite")
+    levels_in_filenames+=("00.50")
+  elif [ "${field}" = "RETOP" ]; then
+    fields_in_filenames+=("EchoTop")
+    levels_in_filenames+=("18_00.50")
+  else
+    print_err_msg_exit "\
+Invalid field specified:
+  field = \"${field}\"
+Valid options are 'REFC', 'RETOP'."
+  fi
+done

+# The day (in the form YYYYMMDD) associated with the current task via the
+# task's cycledefs attribute in the ROCOTO xml.
+yyyymmdd_task=${PDY}

+# Base directory in which the daily subdirectories containing the MRMS
+# grib2 files will appear after this script is done. We refer to this
+# as the "processed" base directory because it contains the files after
+# all processing by this script is complete.
+basedir_proc=${OBS_DIR}

+# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a
+# scalar string containing all relevant forecast output times (each in
+# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings
+# because in ROCOTO, there doesn't seem to be a way to pass a bash array
+# from the XML to the task's script. To have an array-valued variable to
+# work with, here, we create the new variable output_times_all that is
+# the array-valued counterpart of OUTPUT_TIMES_ALL.
+output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}"))

+# List of times (each of the form YYYYMMDDHH) for which there is forecast
+# output for the current day. We extract this list from the full list of
+# all forecast output times (i.e. from all cycles).
+output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}"))

+# If there are no forecast output times on the day of the current task,
+# exit the script.
+num_output_times_crnt_day=${#output_times_crnt_day[@]}
+if [[ ${num_output_times_crnt_day} -eq 0 ]]; then
+  print_info_msg "
+None of the forecast output times fall in the current day. Thus, there
+is no need to retrieve any obs files."
+  exit
+fi

+# Check whether any obs files already exist on disk, and keep a count of
+# the number that do. (MRMS archives are daily, so unlike for CCPA and
+# NDAS there is no starting archive hour to adjust here.) 
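+# (For example, for the REFC field at a valid time of 2024081512 -- an
+# invented date -- the file checked for below, per the fn_proc template,
+# would be
+#   ${basedir_proc}/20240815/MergedReflectivityQCComposite_00.50_20240815-120000.grib2.)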
+num_existing_files=0
+num_mrms_fields=${#mrms_fields[@]}
+for (( i=0; i<${num_mrms_fields}; i++ )); do
+  for yyyymmddhh in ${output_times_crnt_day[@]}; do
+    yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8)
+    hh=$(echo ${yyyymmddhh} | cut -c9-10)
+    day_dir_proc="${basedir_proc}/${yyyymmdd}"
+    fn_proc="${fields_in_filenames[$i]}_${levels_in_filenames[$i]}_${yyyymmdd}-${hh}0000.grib2"
+    fp_proc="${day_dir_proc}/${fn_proc}"
+    if [[ -f ${fp_proc} ]]; then
+      num_existing_files=$((num_existing_files+1))
+      print_info_msg "
+File already exists on disk:
+  fp_proc = \"${fp_proc}\""
+    else
+      break
+    fi
+  done
+done

+# If the number of obs files that already exist on disk is equal to the
+# number of files needed, then there is no need to retrieve any files.
+num_needed_files=$((num_output_times_crnt_day*num_mrms_fields))
+if [[ ${num_existing_files} -eq $((num_needed_files)) ]]; then
+  print_info_msg "
+All obs files needed for the current day (yyyymmdd_task) already exist
+on disk:
+  yyyymmdd_task = \"${yyyymmdd_task}\"
+Thus, there is no need to retrieve any files."
+  exit
+# Otherwise, will need to retrieve files.
+else
+  print_info_msg "
+At least some obs files needed for the current day (yyyymmdd_task)
+do not exist on disk:
+  yyyymmdd_task = \"${yyyymmdd_task}\"
+The number of obs files needed is:
+  num_needed_files = ${num_needed_files}
+The number of obs files that already exist on disk is:
+  num_existing_files = ${num_existing_files}
+Will retrieve remaining files.
+"
+fi
+#
+#-----------------------------------------------------------------------
+#
+# At this point, at least some obs files for the current day need to be
+# retrieved.
+#
+#-----------------------------------------------------------------------
+#

+# Whether to move or copy files from raw to processed directories.
+#mv_or_cp="mv"
+mv_or_cp="cp"
+# If the raw directories and files are to be removed at the end of this
+# script, no need to copy the files since the raw directories are going
+# to be removed anyway.
+if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then
+  mv_or_cp="mv"
+fi

+# Base directory that will contain the daily subdirectories in which the
+# MRMS grib2 files retrieved from archive (tar) files will be placed.
+# We refer to this as the "raw" base directory because it contains files
+# as they are found in the archives before any processing by this script.
+basedir_raw="${basedir_proc}/${yyyymmdd_task}/raw"

+# Time associated with the archive. MRMS data have daily archives that
+# have the hour-of-day set to "00".
+yyyymmddhh_arcv="${yyyymmdd_task}00"

+# Directory that will contain the MRMS grib2 files retrieved from the
+# current daily archive file. We refer to this as the "raw" daily
+# directory because it will contain the files as they are in the
+# archive before any processing by this script.
+day_dir_raw="${basedir_raw}/${yyyymmdd_task}"

+# Make sure the raw daily directory exists because it is used
+# below as the output directory of the retrieve_data.py script (so if
+# this directory doesn't already exist, that script will fail). Creating
+# this directory also ensures that the raw base directory (basedir_raw)
+# exists before we change location to it below.
+mkdir -p ${day_dir_raw}

+# The retrieve_data.py script first extracts the contents of the archive
+# file into the directory it was called from and then moves them to the
+# specified output location (via the --output_path option). In order to
+# avoid other get_obs_mrms tasks (i.e. 
those associated with other days)
+# from interfering with (clobbering) these files (because extracted files
+# from different get_obs_mrms tasks may have the same names or relative
+# paths), we change location to the base raw directory so that files with
+# same names are extracted into different directories.
+cd ${basedir_raw}

+# Pull MRMS data from HPSS. This will get all of the grib2 files in the
+# current day's archive and place them in the raw daily directory.
+cmd="
+python3 -u ${USHdir}/retrieve_data.py \
+  --debug \
+  --file_set obs \
+  --config ${PARMdir}/data_locations.yml \
+  --cycle_date ${yyyymmddhh_arcv} \
+  --data_stores hpss \
+  --data_type MRMS_obs \
+  --output_path ${day_dir_raw} \
+  --summary_file retrieve_data.log"

+print_info_msg "CALLING: ${cmd}"
+$cmd || print_err_msg_exit "Could not retrieve obs from HPSS."
+#
+#-----------------------------------------------------------------------
+#
+# Loop over the 24 hour period starting with the zeroth hour of the day
+# associated with this task and ending with the 23rd hour.
+#
+#-----------------------------------------------------------------------
+#

+# Loop through all hours of the day associated with the task. For each
+# hour, find the gzipped grib2 file in the raw daily directory that is
+# closest in time to this hour. Then gunzip the file and copy it (in the
+# process renaming it) to the processed location.
+for hr in $(seq 0 1 23); do
+  yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H)
+  if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then
+    for (( i=0; i<${num_mrms_fields}; i++ )); do
+      python ${USHdir}/mrms_pull_topofhour.py \
+        --valid_time ${yyyymmddhh} \
+        --outdir ${basedir_proc} \
+        --source ${basedir_raw} \
+        --product ${fields_in_filenames[$i]}
+    done
+  fi
+done
+#
+#-----------------------------------------------------------------------
+#
+# Clean up raw directories.
+#
+#-----------------------------------------------------------------------
+#
+if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then
+  print_info_msg "Removing raw directories and files..."
+  rm -rf ${basedir_raw} || print_err_msg_exit "\
+Failed to remove raw directories and files."
+fi
diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh
new file mode 100755
index 0000000000..d98f390c8b
--- /dev/null
+++ b/ush/get_obs_ndas.sh
@@ -0,0 +1,305 @@
+#!/usr/bin/env bash
+
+#
+#-----------------------------------------------------------------------
+#
+# Source the variable definitions file and the bash utility functions.
+#
+#-----------------------------------------------------------------------
+#
+. $USHdir/source_util_funcs.sh
+source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP}
+
+set -u
+#set -x
+#
+#-----------------------------------------------------------------------
+#
+# This script performs several important tasks for preparing data for
+# verification tasks. Depending on the value of the environment variable
+# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data
+# set.
+#
+# If data is not available on disk (in the location specified by
+# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively),
+# the script attempts to retrieve the data from HPSS using the retrieve_data.py
+# script. Depending on the data set, there are a few strange quirks and/or
+# bugs in the way data is organized; see in-line comments for details. 
+#
+#
+# NDAS (NAM Data Assimilation System) conventional observations
+# ----------
+# If data is available on disk, it must be in the following
+# directory structure and file name conventions expected by verification
+# tasks:
+#
+# {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH}
+#
+# Note that data retrieved from HPSS and other sources may be in a
+# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is
+# either 00, 06, 12, or 18, and prevhour is the number of hours prior to
+# hh (00 through 05). If using custom staged data, you will have to
+# rename the files accordingly.
+#
+# If data is retrieved from HPSS, it will be automatically staged by
+# this script.
+#
+#-----------------------------------------------------------------------
+#

+# The day (in the form YYYYMMDD) associated with the current task via the
+# task's cycledefs attribute in the ROCOTO xml.
+yyyymmdd_task=${PDY}

+# Base directory in which the daily subdirectories containing the NDAS
+# prepbufr files will appear after this script is done. We refer to this
+# as the "processed" base directory because it contains the files after
+# all processing by this script is complete.
+basedir_proc=${OBS_DIR}

+# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a
+# scalar string containing all relevant forecast output times (each in
+# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings
+# because in ROCOTO, there doesn't seem to be a way to pass a bash array
+# from the XML to the task's script. To have an array-valued variable to
+# work with, here, we create the new variable output_times_all that is
+# the array-valued counterpart of OUTPUT_TIMES_ALL.
+output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}"))

+# List of times (each of the form YYYYMMDDHH) for which there is forecast
+# output for the current day. We extract this list from the full list of
+# all forecast output times (i.e. from all cycles).
+output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}"))

+# If there are no forecast output times on the day of the current task,
+# exit the script.
+num_output_times_crnt_day=${#output_times_crnt_day[@]}
+if [[ ${num_output_times_crnt_day} -eq 0 ]]; then
+  print_info_msg "
+None of the forecast output times fall in the current day. Thus, there
+is no need to retrieve any obs files."
+  exit
+fi

+# Obs files will be obtained by extracting them from the relevant 6-hourly
+# archives. Thus, we need the sequence of archive hours over which to
+# loop. In the simplest case, this sequence will be "6 12 18 24". This
+# will be the case if the forecast output times include all hours of the
+# task's day and if none of the obs files for this day already exist on
+# disk. In other cases, the sequence we loop over will be a subset of
+# "6 12 18 24".
+#
+# To generate this sequence, we first set its starting and ending values
+# as well as the interval.

+# Sequence interval must be 6 hours because the archives are 6-hourly.
+arcv_hr_incr=6

+# Initial guess for starting archive hour. This is set to the hour
+# corresponding to the first forecast output time of the day.
+hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10)
+hr_first=$((10#${hh_first}))
+arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr ))

+# Ending archive hour. This is set to the hour corresponding to the last
+# forecast output time of the day. 
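+# (Worked example with invented hours: the nam.t{hh}z archive holds the
+# tm06 through tm01 files, i.e. obs valid from hh-6 through hh-1, so for
+# hr_last = 5 the formula below gives (5/6 + 1)*6 = 6 (the 06z archive),
+# while for hr_last = 6 it gives (6/6 + 1)*6 = 12, since the hour-06 obs
+# are the tm06 file of the 12z archive.)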
+hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10)
+hr_last=$((10#${hh_last}))
+arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr ))

+# Check whether any obs files already exist on disk. If so, adjust the
+# starting archive hour. In the process, keep a count of the number of
+# files that already exist on disk.
+num_existing_files=0
+for yyyymmddhh in ${output_times_crnt_day[@]}; do
+  yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8)
+  hh=$(echo ${yyyymmddhh} | cut -c9-10)
+  day_dir_proc="${basedir_proc}"
+  fn_proc="prepbufr.ndas.${yyyymmddhh}"
+  fp_proc="${day_dir_proc}/${fn_proc}"
+  if [[ -f ${fp_proc} ]]; then
+    num_existing_files=$((num_existing_files+1))
+    print_info_msg "
+File already exists on disk:
+  fp_proc = \"${fp_proc}\""
+  else
+    hr=$((10#${hh}))
+    arcv_hr_start=$(( (hr/arcv_hr_incr + 1)*arcv_hr_incr ))
+    print_info_msg "
+File does not exist on disk:
+  fp_proc = \"${fp_proc}\"
+Setting the hour (since 00) of the first archive to retrieve to:
+  arcv_hr_start = \"${arcv_hr_start}\""
+    break
+  fi
+done

+# If the number of obs files that already exist on disk is equal to the
+# number of files needed, then there is no need to retrieve any files.
+num_needed_files=$((num_output_times_crnt_day))
+if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then
+  print_info_msg "
+All obs files needed for the current day (yyyymmdd_task) already exist
+on disk:
+  yyyymmdd_task = \"${yyyymmdd_task}\"
+Thus, there is no need to retrieve any files."
+  exit
+# Otherwise, will need to retrieve files. In this case, set the sequence
+# of hours corresponding to the archives from which files will be retrieved.
+else
+  arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end}))
+  arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")"
+  print_info_msg "
+At least some obs files needed for the current day (yyyymmdd_task)
+do not exist on disk:
+  yyyymmdd_task = \"${yyyymmdd_task}\"
+The number of obs files needed is:
+  num_needed_files = ${num_needed_files}
+The number of obs files that already exist on disk is:
+  num_existing_files = ${num_existing_files}
+Will retrieve remaining files by looping over archives corresponding to
+the following hours (since 00 of this day):
+  arcv_hrs = ${arcv_hrs_str}
+"
+fi
+#
+#-----------------------------------------------------------------------
+#
+# At this point, at least some obs files for the current day need to be
+# retrieved. Thus, loop over the relevant archives that contain obs for
+# the day given by yyyymmdd_task and retrieve files as needed.
+#
+#-----------------------------------------------------------------------
+#

+# Whether to move or copy files from raw to processed directories.
+#mv_or_cp="mv"
+mv_or_cp="cp"
+# If the raw directories and files are to be removed at the end of this
+# script, no need to copy the files since the raw directories are going
+# to be removed anyway.
+if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then
+  mv_or_cp="mv"
+fi

+# Base directory that will contain the daily subdirectories in which the
+# NDAS prepbufr files retrieved from archive (tar) files will be placed.
+# We refer to this as the "raw" base directory because it contains files
+# as they are found in the archives before any processing by this script.
+basedir_raw="${basedir_proc}/raw_${yyyymmdd_task}"

+for arcv_hr in ${arcv_hrs[@]}; do

+  print_info_msg "
+arcv_hr = ${arcv_hr}"

+  # Calculate the time information for the current archive. 
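+  # (E.g., for yyyymmdd_task = 20240815 -- an invented date -- and
+  # arcv_hr = 12, this gives yyyymmddhh_arcv = 2024081512, so that
+  # yyyymmdd_arcv = 20240815 and hh_arcv = 12.)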
+  yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H)
+  yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8)
+  hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10)

+  # Directory that will contain the NDAS prepbufr files retrieved from the
+  # current 6-hourly archive file. We refer to this as the "raw" quarter-
+  # daily directory because it will contain the files as they are in the
+  # archive before any processing by this script.
+  qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}"

+  # Check whether any of the forecast output times for the day associated
+  # with this task fall in the time interval spanned by the current archive.
+  # If so, set the flag (do_retrieve) to retrieve the files in the current
+  # archive.
+  yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H)
+  yyyymmddhh_qrtrday_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H)
+  do_retrieve="FALSE"
+  nout=${#output_times_crnt_day[@]}
+  for (( i=0; i<${nout}; i++ )); do
+    output_time=${output_times_crnt_day[i]}
+    if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \
+       [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then
+      do_retrieve="TRUE"
+      break
+    fi
+  done

+  if [[ ${do_retrieve} == "TRUE" ]]; then

+    # Make sure the raw quarter-daily directory exists because it is used
+    # below as the output directory of the retrieve_data.py script (so if
+    # this directory doesn't already exist, that script will fail). Creating
+    # this directory also ensures that the raw base directory (basedir_raw)
+    # exists before we change location to it below.
+    mkdir -p ${qrtrday_dir_raw}

+    # The retrieve_data.py script first extracts the contents of the archive
+    # file into the directory it was called from and then moves them to the
+    # specified output location (via the --output_path option). In order to
+    # avoid other get_obs_ndas tasks (i.e. those associated with other days)
+    # from interfering with (clobbering) these files (because extracted files
+    # from different get_obs_ndas tasks may have the same names or relative
+    # paths), we change location to the base raw directory so that files with
+    # same names are extracted into different directories.
+    cd ${basedir_raw}

+    # Pull NDAS data from HPSS. This will get all 7 obs files in the current
+    # archive and place them in the raw quarter-daily directory, although we
+    # will make use of only 6 of these (we will not use the tm00 file).
+    cmd="
+    python3 -u ${USHdir}/retrieve_data.py \
+      --debug \
+      --file_set obs \
+      --config ${PARMdir}/data_locations.yml \
+      --cycle_date ${yyyymmddhh_arcv} \
+      --data_stores hpss \
+      --data_type NDAS_obs \
+      --output_path ${qrtrday_dir_raw} \
+      --summary_file retrieve_data.log"

+    print_info_msg "CALLING: ${cmd}"
+    $cmd || print_err_msg_exit "Could not retrieve obs from HPSS."

+    # Create the processed NDAS prepbufr files. This consists of simply
+    # copying or moving (and in the process renaming) them from the raw
+    # quarter-daily directory to the processed directory. Note that the
+    # tm06 files contain more/better observations than tm00 for the
+    # equivalent time, so we use those. 
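+    # (E.g., with an archive time of 2024081512 -- invented for the
+    # example -- the raw file nam.t12z.prepbufr.tm06.nr is valid at
+    # 2024081506 and is renamed below to prepbufr.ndas.2024081506.)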
+ for hrs_ago in $(seq --format="%02g" 6 -1 1); do + yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" + fp_raw="${qrtrday_dir_raw}/${fn_raw}" + day_dir_proc="${basedir_proc}" + mkdir -p ${day_dir_proc} + fn_proc="prepbufr.ndas.${yyyymmddhh}" + fp_proc="${day_dir_proc}/${fn_proc}" + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi + done + + else + + print_info_msg " +None of the current day's forecast output times fall in the range spanned +by the current 6-hourly archive file. The bounds of the current archive +are: + yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" + yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" +The forecast output times are: + output_times_crnt_day = ($(printf "\"%s\" " ${output_times_crnt_day[@]}))" + + fi + +done +# +#----------------------------------------------------------------------- +# +# Clean up raw directories. +# +#----------------------------------------------------------------------- +# +if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then + print_info_msg "Removing raw directories and files..." + rm -rf ${basedir_raw} || print_err_msg_exit "\ +Failed to remove raw directories and files." +fi From e14b1b8e4485666594894fe9501da7f29e0a4df7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 16:52:46 -0600 Subject: [PATCH 054/260] Bug fixes to get_obs_... tasks. --- ush/get_obs_ccpa.sh | 15 ++++++++++----- ush/get_obs_mrms.sh | 11 ++++++++--- ush/get_obs_ndas.sh | 11 ++++++++--- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 5cbf6638c6..ef1d55eb05 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -185,12 +185,16 @@ output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) # constructing this by extracting from the full list of all forecast APCP # output times (i.e. from all cycles) all elements that contain the current # task's day (in the form YYYYMMDD). -output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) +output_times_crnt_day=() +if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi # If the 0th hour of the current day is in this list (and if it is, it # will be the first element), remove it because for APCP, that time is # considered part of the previous day (because it represents precipitation # that occurred during the last hour of the previous day). -if [[ ${output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then +if [[ ${#output_times_crnt_day[@]} -gt 0 ]] && \ + [[ ${output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then output_times_crnt_day=(${output_times_crnt_day[@]:1}) fi # If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is @@ -208,9 +212,10 @@ fi num_output_times_crnt_day=${#output_times_crnt_day[@]} if [[ ${num_output_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast APCP output times fall in the current day (including -the 0th hour of the next day). Thus, there is no need to retrieve any -obs files." 
+None of the forecast APCP output times fall within the day (including the +0th hour of the next day) associated with the current task (yyyymmdd_task): + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any obs files." exit fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 23896bbf38..92fc24fa56 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -95,15 +95,20 @@ output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # output for the current day. We extract this list from the full list of # all forecast output times (i.e. from all cycles). -output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) +output_times_crnt_day=() +if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi # If there are no forecast output times on the day of the current task, # exit the script. num_output_times_crnt_day=${#output_times_crnt_day[@]} if [[ ${num_output_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast output times fall in the current day. Thus, there -is no need to retrieve any obs files." +None of the forecast output times fall within the day associated with the +current task (yyyymmdd_task): + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any obs files." exit fi diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index d98f390c8b..441de7b31d 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -69,15 +69,20 @@ output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # output for the current day. We extract this list from the full list of # all forecast output times (i.e. from all cycles). -output_times_crnt_day=($(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}")) +output_times_crnt_day=() +if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi # If there are no forecast output times on the day of the current task, # exit the script. num_output_times_crnt_day=${#output_times_crnt_day[@]} if [[ ${num_output_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast output times fall in the current day. Thus, there -is no need to retrieve any obs files." +None of the forecast output times fall within the day associated with the +current task (yyyymmdd_task): + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any obs files." exit fi From 609b4e99efbc06463329cb3d8348219c7843fc5c Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 17:02:09 -0600 Subject: [PATCH 055/260] Change paths to archive files to make retrieve_data.py work with new get_obs_...sh scripts. 
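
The new "./{hh}" template lets retrieve_data.py fill in the archive-internal
subdirectory from the hour being requested instead of looping over the four
fixed subdirectories, and the wildcard file name extracts every hourly CCPA
file found in that subdirectory in one pass. Roughly what is being requested
of the archive, sketched as a hand-run GNU tar command (illustrative only;
the actual extraction logic lives inside retrieve_data.py, and the archive
name shown is just one of the several that it tries):

  tar -xf com_ccpa_v4.2_ccpa.${yyyymmdd}.tar --wildcards "./${hh}/ccpa.t*z.01h.hrap.conus.gb2"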
--- parm/data_locations.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/parm/data_locations.yml b/parm/data_locations.yml index dd3b5ddd17..a3712a1972 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -305,10 +305,7 @@ CCPA_obs: - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} archive_internal_dir: - - "./00" - - "./06" - - "./12" - - "./18" + - "./{hh}" archive_file_names: - "com2_ccpa_prod_ccpa.{yyyy}{mm}{dd}.tar" - "gpfs_dell1_nco_ops_com_ccpa_prod_ccpa.{yyyy}{mm}{dd}.tar" @@ -316,7 +313,7 @@ CCPA_obs: - "com_ccpa_v4.2_ccpa.{yyyy}{mm}{dd}.tar" file_names: obs: - - "ccpa.t{hh}z.01h.hrap.conus.gb2" + - "ccpa.t*z.01h.hrap.conus.gb2" MRMS_obs: hpss: From ed6b6771aa105cd9df5f1cc89acef02934e79dd7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 17:21:19 -0600 Subject: [PATCH 056/260] Move most of the code for getting obs files out of the ex-script exregional_get_verif_obs.sh since those are now in the ush/get_obs_[ccpa|mrms|ndas|nohrsc].sh scripts. --- scripts/exregional_get_verif_obs.sh | 912 +--------------------------- 1 file changed, 19 insertions(+), 893 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 314273ba93..a055fc0bc4 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -18,7 +18,6 @@ source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} #----------------------------------------------------------------------- # { save_shell_opts; . $USHdir/preamble.sh; } > /dev/null 2>&1 -set -x # #----------------------------------------------------------------------- # @@ -108,908 +107,35 @@ set -x # # If data is retrieved from HPSS, it will automatically staged by this # this script. 
- -#----------------------------------------------------------------------- -# Create and enter top-level obs directory (so temporary data from HPSS won't collide with other tasks) -mkdir -p ${OBS_DIR} -cd ${OBS_DIR} - -# Set log file for retrieving obs -logfile=retrieve_data.log - -# PDY and cyc are defined in rocoto XML...they are the yyyymmdd and hh for initial forecast hour respectively -iyyyy=$(echo ${PDY} | cut -c1-4) -imm=$(echo ${PDY} | cut -c5-6) -idd=$(echo ${PDY} | cut -c7-8) -ihh=${cyc} - -echo -echo "HELLO AAAAAAAAAAA" -iyyyymmddhh=${PDY}${cyc} -echo "iyyyymmddhh = ${iyyyymmddhh}" - -# Unix date utility needs dates in yyyy-mm-dd hh:mm:ss format -unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" - -# This awk expression gets the last item of the list $FHR -fcst_length=$(echo ${FHR} | awk '{ print $NF }') - -echo -echo "BYE 00000000" -vdate_last=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length} hours" +%Y%m%d%H) -if [[ ${OBTYPE} == "NDAS" ]]; then -echo "BYE 111111111" - vhh_last=$(echo ${vdate_last} | cut -c9-10) - #hours_to_add=$(( vhh_last + 6 - (vhh_last % 6) )) - hours_to_add=$(( 6 - (vhh_last % 6) )) - fcst_length_rounded_up=$(( fcst_length + hours_to_add )) -# vdate_last_rounded_up=$($DATE_UTIL -d "${unix_init_DATE} ${fcst_length_rounded_up} hours" +%Y%m%d%H) -# fcst_length=${fcst_length_rounded_up} -fi - -# Make sure fcst_length isn't octal (leading zero) -fcst_length=$((10#${fcst_length})) - -processed_fp_list=() -current_fcst=0 -while [[ ${current_fcst} -le ${fcst_length} ]]; do - -echo -echo "HELLO BBBBBBBBBBB" -echo "current_fcst = ${current_fcst}" - - # Calculate valid date info using date utility - vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) - unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") - vyyyymmdd=$(echo ${vdate} | cut -c1-8) - vhh=$(echo ${vdate} | cut -c9-10) -echo -echo "BYE 222222222" -echo "vhh = ${vhh}" - - # Calculate valid date + 1 day; this is needed because some obs files - # are stored in the *next* day's 00h directory - vdate_p1d=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) - vyyyymmdd_p1d=$(echo ${vdate_p1d} | cut -c1-8) - -echo -echo "HELLO CCCCCCCCCC" -echo "vyyyymmdd = ${vyyyymmdd}" -echo "vyyyymmdd_p1d = ${vyyyymmdd_p1d}" -echo "ihh = ${ihh}" - - #remove leading zero again, this time keep original - vhh_noZero=$((10#${vhh})) -# -#----------------------------------------------------------------------- -# -# Retrieve CCPA observations. -# -#----------------------------------------------------------------------- -# - if [[ ${OBTYPE} == "CCPA" ]]; then - - # CCPA is accumulation observations. We do not need to retrieve any - # observed accumulations at forecast hour 0 because there aren't yet - # any accumulations in the forecast(s) to compare it to. - if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((current_fcst + 1)) - continue - fi - - # CCPA accumulation period to consider. Here, we only retrieve data for - # 01h accumulations (see note above). Other accumulations (03h, 06h, 24h) - # are obtained elsewhere in the workflow by adding up these 01h accumulations. - accum=01 - - # Base directory in which the daily subdirectories containing the CCPA - # grib2 files will appear after this script is done, and the daily such - # subdirectory for the current valid time (year, month, and day). 
We - # refer to these as the "processed" base and daily subdirectories because - # they contain the final files after all processing by this script is - # complete. - ccpa_basedir_proc=${OBS_DIR} - ccpa_day_dir_proc="${ccpa_basedir_proc}/${vyyyymmdd}" - # Make sure these directories exist. - mkdir -p ${ccpa_day_dir_proc} - - # Name of the grib2 file to extract from the archive (tar) file as well - # as the name of the processed grib2 file. - ccpa_fn="ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" - - # Full path to the location of the processed CCPA grib2 file for the - # current valid time. Note that this path includes the valid date (year, - # month, and day) information in the name of a subdirectory and the valid - # hour-of-day in the name of the file. - ccpa_fp_proc="${ccpa_day_dir_proc}/${ccpa_fn}" - - # Store the full path to the processed file in a list for later use. - if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then - processed_fp_list+=(${ccpa_fp_proc}) - fi - - # Check if the CCPA grib2 file for the current valid time already exists - # at its procedded location on disk. If so, skip and go to the next valid - # time. If not, pull it. - if [[ -f "${ccpa_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - echo "${OBTYPE} file does not exist on disk:" - echo " ccpa_fp_proc = \"${ccpa_fp_proc}\"" - echo "Will attempt to retrieve from remote locations." - # - #----------------------------------------------------------------------- - # - # Below, we will use the retrieve_data.py script to retrieve the CCPA - # grib2 file from a data store (e.g. HPSS). Before doing so, note the - # following: - # - # * The daily archive (tar) file containing CCPA obs has a name of the - # form - # - # [PREFIX].YYYYMMDD.tar - # - # where YYYYMMDD is a given year, month, and day combination, and - # [PREFIX] is a string that is not relevant to the discussion here - # (the value it can take on depends on which of several time periods - # YYYYMMDD falls in, and the retrieve_data.py tries various values - # until it finds one for which a tar file exists). Unintuitively, this - # archive file contains accumulation data for valid times starting at - # hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current - # day (YYYYMMDD). In other words, the valid times of the contents of - # this archive file are shifted back by 6 hours relative to the time - # string appearing in the name of the file. See section "DETAILS..." - # for a detailed description of the directory structure in the CCPA - # archive files. - # - # * We call retrieve_data.py in a temporary cycle-specific subdirectory - # in order to prevent get_obs_ccpa tasks for different cycles from - # clobbering each other's output. We refer to this as the "raw" CCPA - # base directory because it contains files as they are found in the - # archives before any processing by this script. - # - # * In each (cycle-specific) raw base directory, the data is arranged in - # daily subdirectories with the same timing as in the archive (tar) - # files (which are described in the section "DETAILS..." below). In - # particular, each daily subdirectory has the form YYYYMDD, and it may - # contain CCPA grib2 files for accumulations valid at hour 19 of the - # previous day (YYYYMM[DD-1]) to hour 18 of the current day (YYYYMMDD). 
- # (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the - # daily subdirectory for the next day, i.e. YYYYMM[DD+1].) We refer - # to these as raw daily (sub)directories to distinguish them from the - # processed daily subdirectories under the processed (final) CCPA base - # directory (ccpa_basedir_proc). - # - # * For a given cycle, some of the valid times at which there is forecast - # output may not have a corresponding file under the raw base directory - # for that cycle. This is because another cycle that overlaps this cycle - # has already obtained the grib2 CCPA file for that valid time and placed - # it in its processed location; as a result, the retrieveal of that grib2 - # file for this cycle is skipped. - # - # * To obtain a more intuitive temporal arrangement of the data in the - # processed CCPA directory structure than the temporal arrangement used - # in the archives and raw directories, we process the raw files such - # that the data in the processed directory structure is shifted forward - # in time 6 hours relative to the data in the archives and raw directories. - # This results in a processed base directory that, like the raw base - # directory, also contains daily subdirectories of the form YYYYMMDD, - # but each such subdirectory may only contain CCPA data at valid hours - # within that day, i.e. at valid times YYYYMMDD[00, 01, ..., 23] (but - # may not contain data that is valid on the previous, next, or any other - # day). - # - # * For data between 20180718 and 20210504, the 01h accumulation data - # (which is the only accumulation we are retrieving) have incorrect - # metadata under the "00" directory in the archive files (meaning for - # hour 00 and hours 19-23, which are the ones in the "00" directory). - # Below, we use wgrib2 to make a correction for this when transferring - # (moving or copying) grib2 files from the raw daily directories to - # the processed daily directories. - # - # - # DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES - # ---------------------------------------------------------- - # - # The daily archive file containing CCPA obs is named - # - # [PREFIX].YYYYMMDD.tar - # - # This file contains accumulation data for valid times starting at hour - # 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current day - # (YYYYMMDD). In particular, when untarred, the daily archive file - # expands into four subdirectories: 00, 06, 12, and 18. The 06, 12, and - # 18 subdirectories contain grib2 files for accumulations valid at or - # below the hour-of-day given by the subdirectory name (and on YYYYMMDD). - # For example, the 06 directory contains data valid at: - # - # * YYYYMMDD[01, 02, 03, 04, 05, 06] for 01h accumulations; - # * YYYYMMDD[03, 06] for 03h accumulations; - # * YYYYMMDD[06] for 06h accumulations. - # - # The valid times for the data in the 12 and 18 subdirectories are - # analogous. However, the 00 subdirectory is different in that it - # contains accumulations at hour 00 on YYYYMMDD as well as ones BEFORE - # this time, i.e. the data for valid times other than YYYYMMDD00 are on - # the PREVIOUS day. Thus, the 00 subdirectory contains data valid at - # (note the DD-1, meaning one day prior): - # - # * YYYYMM[DD-1][19, 20, 21, 22, 23] and YYYYMMDD00 for 01h accumulations; - # * YYYYMM[DD-1][19] and YYYYMMDD00 for 03h accumulations; - # * YYYYMMDD00 for 06h accumulations. 
- # - #----------------------------------------------------------------------- - # - - # Set parameters for retrieving CCPA data using retrieve_data.py. - # Definitions: - # - # valid_time: - # The valid time in the name of the archive (tar) file from which data - # will be pulled. Due to the way the data is arranged in the CCPA archive - # files (as described above), for valid hours 19 to 23 of the current day, - # this must be set to the corresponding valid time on the NEXT day. - # - # ccpa_basedir_raw: - # Raw base directory that will contain the raw daily subdirectory in which - # the retrieved CCPA grib2 file will be placed. Note that this must be - # cycle-dependent (where the cycle is given by the variable iyyyymmddhh) - # to avoid get_obs_ccpa workflow tasks for other cycles writing to the - # same directories/files. Note also that this doesn't have to depend on - # the current valid hour (0-18 vs. 19-23), but for clarity and ease of - # debugging, here we do make it valid-hour-dependent. - # - # ccpa_day_dir_raw: - # Raw daily subdirectory under the raw base directory. This is dependent - # on the valid hour (i.e. different for hours 19-23 than for hours 0-18) - # in order to maintain the same data timing arrangement in the raw daily - # directories as in the archive files. - # - if [[ ${vhh_noZero} -ge 0 && ${vhh_noZero} -le 18 ]]; then - valid_time=${vyyyymmdd}${vhh} - ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}" - ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd}" - elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then - valid_time=${vyyyymmdd_p1d}${vhh} - ccpa_basedir_raw="${ccpa_basedir_proc}/raw_cyc${iyyyymmddhh}_vhh19-23" - ccpa_day_dir_raw="${ccpa_basedir_raw}/${vyyyymmdd_p1d}" - fi - mkdir -p ${ccpa_day_dir_raw} - - # Before calling retrieve_data.py, change location to the raw base - # directory to avoid get_obs_ccpa tasks for other cycles from clobbering - # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the CCPA tar files into the directory it was called from, - # which is the working directory of this script right before retrieve_data.py - # is called. - cd ${ccpa_basedir_raw} - - # Pull CCPA data from HPSS. This will get a single grib2 (.gb2) file - # corresponding to the current valid time (valid_time). - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${valid_time} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path ${ccpa_day_dir_raw} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - $cmd || print_err_msg_exit "\ - Could not retrieve CCPA data from HPSS. - - The following command exited with a non-zero exit status: - ${cmd} -" - - # Create the processed CCPA grib2 files. This usually consists of just - # moving or copying the raw file to its processed location, but for valid - # times between 20180718 and 20210504, it involves using wgrib2 to correct - # an error in the metadata of the raw file and writing the corrected data - # to a new grib2 file in the processed location. - # - # Since this script is part of a workflow, another get_obs_ccpa task (i.e. - # for another cycle) may have extracted and placed the current file in its - # processed location between the time we checked for its existence above - # (and didn't find it) and now. This can happen because there can be - # overlap between the verification times for the current cycle and those - # of other cycles. 
For this reason, check again for the existence of the - # processed file. If it has already been created by another get_obs_ccpa - # task, don't bother to recreate it. - if [[ -f "${ccpa_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " ccpa_fp_proc = \"{ccpa_fp_proc}\"" - echo "It was likely created by a get_obs_ccpa workflow task for another cycle that overlaps the current one." - echo "NOT moving or copying file from its raw location to its processed location." - - else - - # Full path to the CCPA file that was pulled and extracted above and - # placed in the raw directory. - ccpa_fp_raw="${ccpa_day_dir_raw}/${ccpa_fn}" - - #mv_or_cp="mv" - mv_or_cp="cp" - if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 18 ]]; then - ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} - elif [[ (${vhh_noZero} -eq 0) || (${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23) ]]; then - # One hour CCPA files have incorrect metadata in the files under the "00" - # directory from 20180718 to 20210504. After data is pulled, reorganize - # into correct valid yyyymmdd structure. - if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then - wgrib2 ${ccpa_fp_raw} -set_date -24hr -grib ${ccpa_fp_proc} -s - else - ${mv_or_cp} ${ccpa_fp_raw} ${ccpa_fp_proc} - fi - fi - - fi - - fi -# -#----------------------------------------------------------------------- -# -# Retrieve MRMS observations. -# -#----------------------------------------------------------------------- -# - elif [[ ${OBTYPE} == "MRMS" ]]; then - - # Base directory in which the daily subdirectories containing the MRMS - # grib2 files for REFC (composite reflectivity) and REFC (echo top) will - # be located after this script is done, and the daily such subdirectory - # for the current valid time (year, month, and day). We refer to these - # as the "processed" base and daily subdirectories because they contain - # the final files after all processing by this script is complete. - mrms_basedir_proc=${OBS_DIR} - mrms_day_dir_proc="${mrms_basedir_proc}/${vyyyymmdd}" - - # Loop over the fields (REFC and RETOP). - for field in ${VAR[@]}; do - - # Set field-dependent parameters needed in forming grib2 file names. - if [ "${field}" = "REFC" ]; then - file_base_name="MergedReflectivityQCComposite" - level="_00.50_" - elif [ "${field}" = "RETOP" ]; then - file_base_name="EchoTop" - level="_18_00.50_" - else - echo "Invalid field: ${field}" - print_err_msg_exit "\ - Invalid field specified: ${field} - - Valid options are 'REFC', 'RETOP'. -" - fi - - # Name of the MRMS grib2 file for the current field and valid time that - # will appear in the processed daily subdirectory after this script finishes. - # This is the name of the processed file. Note that this is generally - # not the name of the gzipped grib2 files that may be retrieved below - # from archive files using the retrieve_data.py script. - mrms_fn="${file_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - - # Full path to the processed MRMS grib2 file for the current field and - # valid time. - mrms_fp_proc="${mrms_day_dir_proc}/${mrms_fn}" - - # Store the full path to the processed file in a list for later use. - if [ ${vyyyymmdd}${vhh} -le ${vdate_last} ]; then - processed_fp_list+=(${mrms_fp_proc}) - fi - - # Check if the processed MRMS grib2 file for the current field and valid - # time already exists on disk. If so, skip this valid time and go to the - # next one. If not, pull it. 
- if [[ -f "${mrms_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " mrms_fp_proc = \"${mrms_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - echo "${OBTYPE} file does not exist on disk:" - echo " mrms_fp_proc = \"${mrms_fp_proc}\"" - echo "Will attempt to retrieve from remote locations." - - # Base directory that will contain the daily subdirectories in which the - # gzipped MRMS grib2 files retrieved from archive files will be placed, - # and the daily subdirectory for the current valid year, month, and day. - # We refer to these as the "raw" MRMS base and daily directories because - # they contain files as they are found in the archives before any processing - # by this script. - # - # Note that the name of the raw base directory depends on (contains) the - # valid year, month, and day (but not on the cycle, i.e. not on iyyyymmddhh) - # in order to avoid having get_obs_mrms tasks from other cycles clobbering - # the output from this one. It is also possible to make the name of this - # directory name depend instead on the cycle, but that turns out to cause - # an inefficiency in that get_obs_mrms tasks for different cycles will - # not be able to detect that another cycle has already retrieved the data - # for the current valid day will unnecessarily repeat the retrieval. - mrms_basedir_raw="${mrms_basedir_proc}/raw_day${vyyyymmdd}" - mrms_day_dir_raw="${mrms_basedir_raw}/${vyyyymmdd}" - - # Check if the raw daily directory already exists on disk. If so, it - # means all the gzipped MRMS grib2 files -- i.e. for both REFC and RETOP - # and for all times (hours, minutes, and seconds) in the current valid - # day -- have already been or are in the process of being retrieved from - # the archive (tar) files. If so, skip the retrieval process. If not, - # proceed to retrieve all the files and place them in the raw daily - # directory. - # - # Note that despite the check on the existence of the raw daily directory - # below, it is possible for two get_obs_mrms tasks to try to retrieve - # obs for the same day. To minimize this possibility, sleep for a random - # number of seconds (with a maximum wait of maxwait seconds set below) - # before performing the directory existence check - maxwait=30 - sleep_duration_secs=$((RANDOM % maxwait)) - echo "Sleeping for ${sleep_duration_secs} seconds..." - sleep "${sleep_duration_secs}s" - - if [[ -d "${mrms_day_dir_raw}" ]]; then - - echo "${OBTYPE} raw daily directory for day ${vyyyymmdd} exists on disk:" - echo " mrms_day_dir_raw = \"${mrms_day_dir_raw}\"" - echo "This means MRMS files for all hours of the current valid day (${vyyyymmdd}) have been or are being retrieved." - echo "Thus, we will NOT attempt to retrieve MRMS data for the current valid time from remote locations." - - else - - mkdir -p ${mrms_day_dir_raw} - valid_time=${vyyyymmdd}${vhh} - - # Before calling retrieve_data.py, change location to the raw base - # directory to avoid get_obs_mrms tasks for other cycles from clobbering - # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the MRMS tar files into the directory it was called from, - # which is the working directory of this script right before retrieve_data.py - # is called. - cd ${mrms_basedir_raw} - - # Use the retrieve_data.py script to retrieve all the gzipped MRMS grib2 - # files -- i.e. 
for both REFC and RETOP and for all times (hours, minutes, - # and seconds) in the current valid day -- and place them in the raw daily - # directory. Note that this will pull both the REFC and RETOP files in - # one call. - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${valid_time} \ - --data_stores hpss \ - --data_type MRMS_obs \ - --output_path ${mrms_day_dir_raw} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve MRMS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - - # Create a flag file that can be used to confirm the completion of the - # retrieval of all files for the current valid day. - touch ${mrms_day_dir_raw}/pull_completed.txt - - fi - - # Make sure the retrieval process for the current day (which may have - # been executed above for this cycle or for another cycle) has completed - # by checking for the existence of the flag file that marks completion. - # If not, keep checking until the flag file shows up. - while [[ ! -f "${mrms_day_dir_raw}/pull_completed.txt" ]]; do - echo "Waiting for the retrieval process for valid day ${vyyyymmdd} to complete..." - sleep 5s - done - - # Since this script is part of a workflow, another get_obs_mrms task (i.e. - # for another cycle) may have extracted and placed the current file in its - # processed location between the time we checked for its existence above - # (and didn't find it) and now. This can happen because there can be - # overlap between the verification times for the current cycle and those - # of other cycles. For this reason, check again for the existence of the - # processed file. If it has already been created by another get_obs_mrms - # task, don't bother to recreate it. - if [[ -f "${mrms_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " mrms_fp_proc = \"${mrms_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - # Search the raw daily directory for the current valid day to find the - # gizipped MRMS grib2 file whose time stamp (in the file name) is closest - # to the current valid day and hour. Then unzip that file and copy it - # to the processed daily directory, in the process renaming it to replace - # the minutes and hours in the file name with "0000". - valid_time=${vyyyymmdd}${vhh} - python ${USHdir}/mrms_pull_topofhour.py \ - --valid_time ${valid_time} \ - --outdir ${mrms_basedir_proc} \ - --source ${mrms_basedir_raw} \ - --product ${file_base_name} - - fi - - fi - - done -# -#----------------------------------------------------------------------- -# -# Retrieve NDAS observations. -# -#----------------------------------------------------------------------- -# - elif [[ ${OBTYPE} == "NDAS" ]]; then - - # Calculate valid date plus 1 hour. This is needed because we need to - # check whether this date corresponds to one of the valid hours-of-day - # 00, 06, 12, and 18 on which the NDAS archives are provided. 
- unix_vdate_p1h=$($DATE_UTIL -d "${unix_init_DATE} $((current_fcst+1)) hours" "+%Y-%m-%d %H:00:00") - vdate_p1h=$($DATE_UTIL -d "${unix_vdate_p1h}" +%Y%m%d%H) - vyyyymmdd_p1h=$(echo ${vdate_p1h} | cut -c1-8) - vhh_p1h=$(echo ${vdate_p1h} | cut -c9-10) - vhh_p1h_noZero=$((10#${vhh_p1h})) - -echo "" -echo "HELLO DDDDDDDDDDD" -echo "vdate = ${vdate}" -echo "vyyyymmdd = ${vyyyymmdd}" -echo "vhh = ${vhh}" -echo "vhh_noZero = ${vhh_noZero}" -echo "vdate = ${vdate}" -echo "vdate_p1h = ${vdate_p1h}" - - # Base directory in which the hourly NDAS prepbufr files will be located. - # We refer to this as the "processed" base directory because it contains - # the final files after all processing by this script is complete. - ndas_basedir_proc=${OBS_DIR} - - # Name of the NDAS prepbufr file for the current valid time that will - # appear in the processed daily subdirectory after this script finishes. - # This is the name of the processed file. Note that this is not the - # same as the name of the raw file, i.e. the file extracted from the - # archive (tar) file retrieved below by the retrieve_data.py script. - ndas_fn="prepbufr.ndas.${vdate}" - - # Full path to the processed NDAS prepbufr file for the current field and - # valid time. - ndas_fp_proc="${ndas_basedir_proc}/${ndas_fn}" - - # Store the full path to the processed file in a list for later use. -echo -echo "EEEEEEEEEEEEEE" - if [ ${vdate} -le ${vdate_last} ]; then -echo "FFFFFFFFFFFFFF" -echo "processed_fp_list = |${processed_fp_list[@]}" - processed_fp_list+=(${ndas_fp_proc}) - fi - - # Check if the processed NDAS prepbufr file for the current valid time - # already exists on disk. If so, skip this valid time and go to the next - # one. - if [[ -f "${ndas_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " ndas_fp_proc = \"${ndas_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - echo "${OBTYPE} file does not exist on disk:" - echo " ndas_fp_proc = \"${ndas_fp_proc}\"" - echo "Will attempt to retrieve from remote locations." - # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: - # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... , nam.tHHz.prepbufr.tm06.nr - # - # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z, - # nam.t00z.prepbufr.tm03.nr is valid for 21z the previous day, etc. - # This means that every six hours we have two obs files valid for the same time: - # nam.tHHz.prepbufr.tm00.nr and nam.t[HH+6]z.prepbufr.tm06.nr - # We want to use the tm06 file because it contains more/better obs (confirmed with EMC: even - # though the earlier files are larger, this is because the time window is larger) - - # Whether to move or copy extracted files from the raw directories to their - # final locations. - #mv_or_cp="mv" - mv_or_cp="cp" - -echo "" -echo "HELLO GGGGGGGGGGGGG" -echo "vhh_noZero = ${vhh_noZero}" -echo "vhh_p1h_noZero = ${vhh_p1h_noZero}" - - # Due to the way NDAS archives are organized, we can only retrieve the - # archive (tar) file containing data for the current valid hour (and the - # 5 hours preceeding it) if the hour-of-day corresponding to the current - # valid time plus 1 hour corresponds to one of 0, 6, 12, and 18. 
- if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ - ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 || \ - ${current_fcst} -eq ${fcst_length} ]]; then - - if [[ ${vhh_p1h_noZero} -eq 0 || ${vhh_p1h_noZero} -eq 6 || \ - ${vhh_p1h_noZero} -eq 12 || ${vhh_p1h_noZero} -eq 18 ]]; then - unix_vdate_archive="${unix_vdate_p1h}" - vdate_archive="${vdate_p1h}" - vyyyymmdd_archive="${vyyyymmdd_p1h}" - vhh_archive=${vhh_p1h} - elif [[ ${current_fcst} -eq ${fcst_length} ]]; then - hours_to_archive=$(( 6 - (vhh % 6) )) - unix_vdate_archive=$($DATE_UTIL -d "${unix_vdate} ${hours_to_archive} hours" "+%Y-%m-%d %H:00:00") - vdate_archive=$($DATE_UTIL -d "${unix_vdate} ${hours_to_archive} hours" +%Y%m%d%H) - vyyyymmdd_archive=$(echo ${vdate_archive} | cut -c1-8) - vhh_archive=$(echo ${vdate_archive} | cut -c9-10) - fi - - # Base directory that will contain the 6-hourly subdirectories in which - # the NDAS prepbufr files retrieved from archive files will be placed, - # and the 6-hourly subdirectory for the current valid time plus 1 hour. - # We refer to these as the "raw" NDAS base and 6-hourly directories - # because they contain files as they are found in the archives before - # any processing by this script. - ndas_basedir_raw="${ndas_basedir_proc}/raw_day${vyyyymmdd_archive}" - ndas_day_dir_raw="${ndas_basedir_raw}/${vdate_archive}" - - # Check if the raw 6-hourly directory already exists on disk. If so, it - # means the NDAS prepbufr files for the current valid hour and the 5 hours - # preceeding it have already been or are in the process of being retrieved - # from the archive (tar) files. If so, skip the retrieval process. If - # not, proceed to retrieve the archive file, extract the prepbufr files - # from it, and place them in the raw daily directory. - # - # Note that despite the check on the existence of the raw daily directory - # below, it is possible for two get_obs_mrms tasks to try to retrieve - # obs for the same day. To minimize this possibility, sleep for a random - # number of seconds (with a maximum wait of maxwait seconds set below) - # before performing the directory existence check - maxwait=30 - sleep_duration_secs=$((RANDOM % maxwait)) - echo "Sleeping for ${sleep_duration_secs} seconds..." - sleep "${sleep_duration_secs}s" - - if [[ -d "${ndas_day_dir_raw}" ]]; then - - print_info_msg " -${OBTYPE} raw 6-hourly directory ${vdate_archive} exists on disk: - ndas_day_dir_raw = \"${ndas_day_dir_raw}\" -This means NDAS files for the current valid time (${vdate}) and the -5 hours preceeding it have been or are being retrieved by a get_obs_ndas -workflow task for another cycle. Thus, we will NOT attempt to retrieve -NDAS data for the current valid time from remote locations." - - else - - mkdir -p ${ndas_day_dir_raw} - - # Before calling retrieve_data.py, change location to the raw base - # directory to avoid get_obs_ndas tasks for other cycles from clobbering - # the output from this call to retrieve_data.py. Note that retrieve_data.py - # extracts the NDAS prepbufr files the archive into the directory it was - # called from, which is the working directory of this script right before - # retrieve_data.py is called. - cd ${ndas_basedir_raw} - - # Use the retrieve_data.py script to retrieve all the NDAS prepbufr files - # for the current valid hour and the 5 hours preceeding it and place them - # in the raw 6-hourly directory. 
- cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vdate_archive} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path ${ndas_day_dir_raw} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NDAS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - - # Create a flag file that can be used to confirm the completion of the - # retrieval of all files for the 6-hour interval ending in vdate_archive. - touch ${ndas_day_dir_raw}/pull_completed.txt - - fi - - # Make sure the retrieval process for the 6-hour interval ending in - # vdate_archive (which may have been executed above for this cycle or for - # another cycle) has completed by checking for the existence of the flag - # file that marks completion. If not, keep checking until the flag file - # shows up. - while [[ ! -f "${ndas_day_dir_raw}/pull_completed.txt" ]]; do - echo "Waiting for completion of the NDAS obs retrieval process for the" - echo "6-hour interval ending on ${vdate_archive} ..." - sleep 5s - done - - # Since this script is part of a workflow, another get_obs_ndas task (i.e. - # for another cycle) may have extracted and placed the current file in its - # processed location between the time we checked for its existence above - # (and didn't find it) and now. This can happen because there can be - # overlap between the verification times for the current cycle and those - # of other cycles. For this reason, check again for the existence of the - # processed file. If it has already been created by another get_obs_ndas - # task, don't bother to recreate it. - if [[ -f "${ndas_fp_proc}" ]]; then - - echo "${OBTYPE} file exists on disk:" - echo " ndas_fp_proc = \"${ndas_fp_proc}\"" - echo "Will NOT attempt to retrieve from remote locations." - - else - - # Create the processed NDAS prepbufr files for the current valid hour as - # well as the preceeding 5 hours (or fewer if they're outside the time - # interval of the forecast) by copying or moving (and in the process - # renaming) them from the raw 6-hourly directory. In the following loop, - # "tm" means "time minus". Note that the tm06 files contain more/better - # observations than tm00 for the equivalent time. - for tm in $(seq 6 -1 1); do -# for tm in $(seq --format="%02g" 6 -1 1); do - vdate_archive_tm=$($DATE_UTIL -d "${unix_vdate_archive} ${tm} hours ago" +%Y%m%d%H) - if [[ ${vdate_archive_tm} -le ${vdate_last} ]]; then - tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - ${mv_or_cp} ${ndas_day_dir_raw}/nam.t${vhh_archive}z.prepbufr.tm${tm2}.nr \ - ${ndas_basedir_proc}/prepbufr.ndas.${vdate_archive_tm} - fi - done - - fi - - fi - - fi -# -#----------------------------------------------------------------------- -# -# Retrieve NOHRSC observations. 
# #----------------------------------------------------------------------- # - elif [[ ${OBTYPE} == "NOHRSC" ]]; then - - #NOHRSC is accumulation observations, so none to retrieve for hour zero - if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((${current_fcst} + 1)) - continue - fi - - # Reorganized NOHRSC location (no need for raw data dir) - nohrsc_proc=${OBS_DIR} - - nohrsc06h_file="$nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_06h_${vyyyymmdd}${vhh}_grid184.grb2" - nohrsc24h_file="$nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_24h_${vyyyymmdd}${vhh}_grid184.grb2" - retrieve=0 - # If 24-hour files should be available (at 00z and 12z) then look for both files - # Otherwise just look for 6hr file - if (( ${current_fcst} % 12 == 0 )) && (( ${current_fcst} >= 24 )) ; then - if [[ ! -f "${nohrsc06h_file}" || ! -f "${nohrsc24h_file}" ]] ; then - retrieve=1 - echo "${OBTYPE} files do not exist on disk:" - echo "${nohrsc06h_file}" - echo "${nohrsc24h_file}" - echo "Will attempt to retrieve from remote locations" - else - echo "${OBTYPE} files exist on disk:" - echo "${nohrsc06h_file}" - echo "${nohrsc24h_file}" - fi - elif (( ${current_fcst} % 6 == 0 )) ; then - if [[ ! -f "${nohrsc06h_file}" ]]; then - retrieve=1 - echo "${OBTYPE} file does not exist on disk:" - echo "${nohrsc06h_file}" - echo "Will attempt to retrieve from remote locations" - else - echo "${OBTYPE} file exists on disk:" - echo "${nohrsc06h_file}" - fi - fi - if [ $retrieve == 1 ]; then - if [[ ! -d "$nohrsc_proc/${vyyyymmdd}" ]]; then - mkdir -p $nohrsc_proc/${vyyyymmdd} - fi - - # Pull NOHRSC data from HPSS; script will retrieve all files so only call once - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type NOHRSC_obs \ - --output_path $nohrsc_proc/${vyyyymmdd} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NOHRSC data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} +if [[ ${OBTYPE} == "CCPA" ]]; then + $USHdir/get_obs_ccpa.sh +elif [[ ${OBTYPE} == "MRMS" ]]; then + $USHdir/get_obs_mrms.sh +elif [[ ${OBTYPE} == "NDAS" ]]; then + $USHdir/get_obs_ndas.sh +elif [[ ${OBTYPE} == "NOHRSC" ]]; then + $USHdir/get_obs_nohrsc.sh +else + print_err_msg_exit "\ +Invalid OBTYPE specified for script: + OBTYPE = \"${OBTYPE}\" +Valid options are CCPA, MRMS, NDAS, and NOHRSC. " - # 6-hour forecast needs to be renamed - mv $nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_6h_${vyyyymmdd}${vhh}_grid184.grb2 ${nohrsc06h_file} - fi - - else - print_err_msg_exit "\ - Invalid OBTYPE specified for script; valid options are CCPA, MRMS, NDAS, and NOHRSC - " - fi # Increment to next forecast hour - - # Increment to next forecast hour - echo "Finished fcst hr=${current_fcst}" - current_fcst=$((${current_fcst} + 1)) - -done -echo "SSSSSSSSSSSSSSSS" -# -#----------------------------------------------------------------------- -# -# At this point, the processed data files for all output forecast hours -# for this cycle are either being created (by a get_obs_... task for -# another cycle) or have already been created (either by this get_obs_... -# task or one for another cycle). In case they are still being created, -# make sure they have in fact been created before exiting this script. -# If we don't do this, it is possible for this get_obs_... 
task to complete
-# successfully but still have processed obs files for some forecast hours
-# not yet created, which is undesirable.
-#
-#-----------------------------------------------------------------------
-#
-echo "HHHHHHHHHHHHHHHH"
-echo "processed_fp_list = |${processed_fp_list[@]}"
-num_proc_files=${#processed_fp_list[@]}
-echo "num_proc_files = ${num_proc_files}"
-for (( i=0; i<${num_proc_files}; i++ )); do
-  obs_fp="${processed_fp_list[$i]}"
-  while [[ ! -f "${obs_fp}" ]]; do
-    echo "Waiting for ${OBTYPE} file to be created on disk (by a get_obs_... workflow task for another cycle):"
-    echo "  obs_fp = \"${obs_fp}\""
-    sleep 5s
-  done
-done
 #
 #-----------------------------------------------------------------------
 #
-# Clean up raw directories.
+# Create flag file that indicates completion of task. This is needed by
+# the workflow.
 #
 #-----------------------------------------------------------------------
 #
-#remove_raw="TRUE"
-remove_raw="FALSE"
-if [ "${remove_raw}" = "TRUE" ]; then
-  rm -rf ${OBS_DIR}/raw_*
-fi
+obtype=$(echo_lowercase ${OBTYPE})
+mkdir -p ${WFLOW_FLAG_FILES_DIR}
+touch "${WFLOW_FLAG_FILES_DIR}/get_obs_${obtype}_${PDY}_complete.txt"
 #
 #-----------------------------------------------------------------------
 #

From 2ee3a46d525bb25eda4137f36df91d516fe790d5 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Tue, 3 Sep 2024 17:49:09 -0600
Subject: [PATCH 057/260] Add ceil.sh to the set of scripts/functions that are
 sourced by source_util_funcs.sh.

---
 ush/source_util_funcs.sh | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/ush/source_util_funcs.sh b/ush/source_util_funcs.sh
index 7fe3025d6a..3884793fea 100644
--- a/ush/source_util_funcs.sh
+++ b/ush/source_util_funcs.sh
@@ -96,6 +96,15 @@ function source_util_funcs() {
 #
 #-----------------------------------------------------------------------
 #
+# Source the file containing the function that returns the ceiling of
+# the quotient of two positive integers.
+#
+#-----------------------------------------------------------------------
+#
+  . ${bashutils_dir}/ceil.sh
+#
+#-----------------------------------------------------------------------
+#
 # Source the file containing the functions that will echo given strings
 # as uppercase or lowercase
 #

From d60f6511d4b221fc98edd8473374c4cb449cd58e Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Tue, 3 Sep 2024 17:55:54 -0600
Subject: [PATCH 058/260] Changes to reflect the fact that the output of
 PcpCombine_obs tasks now goes into a cycle-based subdirectory (under the
 experiment directory), as opposed to an obs-day-based subdirectory.
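
Schematically, the PcpCombine_obs output for a given cycle now lands under
that cycle's own directory tree instead of in a single tree shared by all
cycles (the paths below are illustrative, not from a real experiment):

  # before: one tree shared by all cycles
  ${VX_OUTPUT_BASEDIR}/metprd/PcpCombine_obs/...
  # after: one tree per cycle
  ${VX_OUTPUT_BASEDIR}/${CDATE}/obs/metprd/PcpCombine_obs/...

This separation prevents the PcpCombine_obs tasks of overlapping cycles
from clobbering each other's output files.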
--- ...onal_run_met_genensprod_or_ensemblestat.sh | 2 +- ...gional_run_met_gridstat_or_pointstat_vx.sh | 9 +++--- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 2 +- scripts/exregional_run_met_pcpcombine.sh | 28 ++++--------------- 4 files changed, 13 insertions(+), 28 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 5034369851..24f12be786 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -131,7 +131,7 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}" ;; diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index e54dd7b553..f833ee534c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -132,6 +132,7 @@ time_lag=$( bc -l <<< "${ENS_TIME_LAG_HRS[$i]}*${SECS_PER_HOUR}" ) # vx_fcst_input_basedir=$( eval echo "${VX_FCST_INPUT_BASEDIR}" ) vx_output_basedir=$( eval echo "${VX_OUTPUT_BASEDIR}" ) + ensmem_indx=$(printf "%0${VX_NDIGITS_ENSMEM_NAMES}d" $(( 10#${ENSMEM_INDX}))) ensmem_name="mem${ensmem_indx}" if [ "${RUN_ENVIR}" = "nco" ]; then @@ -162,15 +163,15 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" - FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" + FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) OBS_INPUT_DIR="${OBS_DIR}" OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" - FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" + FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "REFC") @@ -198,7 +199,7 @@ fi OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_INPUT_FN_TEMPLATE} ) FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_INPUT_FN_TEMPLATE} ) -OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}" +OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}" STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 5952ed3785..23bafe91fd 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -119,7 +119,7 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - 
OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 20ae1a9794..ee999d1de3 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -163,6 +163,9 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then slash_ensmem_subdir_or_null="" fi fi +elif [ "${FCST_OR_OBS}" = "OBS" ]; then + slash_cdate_or_null="/${CDATE}" + slash_ensmem_subdir_or_null="/obs" fi OBS_INPUT_DIR="" @@ -175,7 +178,7 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE} ) - OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}" + OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_fcst" OUTPUT_FN_TEMPLATE=$( eval echo ${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" @@ -185,7 +188,7 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) - OUTPUT_BASE="${vx_output_basedir}" + OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" @@ -212,28 +215,9 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" fi -# If processing obs, then for all cylces except the last one, calculate -# a "forecast length" that will hours up to but not including the initial -# (zeroth) hour of the next cycle. For the last cycle, take the "forecast -# length" of the obs to be the same as that of the forecast for the cycle. -# This ensures that the PcpCombine_obs tasks for different cycles do not -# overwrite or clobber output from another cycle (because with this -# approach, the valid times on which the current PcpCombine_obs task is -# operating is distinct from the ones for the PcpCombine_obs tasks for -# every other cycle). -fcst_len_hrs="${FCST_LEN_HRS}" -if [ "${FCST_OR_OBS}" = "OBS" ]; then - yyyymmddhhmn="${PDY}${cyc}00" - if [ ${yyyymmddhhmn} -lt ${DATE_LAST_CYCL} ] && \ - [ ${FCST_LEN_HRS} -ge ${INCR_CYCL_FREQ} ]; then - output_incr_hrs="1" - fcst_len_hrs=$((INCR_CYCL_FREQ - output_incr_hrs + 1)) - fi -fi - set_vx_fhr_list \ cdate="${CDATE}" \ - fcst_len_hrs="${fcst_len_hrs}" \ + fcst_len_hrs="${FCST_LEN_HRS}" \ field="$VAR" \ accum_hh="${ACCUM_HH}" \ base_dir="${base_dir}" \ From 009666f994ddb61c81f95988dd02574850248a53 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 17:59:08 -0600 Subject: [PATCH 059/260] Simplify names of some variables; add variable for where workflow task flag files should be placed; fix template for output files from pcpcombine_obs tasks. 
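
The new WFLOW_FLAG_FILES_DIR variable gives tasks a single, well-defined
place to drop completion markers that downstream workflow dependencies can
test for. A task would typically mark itself complete along these lines
(a sketch that assumes the variable is available in the task's environment;
the flag file name shown is illustrative):

  mkdir -p ${WFLOW_FLAG_FILES_DIR}
  touch "${WFLOW_FLAG_FILES_DIR}/get_obs_ccpa_${PDY}_complete.txt"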
--- ush/config_defaults.yaml | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index e564444b49..9ac5bc39f8 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -335,10 +335,10 @@ platform: # #----------------------------------------------------------------------- # - REMOVE_RAW_OBS_DIRS_CCPA: true - REMOVE_RAW_OBS_DIRS_MRMS: true - REMOVE_RAW_OBS_DIRS_NDAS: true - REMOVE_RAW_OBS_DIRS_NOHRSC: true + REMOVE_RAW_OBS_CCPA: true + REMOVE_RAW_OBS_MRMS: true + REMOVE_RAW_OBS_NDAS: true + REMOVE_RAW_OBS_NOHRSC: true # #----------------------------------------------------------------------- # @@ -547,12 +547,17 @@ workflow: # default will point to: # # EXPTDIR: "${EXPT_BASEDIR}/${EXPT_SUBDIR}" + # + # WFLOW_FLAG_FILES_DIR: + # Directory in which flag files marking completion of various workflow + # tasks can be placed. #----------------------------------------------------------------------- # EXPT_BASEDIR: '' # This will be set in setup.py prior to extend_yaml() being called EXPT_SUBDIR: 'experiment' EXEC_SUBDIR: "exec" EXPTDIR: '{{ [workflow.EXPT_BASEDIR, workflow.EXPT_SUBDIR]|path_join }}' + WFLOW_FLAG_FILES_DIR: '{{ [workflow.EXPTDIR, "wflow_flag_files"]|path_join }}' # #----------------------------------------------------------------------- # @@ -1726,7 +1731,7 @@ task_run_fcst: #----------------------------------------------------------------------- # # KMP_AFFINITY_*: - # From Intel: "The Intel® runtime library has the ability to bind OpenMP + # From Intel: "The Intel runtime library has the ability to bind OpenMP # threads to physical processing units. The interface is controlled using # the KMP_AFFINITY environment variable. Depending on the system (machine) # topology, application, and operating system, thread affinity can have a @@ -2464,7 +2469,7 @@ verification: # METplus Pb2nc tool on NDAS observations. (These files will contain # obs ADPSFC or ADPUPA fields in NetCDF format.) # - OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${OBS_CCPA_APCP_FN_TEMPLATE}_a${ACCUM_HH}h.nc' + OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 'ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2_a${ACCUM_HH}h.nc' OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc' # # VX_FCST_MODEL_NAME: From 1622c0e45493ad5403501c32c1109ce2d259bc04 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 18:01:59 -0600 Subject: [PATCH 060/260] Add file accidentally left out of commit two hashes ago. 
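
The one-line change below applies to the ensemble-mean script the same
OBS_INPUT_DIR correction that was made to the other GridStat/PointStat
ex-scripts two commits earlier, i.e. (sketch of the corrected assignment):

  OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs"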
---
 scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh
index f08c002d5f..e769a194fc 100755
--- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh
+++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh
@@ -119,7 +119,7 @@ if [ "${grid_or_point}" = "grid" ]; then
 
   case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in
     "APCP"*)
-      OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs"
+      OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs"
       OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}"
       ;;
     "ASNOW"*)

From 9a26289c08d854cecd64b45f4e97207d1b4ca7b0 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Tue, 3 Sep 2024 18:05:21 -0600
Subject: [PATCH 061/260] Modifications to exregional_run_met_pb2nc_obs.sh so
 that the corresponding task is day-based (i.e. it is run for each day for
 which obs are needed). The script now also creates a flag file indicating it
 completed successfully (needed for the workflow to work properly).

---
 scripts/exregional_run_met_pb2nc_obs.sh | 105 +++++++++++++++++++++---
 1 file changed, 94 insertions(+), 11 deletions(-)

diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh
index 985cd33c7f..c5f007f7c0 100755
--- a/scripts/exregional_run_met_pb2nc_obs.sh
+++ b/scripts/exregional_run_met_pb2nc_obs.sh
@@ -71,6 +71,43 @@ to convert NDAS prep buffer observation files to NetCDF format.
 #
 #-----------------------------------------------------------------------
 #
+#
+#
+#-----------------------------------------------------------------------
+#
+# The day (in the form YYYYMMDD) associated with the current task via the
+# task's cycledefs attribute in the ROCOTO xml.
+yyyymmdd_task=${PDY}
+
+# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a
+# scalar string containing all relevant forecast output times (each in
+# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings
+# because in ROCOTO, there doesn't seem to be a way to pass a bash array
+# from the XML to a task's script. To have an array-valued variable to
+# work with, here we create the new variable output_times_all that is
+# the array-valued counterpart of OUTPUT_TIMES_ALL.
+output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}"))
+
+# List of times (each of the form YYYYMMDDHH) for which there is forecast
+# output for the current day. We extract this list from the full list of
+# all forecast output times (i.e. from all cycles).
+output_times_crnt_day=()
+if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then
+  output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") )
+fi
+
+num_output_times_crnt_day=${#output_times_crnt_day[@]}
+if [[ ${num_output_times_crnt_day} -eq 0 ]]; then
+  print_info_msg "
+None of the forecast output times fall within the day associated with the
+current task (yyyymmdd_task):
+  yyyymmdd_task = \"${yyyymmdd_task}\"
+Thus, there is no need to run ${METPLUSTOOLNAME} on any prepbufr files."
+  exit
+fi
+#
+#-----------------------------------------------------------------------
+#
 # Get the cycle date and time in YYYYMMDDHH format.
 #
 #-----------------------------------------------------------------------
@@ -123,16 +160,52 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs"
 #
 #-----------------------------------------------------------------------
 #
-set_vx_fhr_list \
-  cdate="${CDATE}" \
-  fcst_len_hrs="${FCST_LEN_HRS}" \
-  field="$VAR" \
-  accum_hh="${ACCUM_HH}" \
-  base_dir="${OBS_INPUT_DIR}" \
-  fn_template="${OBS_INPUT_FN_TEMPLATE}" \
-  check_accum_contrib_files="FALSE" \
-  num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \
-  outvarname_fhr_list="FHR_LIST"
+FHR_LIST=""
+num_missing_files=0
+for yyyymmddhh in ${output_times_crnt_day[@]}; do
+  yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8)
+  hh=$(echo ${yyyymmddhh} | cut -c9-10)
+  fn="prepbufr.ndas.${yyyymmddhh}"
+  fp="${OBS_INPUT_DIR}/${fn}"
+  if [[ -f "${fp}" ]]; then
+    print_info_msg "
+Found ${OBTYPE} obs file corresponding to forecast output time (yyyymmddhh):
+  yyyymmddhh = \"${yyyymmddhh}\"
+  fp = \"${fp}\"
+"
+    hh_noZero=$((10#${hh}))
+    #FHR_LIST+=("${yyyymmddhh}")
+    FHR_LIST="${FHR_LIST},${hh_noZero}"
+  else
+    num_missing_files=$((num_missing_files+1))
+    print_info_msg "
+${OBTYPE} obs file corresponding to forecast output time (yyyymmddhh) does
+not exist on disk:
+  yyyymmddhh = \"${yyyymmddhh}\"
+  fp = \"${fp}\"
+Removing this time from the list of times to be processed by ${METPLUSTOOLNAME}.
+"
+  fi
+done
+
+# If the number of missing files is greater than the maximum allowed
+# (specified by NUM_MISSING_OBS_FILES_MAX), print out an error message and
+# exit.
+if [ "${num_missing_files}" -gt "${NUM_MISSING_OBS_FILES_MAX}" ]; then
+  print_err_msg_exit "\
+The number of missing ${OBTYPE} obs files (num_missing_files) is greater
+than the maximum allowed number (NUM_MISSING_OBS_FILES_MAX):
+  num_missing_files = ${num_missing_files}
+  NUM_MISSING_OBS_FILES_MAX = ${NUM_MISSING_OBS_FILES_MAX}"
+fi
+
+# Remove leading comma from FHR_LIST.
+FHR_LIST=$( echo "${FHR_LIST}" | $SED "s/^,//g" )
+print_info_msg "$VERBOSE" "\
+Final (i.e. after filtering for missing files) set of forecast hours
+(saved in a scalar string variable) is:
+  FHR_LIST = \"${FHR_LIST}\"
+"
 #
 #-----------------------------------------------------------------------
 #
@@ -282,7 +355,7 @@ uw template render \
   -o ${metplus_config_fp} \
   --verbose \
   --values-file "${tmpfile}" \
-  --search-path "/"
+  --search-path "/"

 err=$?
 rm $tmpfile
@@ -315,6 +388,16 @@ METplus configuration file used is:
 #
 #-----------------------------------------------------------------------
 #
+# Create flag file that indicates completion of task.  This is needed by
+# the workflow.
+#
+#-----------------------------------------------------------------------
+#
+mkdir -p ${WFLOW_FLAG_FILES_DIR}
+touch "${WFLOW_FLAG_FILES_DIR}/run_met_pb2nc_obs_${PDY}_complete.txt"
+#
+#-----------------------------------------------------------------------
+#
 # Print message indicating successful completion of script.
 #
 #-----------------------------------------------------------------------

From e8a6f7dcfb4f81105acdb766c46272dac7073aec Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Tue, 3 Sep 2024 18:15:36 -0600
Subject: [PATCH 062/260] Move sections of WE2E yaml files around to be
 consistent with the order in config_defaults.yaml.
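A note on the hour-list construction added to exregional_run_met_pb2nc_obs.sh above: the script filters the space-separated OUTPUT_TIMES_ALL string down to the task's day, then joins the zero-stripped hours with commas for METplus. A sketch of the same filtering in Python (the output times below are illustrative; the real values come from the ROCOTO XML):

    # Keep only the output times that fall on the task's day, then build the
    # comma-separated list of zero-stripped hours that Pb2NC will loop over.
    output_times_all = "2024042918 2024042923 2024043000 2024043005".split()
    yyyymmdd_task = "20240429"

    output_times_crnt_day = [t for t in output_times_all if t.startswith(yyyymmdd_task)]
    fhr_list = ",".join(str(int(t[8:10])) for t in output_times_crnt_day)

    print(output_times_crnt_day)  # ['2024042918', '2024042923']
    print(fhr_list)               # 18,23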
--- .../config.MET_ensemble_verification_only_vx.yaml | 6 +++--- ...config.MET_ensemble_verification_only_vx_time_lag.yaml | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml index 812e805645..80b2e3099f 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml @@ -8,9 +8,6 @@ metadata: user: RUN_ENVIR: community -nco: - NET_default: rrfs - workflow: PREDEF_GRID_NAME: RRFS_CONUS_25km DATE_FIRST_CYCL: '2019061500' @@ -18,6 +15,9 @@ workflow: FCST_LEN_HRS: 6 PREEXISTING_DIR_METHOD: rename +nco: + NET_default: rrfs + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml", "parm/wflow/verify_ens.yaml"]|include }}' diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml index f7d82cb8cd..d0edccca01 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml @@ -16,18 +16,20 @@ workflow: FCST_LEN_HRS: 6 PREEXISTING_DIR_METHOD: rename +nco: + NET_default: 'RRFSE_CONUS' + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml", "parm/wflow/verify_ens.yaml"]|include }}' -nco: - NET_default: 'RRFSE_CONUS' - global: DO_ENSEMBLE: true NUM_ENS_MEMBERS: 2 ENS_TIME_LAG_HRS: '[ 0, 12 ]' +# If the following is commented out, then the obs files staged on each +# platform will be (found and) used. platform: CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/ccpa/proc' MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/mrms/proc' From a3c8c4d34676e8410a0bedc27c2a8b201f4faf5a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 3 Sep 2024 18:17:00 -0600 Subject: [PATCH 063/260] Changes to workflow yaml files for vx portion of the workflow in order to have cycledefs for obs-day-based tasks that include only the obs days for which obs are actually needed but not on any days (inbetween the first and last obs days over all cycles) for which obs are not needed. --- parm/wflow/verify_det.yaml | 64 +++++++++++++++++++-------------- parm/wflow/verify_ens.yaml | 74 +++++++++++++++++++++----------------- parm/wflow/verify_pre.yaml | 60 ++++++++++++++++++++----------- 3 files changed, 118 insertions(+), 80 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 35358c9b67..89f614b4e8 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -47,31 +47,9 @@ metatask_GridStat_CCPA_all_accums_all_mems: walltime: 02:00:00 dependency: and: - # The following will include dependencies on the PcpCombine_obs task for - # the current cycle as well as those from other cycles that process CCPA - # obs at valid times that are part of the current cycle's forecast. This - # dependence is necessary because each PcpCombine_obs task except the - # last one processes obs at valid times starting with the initial time - # of the current cycle's forecast and ending with the last output time - # of this forecast that is before the initial time of the next cycle's - # forecast. 
taskdep_pcpcombine_obs: attrs: - task: '{%- set num_cycl_dep = (workflow.FCST_LEN_HRS/workflow.INCR_CYCL_FREQ)|round(0,"ceil")|int %} - {%- set num_cycl_dep = num_cycl_dep %} - {%- for n in range(0, num_cycl_dep) %} - {%- set cycl_offset = n*workflow.INCR_CYCL_FREQ %} - {%- if n == 0 %} - {{- "run_MET_PcpCombine_obs_APCP#ACCUM_HH#h\" cycle_offset=\"%02d:00:00\"/>\n" % cycl_offset }} - {%- else %} - {{- " \n" }} - {{- " \n" % cycl_offset }} - {{- " \n" % cycl_offset }} - {{- " \n" }} - {%- endif %} - {%- endfor %} - {{- " \n" }} - {{- " \n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_mrms_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' datadep_post_files_exist: attrs: age: 00:00:00:30 @@ -163,9 +157,25 @@ metatask_PointStat_NDAS_all_mems: walltime: 01:00:00 dependency: and: - taskdep_pb2nc: + datadep_all_pb2nc_obs_complete: attrs: - task: run_MET_Pb2nc_obs + age: 00:00:00:30 + # Check that the flag files that indicate that the Pb2NC tasks are + # complete are all present before launching any PointStat task. + text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_INST|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' datadep_post_files_exist: attrs: age: 00:00:00:30 diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 17086c6bc6..0893248863 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -48,31 +48,9 @@ metatask_GenEnsProd_EnsembleStat_CCPA: FCST_THRESH: 'none' dependency: and: - # The following will include dependencies on the PcpCombine_obs task for - # the current cycle as well as those from other cycles that process CCPA - # obs at valid times that are part of the current cycle's forecast. This - # dependence is necessary because each PcpCombine_obs task except the - # last one processes obs at valid times starting with the initial time - # of the current cycle's forecast and ending with the last output time - # of this forecast that is before the initial time of the next cycle's - # forecast. 
taskdep_pcpcombine_obs: &taskdep_pcpcombine_obs attrs: - task: '{%- set num_cycl_dep = (workflow.FCST_LEN_HRS/workflow.INCR_CYCL_FREQ)|round(0,"ceil")|int %} - {%- set num_cycl_dep = num_cycl_dep %} - {%- for n in range(0, num_cycl_dep) %} - {%- set cycl_offset = n*workflow.INCR_CYCL_FREQ %} - {%- if n == 0 %} - {{- "run_MET_PcpCombine_obs_APCP#ACCUM_HH#h\" cycle_offset=\"%02d:00:00\"/>\n" % cycl_offset }} - {%- else %} - {{- " \n" }} - {{- " \n" % cycl_offset }} - {{- " \n" % cycl_offset }} - {{- " \n" }} - {%- endif %} - {%- endfor %} - {{- " \n" }} - {{- " \n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_mrms_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# @@ -175,9 +169,25 @@ metatask_GenEnsProd_EnsembleStat_NDAS: walltime: 01:00:00 dependency: and: - taskdep_pb2nc: + datadep_all_pb2nc_obs_complete: &all_pb2nc_obs_complete attrs: - task: run_MET_Pb2nc_obs + age: 00:00:00:30 + # Check that the flag files that indicate that the Pb2NC tasks are + # complete are all present before launching any EnsembleStat task. + text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_INST|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# @@ -254,9 +264,8 @@ metatask_GridStat_MRMS_ensprob: FCST_THRESH: 'all' dependency: and: - taskdep_get_obs_mrms: - attrs: - task: get_obs_mrms + datadep_all_get_obs_mrms_complete: + <<: *all_get_obs_mrms_complete taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# @@ -282,9 +291,8 @@ metatask_PointStat_NDAS_ensmeanprob: FCST_THRESH: 'all' dependency: and: - taskdep_pb2nc: - attrs: - task: run_MET_Pb2nc_obs + datadep_all_pb2nc_obs_complete: + <<: *all_pb2nc_obs_complete taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 2357c6bc5e..13815a9752 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -1,7 +1,7 @@ default_task_verify_pre: &default_task_verify_pre account: '&ACCOUNT;' attrs: - cycledefs: forecast + cycledefs: cycledef_obs_days_inst maxtries: '1' envars: &default_vars GLOBAL_VAR_DEFNS_FP: '&GLOBAL_VAR_DEFNS_FP;' @@ -23,14 +23,19 @@ default_task_verify_pre: &default_task_verify_pre task_get_obs_ccpa: <<: *default_task_verify_pre + attrs: + cycledefs: cycledef_obs_days_cumul + maxtries: '1' command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - ACCUM_HH: '01' OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' - REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_CCPA }}' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + OUTPUT_TIMES_ALL: &output_times_all_cumul + '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL|length)) %} + {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL[i] }} + {%- endfor %}' + REMOVE_RAW_OBS: 
'platform.REMOVE_RAW_OBS_CCPA' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -43,8 +48,8 @@ task_get_obs_nohrsc: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' - REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_NOHRSC }}' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + OUTPUT_TIMES_ALL: *output_times_all_cumul + REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_NOHRSC' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -57,9 +62,12 @@ task_get_obs_mrms: <<: *default_vars OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' - VAR: 'REFC RETOP' - REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_MRMS }}' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + MRMS_FIELDS: 'REFC RETOP' + OUTPUT_TIMES_ALL: &output_times_all_inst + '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_INST|length)) %} + {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_INST[i] }} + {%- endfor %}' + REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_MRMS' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -72,8 +80,8 @@ task_get_obs_ndas: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' - REMOVE_RAW_OBS_DIRS: '{{ platform.REMOVE_RAW_OBS_DIRS_NDAS }}' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + OUTPUT_TIMES_ALL: *output_times_all_inst + REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_NDAS' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -82,7 +90,7 @@ task_get_obs_ndas: task_run_MET_Pb2nc_obs: <<: *default_task_verify_pre attrs: - cycledefs: forecast + cycledefs: cycledef_obs_days_inst maxtries: '2' command: '&LOAD_MODULES_RUN_TASK_FP; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' envars: @@ -92,6 +100,7 @@ task_run_MET_Pb2nc_obs: FCST_OR_OBS: OBS OBTYPE: NDAS OBS_DIR: '&NDAS_OBS_DIR;' + OUTPUT_TIMES_ALL: *output_times_all_inst METPLUSTOOLNAME: 'PB2NC' dependency: and: @@ -127,14 +136,25 @@ metatask_PcpCombine_obs: and: datadep: text: "&CCPA_OBS_DIR;" - or: - not: - taskvalid: - attrs: - task: get_obs_ccpa - taskdep: - attrs: - task: get_obs_ccpa + datadep_all_get_obs_ccpa_complete: + attrs: + age: 00:00:00:30 + # Check that the flag files that indicate that the get_obs_ccpa tasks + # are complete are all present before launching any PcpCombine task. 
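+      # (Illustration: with obs days 20240429 and 20240430, the Jinja below
+      # renders to two flag-file paths, each inside its own <datadep>
+      # element, so ROCOTO waits for every day's get_obs_ccpa flag file.)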
+ text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_CUMUL|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_CUMUL[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_ccpa_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_ccpa_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' metatask_check_post_output_all_mems: var: From c774e40cc4d9f3f48fee2ef49a01a6bae0b47e21 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 5 Sep 2024 12:29:06 -0600 Subject: [PATCH 064/260] Add code to be able to generate forecast output times and obs days, both for instantaneous and cumulative fields, and pass this info to the ROCOTO xml generation system. --- ush/set_cycle_dates.py | 268 +++++++++++++++++++++++++++++++++++++++-- ush/setup.py | 44 ++++++- 2 files changed, 298 insertions(+), 14 deletions(-) diff --git a/ush/set_cycle_dates.py b/ush/set_cycle_dates.py index 0c63a87e49..c3969d8ef3 100644 --- a/ush/set_cycle_dates.py +++ b/ush/set_cycle_dates.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 from datetime import datetime, timedelta, date - +from pprint import pprint from python_utils import print_input_args, print_err_msg_exit - -def set_cycle_dates(date_start, date_end, incr_cycl_freq): +def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl): """This file defines a function that, given the start and end dates as date time objects, and a cycling frequency, returns an array of cycle date-hours whose elements have the form YYYYMMDDHH. Here, @@ -13,22 +12,267 @@ def set_cycle_dates(date_start, date_end, incr_cycl_freq): two-digit day of the month, and HH is a two-digit hour of the day. Args: - date_start: start date, datetime object - date_end: end date, datetime object - incr_cycl_freq: cycle frequency increment in hours, an int + start_time_first_cycl: + Starting time of first cycle; a datetime object. + + start_time_last_cycl: + Starting time of last cycle; a datetime object. + + cycl_intvl: + Time interval between cycle starting times; a timedelta object. + Returns: - A list of dates in a format YYYYMMDDHH + A list of strings containing cycle starting times in the format + 'YYYYMMDDHH' """ print_input_args(locals()) - freq_delta = timedelta(hours=incr_cycl_freq) - # iterate over cycles all_cdates = [] - cdate = date_start - while cdate <= date_end: + cdate = start_time_first_cycl + while cdate <= start_time_last_cycl: cyc = datetime.strftime(cdate, "%Y%m%d%H") all_cdates.append(cyc) - cdate += freq_delta + cdate += cycl_intvl return all_cdates + + +def set_fcst_output_times_and_obs_days_all_cycles( + start_time_first_cycl, start_time_last_cycl, cycl_intvl, fcst_len, fcst_output_intvl): + """Given the starting time of the first cycle of an SRW App experiment, the + starting time of the last cycle, the interval between cycle start times, + the forecast length, and the forecast output interval, this function + returns two pairs of lists: the first of each pair is a list of strings + of forecast output times over all cycles (each element of the form + 'YYYYMMDDHH'), and the second is a list of days over all cycles on which + observations are needed to perform verification (each element of the form + 'YYYYMMDD'). 
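+    For example, a single cycle starting at 2024042912 with a 48-hour forecast
+    and hourly output has output times out to 2024050112, so obs are needed on
+    the days 20240429, 20240430, and 20240501.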
The first pair of lists is for instantaneous output fields + (e.g. REFC, RETOP, T2m), and the second pair is for cumulative ones (e.g. + APCP or accumulated precipitation). + + Args: + start_time_first_cycl: + Starting time of first cycle; a datetime object. + + start_time_last_cycl: + Starting time of last cycle; a datetime object. + + cycl_intvl: + Time interval between cycle starting times; a timedelta object. + + fcst_len: + The length of each forecast; a timedelta object. + + fcst_output_intvl: + Time interval between forecast output times; a timedelta object. + + Returns: + output_times_all_cycles_inst: + List of forecast output times over all cycles of instantaneous fields. + Each element is a string of the form 'YYYYMMDDHH'. + + obs_days_all_cycles_inst: + List of observation days (i.e. days on which observations are needed to + perform verification) over all cycles of instantaneous fields. Each + element is a string of the form 'YYYYMMDD'. + + output_times_all_cycles_cumul: + List of forecast output times over all cycles of cumulative fields. Each + element is a string of the form 'YYYYMMDDHH'. + + obs_days_all_cycles_cumul: + List of observation days (i.e. days on which observations are needed to + perform verification) over all cycles of cumulative fields. Each element + is a string of the form 'YYYYMMDD'. + + """ + + # Get the list containing the starting times of the cycles. Each element + # of the list is a string of the form 'YYYYMMDDHH'. + cycle_start_times_str \ + = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl) + + # Convert cycle_start_times_str to a list of datetime objects. + cycle_start_times = [datetime.strptime(yyyymmddhh, "%Y%m%d%H") for yyyymmddhh in cycle_start_times_str] + + # Get the number of forecast output times per cycle/forecast. + num_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) + + # Initialize sets that will contain the various forecast output and obs + # day information. + output_times_all_cycles_inst = set() + obs_days_all_cycles_inst = set() + output_times_all_cycles_cumul = set() + obs_days_all_cycles_cumul = set() + + for i, start_time_crnt_cycle in enumerate(cycle_start_times): + # Create a list of forecast output times of instantaneous fields for the + # current cycle. + output_times_crnt_cycle_inst \ + = [start_time_crnt_cycle + i*fcst_output_intvl + for i in range(0,num_output_times_per_cycle)] + # Include the output times of instantaneous fields for the current cycle + # in the set of all such output times over all cycles. + output_times_all_cycles_inst \ + = output_times_all_cycles_inst | set(output_times_crnt_cycle_inst) + + # Create a list of instantaneous field obs days (i.e. days on which + # observations of instantaneous fields are needed for verification) for + # the current cycle. We do this by dropping the hour-of-day from each + # element of the list of forecast output times and keeping only unique + # elements. + tmp = [datetime_obj.date() for datetime_obj in output_times_crnt_cycle_inst] + obs_days_crnt_cycl_inst = sorted(set(tmp)) + # Include the obs days for instantaneous fields for the current cycle + # in the set of all such obs days over all cycles. + obs_days_all_cycles_inst = obs_days_all_cycles_inst | set(obs_days_crnt_cycl_inst) + + # Create a list of forecast output times of cumulative fields for the + # current cycle. 
This is simply the list of forecast output times for
+    # instantaneous fields but with the first time dropped (because nothing
+    # has yet accumulated at the starting time of the cycle).  Copy the list
+    # so that the pop below does not also modify the list of output times of
+    # instantaneous fields.
+    output_times_crnt_cycle_cumul = list(output_times_crnt_cycle_inst)
+    output_times_crnt_cycle_cumul.pop(0)
+    # Include the output times of cumulative fields for the current cycle in
+    # the set of all such output times over all cycles.
+    output_times_all_cycles_cumul \
+    = output_times_all_cycles_cumul | set(output_times_crnt_cycle_cumul)
+
+    # Create a list of cumulative field obs days (i.e. days on which
+    # observations of cumulative fields are needed for verification) for
+    # the current cycle.  We do this by dropping the hour-of-day from each
+    # element of the list of forecast output times and keeping only unique
+    # elements.  Note, however, that before dropping the hour-of-day from
+    # the list of forecast output times, we remove the last forecast output
+    # time if it happens to be the 0th hour of a day.  This is because in
+    # the scripts/tasks that get observations of cumulative fields, the
+    # zeroth hour of a day is considered part of the previous day (because
+    # it represents accumulation that occurred on the previous day).
+    tmp = output_times_crnt_cycle_cumul
+    last_output_time_cumul = output_times_crnt_cycle_cumul[-1]
+    if last_output_time_cumul.hour == 0:
+        tmp.pop()
+    tmp = [datetime_obj.date() for datetime_obj in tmp]
+    obs_days_crnt_cycl_cumul = sorted(set(tmp))
+    # Include the obs days for cumulative fields for the current cycle in the
+    # set of all such obs days over all cycles.
+    obs_days_all_cycles_cumul = obs_days_all_cycles_cumul | set(obs_days_crnt_cycl_cumul)
+
+    # Convert the set of output times of instantaneous fields over all cycles
+    # to a sorted list of strings of the form 'YYYYMMDDHH'.
+    output_times_all_cycles_inst = sorted(output_times_all_cycles_inst)
+    output_times_all_cycles_inst = [datetime.strftime(output_times_all_cycles_inst[i], "%Y%m%d%H")
+                                    for i in range(len(output_times_all_cycles_inst))]
+
+    # Convert the set of obs days for instantaneous fields over all cycles
+    # to a sorted list of strings of the form 'YYYYMMDD'.
+    obs_days_all_cycles_inst = sorted(obs_days_all_cycles_inst)
+    obs_days_all_cycles_inst = [datetime.strftime(obs_days_all_cycles_inst[i], "%Y%m%d")
+                                for i in range(len(obs_days_all_cycles_inst))]
+
+    # Convert the set of output times of cumulative fields over all cycles to
+    # a sorted list of strings of the form 'YYYYMMDDHH'.
+    output_times_all_cycles_cumul = sorted(output_times_all_cycles_cumul)
+    output_times_all_cycles_cumul = [datetime.strftime(output_times_all_cycles_cumul[i], "%Y%m%d%H")
+                                     for i in range(len(output_times_all_cycles_cumul))]
+
+    # Convert the set of obs days for cumulative fields over all cycles to a
+    # sorted list of strings of the form 'YYYYMMDD'.
+    obs_days_all_cycles_cumul = sorted(obs_days_all_cycles_cumul)
+    obs_days_all_cycles_cumul = [datetime.strftime(obs_days_all_cycles_cumul[i], "%Y%m%d")
+                                 for i in range(len(obs_days_all_cycles_cumul))]
+
+    return output_times_all_cycles_inst, obs_days_all_cycles_inst, \
+           output_times_all_cycles_cumul, obs_days_all_cycles_cumul
+
+
+def set_cycledefs_for_obs_days(obs_days_all_cycles):
+    """Given a list of days on which obs are needed, this function generates a
+    list of ROCOTO-style cycledef strings that together span the days (over
+    all cycles of an SRW App experiment) on which obs are needed.
The input
+    list of days must be increasing in time, but the days do not have to be
+    consecutive, i.e. there may be gaps between days that are greater than
+    one day.
+
+    Each cycledef string in the output list represents a set of consecutive
+    days in the input list (when used inside a <cycledef> tag in a ROCOTO
+    XML).  Thus, when the cycledef strings in the output list are all
+    assigned to the same cycledef group in a ROCOTO XML, that group will
+    represent all the days on which observations are needed.
+
+    Args:
+        obs_days_all_cycles:
+        A list of strings of the form 'YYYYMMDD', with each string representing
+        a day on which observations are needed.  Note that the list must be
+        sorted, i.e. the days must be increasing in time, but there may be
+        gaps between days.
+
+    Returns:
+        cycledef_all_obs_days:
+        A list of strings, with each string being a ROCOTO-style cycledef of
+        the form
+
+            '{yyyymmdd_start}0000 {yyyymmdd_end}0000 24:00:00'
+
+        where {yyyymmdd_start} is the starting day of the first cycle in the
+        cycledef, and {yyyymmdd_end} is the starting day of the last cycle (note
+        that the minutes and hours in these cycledef strings are always set to
+        '00').  Thus, one of the elements of the output list may be as follows:
+
+            '202404290000 202405010000 24:00:00'
+    """
+
+    # To enable arithmetic with dates, convert the input string list of
+    # observation days (i.e. days on which observations are needed) over all
+    # cycles to a list of datetime objects.
+    tmp = [datetime.strptime(yyyymmdd, "%Y%m%d") for yyyymmdd in obs_days_all_cycles]
+
+    # Initialize the variable that in the loop below contains the date of
+    # the previous day.  This is just the first element of the list of
+    # datetime objects constructed above.  Then use it to initialize the
+    # list (contin_obs_day_lists) that will contain lists of consecutive
+    # observation days.  Thus, after its construction is complete, each
+    # element of contin_obs_day_lists will itself be a list containing
+    # datetime objects that are 24 hours apart.
+    day_prev = tmp[0]
+    contin_obs_day_lists = list()
+    contin_obs_day_lists.append([day_prev])
+
+    # Remove the first element of the list of obs days since it has already
+    # been used to initialize contin_obs_day_lists.
+    tmp.pop(0)
+
+    # Loop over the remaining list of obs days and construct the list of
+    # lists of consecutive obs days.
+    one_day = timedelta(days=1)
+    for day_crnt in tmp:
+        # If the current obs day comes 24 hours after the previous obs day,
+        # i.e. if it is the day after the previous obs day, append it to the
+        # last existing list in contin_obs_day_lists.
+        if day_crnt == day_prev + one_day:
+            contin_obs_day_lists[-1].append(day_crnt)
+        # If the current obs day is NOT the day after the previous obs day,
+        # append a new element to contin_obs_day_lists and initialize it as a
+        # list containing a single element -- the current obs day.
+        else:
+            contin_obs_day_lists.append([day_crnt])
+        # Update the value of the previous day in preparation for the next
+        # iteration of the loop.
+        day_prev = day_crnt
+
+    # Use the list of lists of consecutive obs days to construct a list of
+    # ROCOTO-style cycledef strings that each represent a set of consecutive
+    # obs days when included in a <cycledef> tag in a ROCOTO XML.  Each
+    # string in this new list corresponds to a series of consecutive days on
+    # which observations are needed (where by "consecutive" we mean no days
+    # are skipped), and there is at least a one day gap between each such
+    # series.  These cycledefs together represent all the days (i.e.
over all
+    # cycles of the experiment) on which observations are needed.
+    cycledef_all_obs_days = list()
+    for contin_obs_day_list in contin_obs_day_lists:
+        cycledef_start = contin_obs_day_list[0].strftime('%Y%m%d%H%M')
+        cycledef_end = contin_obs_day_list[-1].strftime('%Y%m%d%H%M')
+        cycledef_all_obs_days.append(' '.join([cycledef_start, cycledef_end, '24:00:00']))
+
+    return cycledef_all_obs_days
diff --git a/ush/setup.py b/ush/setup.py
index 0511653fa2..b6f4256fc4 100644
--- a/ush/setup.py
+++ b/ush/setup.py
@@ -10,6 +10,7 @@
 from textwrap import dedent

 import yaml
+from pprint import pprint

 from python_utils import (
     log_info,
@@ -37,7 +38,9 @@
     load_xml_file,
 )

-from set_cycle_dates import set_cycle_dates
+from set_cycle_dates import \
+    set_cycle_dates, set_fcst_output_times_and_obs_days_all_cycles, \
+    set_cycledefs_for_obs_days
 from set_predef_grid_params import set_predef_grid_params
 from set_gridparams_ESGgrid import set_gridparams_ESGgrid
 from set_gridparams_GFDLgrid import set_gridparams_GFDLgrid
@@ -754,6 +757,43 @@ def get_location(xcs, fmt, expt_cfg):
     date_last_cycl = workflow_config.get("DATE_LAST_CYCL")
     incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ"))

+    # Set the forecast output interval.  Ideally, this should be obtained
+    # from the SRW App's configuration file, but such a variable doesn't
+    # yet exist in that file.
+    fcst_output_intvl_hrs = 1
+
+    # To enable arithmetic with dates and times, convert various time
+    # intervals from integer to datetime.timedelta objects.
+    cycl_intvl = datetime.timedelta(days=0, hours=incr_cycl_freq, minutes=0, seconds=0)
+    fcst_len = datetime.timedelta(days=0, hours=fcst_len_hrs, minutes=0, seconds=0)
+    fcst_output_intvl = datetime.timedelta(days=0, hours=fcst_output_intvl_hrs, minutes=0, seconds=0)
+
+    # Generate a list of forecast output times and a list of obs days (i.e.
+    # days on which observations are needed to perform verification) over all
+    # cycles, both for instantaneous fields (e.g. T2m, REFC, RETOP) and for
+    # cumulative ones (e.g. APCP).
+    output_times_all_cycles_inst, obs_days_all_cycles_inst, \
+    output_times_all_cycles_cumul, obs_days_all_cycles_cumul \
+    = set_fcst_output_times_and_obs_days_all_cycles( \
+      date_first_cycl, date_last_cycl, cycl_intvl, fcst_len, fcst_output_intvl)
+
+    # Add the lists generated above to the dictionary containing workflow
+    # configuration variables.  These will be needed in generating the ROCOTO
+    # XML.
+    workflow_config['OUTPUT_TIMES_ALL_CYCLES_INST'] = output_times_all_cycles_inst
+    workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles_inst
+    workflow_config['OUTPUT_TIMES_ALL_CYCLES_CUMUL'] = output_times_all_cycles_cumul
+    workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles_cumul
+
+    # Generate lists of ROCOTO cycledef strings corresponding to the obs days
+    # for instantaneous fields and those for cumulative ones.
+    cycledef_obs_days_inst = set_cycledefs_for_obs_days(obs_days_all_cycles_inst)
+    cycledef_obs_days_cumul = set_cycledefs_for_obs_days(obs_days_all_cycles_cumul)
+    # Save the lists of cycledefs in the dictionary containing values needed
+    # to construct the ROCOTO XML.
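+    # (For example, obs days ['20240429', '20240430', '20240501', '20240504']
+    # yield the cycledefs ['202404290000 202405010000 24:00:00',
+    # '202405040000 202405040000 24:00:00'].)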
+    rocoto_config['cycledefs']['cycledef_obs_days_inst'] = cycledef_obs_days_inst
+    rocoto_config['cycledefs']['cycledef_obs_days_cumul'] = cycledef_obs_days_cumul
+
     # set varying forecast lengths only when fcst_len_hrs=-1
     if fcst_len_hrs == -1:
         fcst_len_cycl = workflow_config.get("FCST_LEN_CYCL")
@@ -765,7 +805,7 @@ def get_location(xcs, fmt, expt_cfg):
             num_cycles = len(set_cycle_dates(
                 date_first_cycl,
                 date_last_cycl,
-                incr_cycl_freq))
+                cycl_intvl))

             if num_cycles != len(fcst_len_cycl):
                 logger.error(f"""
                     The number of entries in FCST_LEN_CYCL does

From 7bb8b1fefdc0b96c20e93c1e8c3551e89bc8c05d Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Thu, 5 Sep 2024 14:06:02 -0600
Subject: [PATCH 065/260] Fix bug in the way the start time of the second
 cycle is calculated.

---
 parm/wflow/default_workflow.yaml |  8 +++++++-
 ush/setup.py                     | 13 +++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/parm/wflow/default_workflow.yaml b/parm/wflow/default_workflow.yaml
index c79415b3be..d7198cf524 100644
--- a/parm/wflow/default_workflow.yaml
+++ b/parm/wflow/default_workflow.yaml
@@ -51,7 +51,13 @@ rocoto:
     forecast:
     - !startstopfreq ['{{workflow.DATE_FIRST_CYCL}}', '{{workflow.DATE_LAST_CYCL}}', '{{workflow.INCR_CYCL_FREQ}}']
     cycled_from_second:
-    - !startstopfreq ['{%- if workflow.DATE_FIRST_CYCL != workflow.DATE_LAST_CYCL %}{{ [workflow.DATE_FIRST_CYCL[0:8], "{:02d}".format(workflow.INCR_CYCL_FREQ)]|join }}{%- else %}{{workflow.DATE_FIRST_CYCL}}{%- endif %}', '{{workflow.DATE_LAST_CYCL}}', '{{workflow.INCR_CYCL_FREQ}}']
+    - !startstopfreq ['{%- if workflow.DATE_FIRST_CYCL != workflow.DATE_LAST_CYCL %}
+                      {{- workflow.DATE_SECOND_CYCL }}
+                      {%- else %}
+                      {{- workflow.DATE_FIRST_CYCL }}
+                      {%- endif %}',
+                      '{{ workflow.DATE_LAST_CYCL }}',
+                      '{{ workflow.INCR_CYCL_FREQ }}']
   log: !cycstr '&LOGDIR;/FV3LAM_wflow.{% if user.RUN_ENVIR == "nco" %}{{ workflow.WORKFLOW_ID + "." }}{% endif %}log'
   tasks:
     taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/test.yaml"]|include }}'
diff --git a/ush/setup.py b/ush/setup.py
index b6f4256fc4..4b56294aaa 100644
--- a/ush/setup.py
+++ b/ush/setup.py
@@ -794,6 +794,19 @@ def get_location(xcs, fmt, expt_cfg):
     rocoto_config['cycledefs']['cycledef_obs_days_inst'] = cycledef_obs_days_inst
     rocoto_config['cycledefs']['cycledef_obs_days_cumul'] = cycledef_obs_days_cumul

+    # The "cycled_from_second" cycledef in the default workflow configuration
+    # file (default_workflow.yaml) requires the starting date of the second
+    # cycle.  That is difficult to calculate in the yaml file itself because
+    # currently, there are no utilities to perform arithmetic with dates.
+    # Thus, we calculate it here and save it as a variable in the workflow
+    # configuration dictionary.  Note that correct functioning of the default
+    # workflow yaml file also requires that DATE_[FIRST|SECOND|LAST]_CYCL all
+    # be strings, not datetime objects.  We perform those conversions here.
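+    # (For example, DATE_FIRST_CYCL = 2024042912 with INCR_CYCL_FREQ = 24
+    # gives DATE_SECOND_CYCL = 2024043012.)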
+ date_second_cycl = date_first_cycl + cycl_intvl + workflow_config['DATE_FIRST_CYCL'] = datetime.datetime.strftime(date_first_cycl, "%Y%m%d%H") + workflow_config['DATE_SECOND_CYCL'] = datetime.datetime.strftime(date_second_cycl, "%Y%m%d%H") + workflow_config['DATE_LAST_CYCL'] = datetime.datetime.strftime(date_last_cycl, "%Y%m%d%H") + # set varying forecast lengths only when fcst_len_hrs=-1 if fcst_len_hrs == -1: fcst_len_cycl = workflow_config.get("FCST_LEN_CYCL") From ef6aafe6c1e2f02407bf417b4faa9a4f559d1962 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 5 Sep 2024 14:07:24 -0600 Subject: [PATCH 066/260] Rename script for clarity. --- ush/{set_cycle_dates.py => set_cycle_and_obs_timeinfo.py} | 0 ush/setup.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename ush/{set_cycle_dates.py => set_cycle_and_obs_timeinfo.py} (100%) diff --git a/ush/set_cycle_dates.py b/ush/set_cycle_and_obs_timeinfo.py similarity index 100% rename from ush/set_cycle_dates.py rename to ush/set_cycle_and_obs_timeinfo.py diff --git a/ush/setup.py b/ush/setup.py index 4b56294aaa..9ecc5e402b 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -38,7 +38,7 @@ load_xml_file, ) -from set_cycle_dates import \ +from set_cycle_and_obs_timeinfo import \ set_cycle_dates, set_fcst_output_times_and_obs_days_all_cycles, \ set_cycledefs_for_obs_days from set_predef_grid_params import set_predef_grid_params From 99246f2e5d08d6e837f0043eeb8d2f418072ff2c Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 05:54:05 -0600 Subject: [PATCH 067/260] Allow use of dots in WE2E test names. --- tests/WE2E/run_WE2E_tests.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index 5d4bd81105..d3c2cb98ab 100755 --- a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -141,7 +141,6 @@ def run_we2e_tests(homedir, args) -> None: pretty_list = "\n".join(str(x) for x in tests_to_run) logging.info(f'Will run {len(tests_to_run)} tests:\n{pretty_list}') - config_default_file = os.path.join(ushdir,'config_defaults.yaml') logging.debug(f"Loading config defaults file {config_default_file}") config_defaults = load_config_file(config_default_file) @@ -159,7 +158,13 @@ def run_we2e_tests(homedir, args) -> None: # test-specific options, then write resulting complete config.yaml starttime = datetime.now() starttime_string = starttime.strftime("%Y%m%d%H%M%S") - test_name = os.path.basename(test).split('.')[1] + test_fn = os.path.basename(test) + # Set the test name to all characters between the initial "config." and + # the final ".yaml" in the file name. This will allow any characters to + # be used as part of the test name, in particular a ".". + prefix = 'config.' + suffix = '.yaml' + test_name = test_fn[test_fn.find(prefix)+len(prefix):test_fn.rfind(suffix)] logging.debug(f"For test {test_name}, constructing config.yaml") test_cfg = load_config_file(test) From f1d2c29261a8a9978757ae9a46edcdeda7e04dee Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 05:54:57 -0600 Subject: [PATCH 068/260] Remove extraneous "proc" subdirectory in default obs directories. 
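A quick check of the test-name slicing introduced in PATCH 067 above; the file name used here is one of the configs added later in this series (PATCH 070):

    prefix, suffix = "config.", ".yaml"
    test_fn = "config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml"
    test_name = test_fn[test_fn.find(prefix) + len(prefix):test_fn.rfind(suffix)]
    print(test_name)
    # get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr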
--- ush/config_defaults.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 9ac5bc39f8..6486388cad 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -317,10 +317,10 @@ platform: # #----------------------------------------------------------------------- # - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa/proc" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms/proc" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas/proc" + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # #----------------------------------------------------------------------- # From 5945b027f9bf65c30945d56a2b2237be036b72d2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 05:56:08 -0600 Subject: [PATCH 069/260] Comment out "set -x". --- ush/get_obs_ccpa.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index ef1d55eb05..0f3dd248ea 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -11,7 +11,7 @@ source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} set -u -set -x +#set -x # #----------------------------------------------------------------------- # From b20f2c9c617c0ff26567ec45b97df194e3ef9a29 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 17:54:26 -0600 Subject: [PATCH 070/260] Add WE2E test configuration files for getting obs, doing vx pre-processing, and deterministic vx. --- ...7hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 63 ++++++++++++++++++ ...1hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 65 +++++++++++++++++++ ...24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 63 ++++++++++++++++++ ...24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 65 +++++++++++++++++++ ...24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 64 ++++++++++++++++++ ...24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 63 ++++++++++++++++++ ...96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 65 +++++++++++++++++++ 7 files changed, 448 insertions(+) create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml new file mode 100644 index 
0000000000..8b840a8ea8 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. + + This test is for the scenario in which there are multiple, short (i.e. + shorter than 24hr), overlapping forecasts in a day. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 7 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043006' + FCST_LEN_HRS: 9 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml new file mode 100644 index 0000000000..18558e0d95 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -0,0 +1,65 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. + + This test is for the scenario in which there are multiple, short (i.e. + shorter than 24hr), NON-overlapping forecasts in a day with multi-hour + (but < 24hr) gaps between the end of one forecast and the start of the + next. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 11 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043022' + FCST_LEN_HRS: 3 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml new file mode 100644 index 0000000000..5d6929cd4a --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. + + This test is for the scenario in which the first obs needed is at 00z. It + tests the special treatment needed for obtaining CCPA and NDAS obs at 00z. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042900' + DATE_LAST_CYCL: '2024043000' + FCST_LEN_HRS: 3 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml new file mode 100644 index 0000000000..7ec2264509 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -0,0 +1,65 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from one of NSSL's MPAS prototypes submitted + to the 2024 HWT Spring Forecast Experiment. + + This test is for the scenario in which forecasts do not include 00z. It + is the simplest case of obtaining CCPA and NDAS obs because it avoids + testing the special treatment needed at 00z for these obs types. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024043012' + FCST_LEN_HRS: 3 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'mpashn4nssl' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml new file mode 100644 index 0000000000..29427201e7 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -0,0 +1,64 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from one of NSSL's MPAS prototypes submitted + to the 2024 HWT Spring Forecast Experiment. + + This test is for the scenario in which there are multiple, long (i.e. + longer than 24hr) overlapping forecasts. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024050212' + FCST_LEN_HRS: 48 + +platform: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+  REMOVE_RAW_OBS_CCPA: false
+  REMOVE_RAW_OBS_MRMS: false
+  REMOVE_RAW_OBS_NDAS: false
+  REMOVE_RAW_OBS_NOHRSC: false
+
+rocoto:
+  tasks:
+    taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}'
+
+task_get_extrn_lbcs:
+  LBC_SPEC_INTVL_HRS: 3
+
+task_run_post:
+  POST_OUTPUT_DOMAIN_NAME: 'mpashn4nssl'
+
+verification:
+  METPLUS_VERBOSITY_LEVEL: 5
+  VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN'
+  VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests'
+  FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}'
+  FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2'
+  FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc'
diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml
new file mode 100644
index 0000000000..508d14c7fa
--- /dev/null
+++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml
@@ -0,0 +1,63 @@
+metadata:
+  description: |-
+    SRW App configuration file to first pull CCPA, MRMS, and NDAS observations
+    from HPSS for multiple cycles and then perform deterministic verification
+    for all cycles, including first performing vx preprocessing with METplus
+    tools such as PcpCombine and Pb2Nc.
+
+    The staged forecast data are from NCEP's operational version of the HRRR.
+
+    This test is for the scenario in which the last obs needed is at 00z. It
+    tests the special treatment needed for obtaining CCPA and NDAS obs at 00z.
+
+user:
+  RUN_ENVIR: community
+
+platform:
+  EXTRN_MDL_DATA_STORES: hpss
+
+workflow:
+  PREEXISTING_DIR_METHOD: rename
+  PREDEF_GRID_NAME: RRFS_CONUS_25km
+  INCR_CYCL_FREQ: 24
+  DATE_FIRST_CYCL: '2024042921'
+  DATE_LAST_CYCL: '2024043021'
+  FCST_LEN_HRS: 3
+
+platform:
+  # Base directories in which to look for obs. If these do not exist and/or
+  # do not contain the required obs, create and populate them.
+  # Note that when performing WE2E tests, the default behavior is not to
+  # get obs files from HPSS but to use staged obs files. This is done by
+  # setting these variables to the (platform-specific) locations of these
+  # staged files. To force the WE2E testing system to get the obs from
+  # HPSS, here we reset these variables to their default values in the SRW
+  # workflow configuration file config_defaults.yaml.
+  CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa"
+  NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc"
+  MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms"
+  NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas"
+  # Do not remove raw obs files to be able to verify that only the necessary
+  # raw files are fetched from HPSS.
+  REMOVE_RAW_OBS_CCPA: false
+  REMOVE_RAW_OBS_MRMS: false
+  REMOVE_RAW_OBS_NDAS: false
+  REMOVE_RAW_OBS_NOHRSC: false
+
+rocoto:
+  tasks:
+    taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}'
+
+task_get_extrn_lbcs:
+  LBC_SPEC_INTVL_HRS: 3
+
+task_run_post:
+  POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep'
+
+verification:
+  METPLUS_VERBOSITY_LEVEL: 5
+  VX_FCST_MODEL_NAME: 'hrrr_ncep'
+  VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests'
+  FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}'
+  FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2'
+  FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc'
diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml
new file mode 100644
index 0000000000..c838e8581d
--- /dev/null
+++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml
@@ -0,0 +1,65 @@
+metadata:
+  description: |-
+    SRW App configuration file to first pull CCPA, MRMS, and NDAS observations
+    from HPSS for multiple cycles and then perform deterministic verification
+    for all cycles, including first performing vx preprocessing with METplus
+    tools such as PcpCombine and Pb2Nc.
+
+    The staged forecast data are from one of NSSL's MPAS prototypes submitted
+    to the 2024 HWT Spring Forecast Experiment.
+
+    This test is for the scenario in which there are multiple, long (i.e.
+    longer than 24hr) NON-overlapping forecasts with multi-day gaps between
+    the end of one forecast and the start of the next.
+
+user:
+  RUN_ENVIR: community
+
+platform:
+  EXTRN_MDL_DATA_STORES: hpss
+
+workflow:
+  PREEXISTING_DIR_METHOD: rename
+  PREDEF_GRID_NAME: RRFS_CONUS_25km
+  INCR_CYCL_FREQ: 96
+  DATE_FIRST_CYCL: '2024042912'
+  DATE_LAST_CYCL: '2024051112'
+  FCST_LEN_HRS: 48
+
+platform:
+  # Base directories in which to look for obs. If these do not exist and/or
+  # do not contain the required obs, create and populate them.
+  # Note that when performing WE2E tests, the default behavior is not to
+  # get obs files from HPSS but to use staged obs files. This is done by
+  # setting these variables to the (platform-specific) locations of these
+  # staged files. To force the WE2E testing system to get the obs from
+  # HPSS, here we reset these variables to their default values in the SRW
+  # workflow configuration file config_defaults.yaml.
+  CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa"
+  NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc"
+  MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms"
+  NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas"
+  # Do not remove raw obs files to be able to verify that only the necessary
+  # raw files are fetched from HPSS.
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'mpashn4nssl' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' From e3b1f6fdf41831aa628c3dcf046f90c4b6e9db67 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 8 Sep 2024 18:02:11 -0600 Subject: [PATCH 071/260] Rename files. --- ...multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml} | 0 ...multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml} | 0 ....multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml} | 0 ....multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml} | 0 ....multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml} | 0 ....multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml} | 0 ....multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml} | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml => config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml} (100%) diff --git 
a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml 
b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml From 2725832d80b1c6f72642c5b594022a1dd8d378cd Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 9 Sep 2024 13:34:21 -0600 Subject: [PATCH 072/260] Bug fixes after merging in develop. --- parm/wflow/verify_pre.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index a237246eb7..6c1b8ff2b5 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -26,7 +26,7 @@ task_get_obs_ccpa: attrs: cycledefs: cycledef_obs_days_cumul maxtries: '1' - command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' + command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars OBS_DIR: '&CCPA_OBS_DIR;' @@ -75,7 +75,7 @@ task_get_obs_mrms: task_get_obs_ndas: <<: *default_task_verify_pre - command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' + command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' From c38e9816ac1a7890f481dd6cc8904a52ff1bb962 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 9 Sep 2024 15:47:06 -0600 Subject: [PATCH 073/260] Bug fixes to use new yaml version of var_defns.sh. --- scripts/exregional_get_verif_obs.sh | 2 +- ush/get_obs_ccpa.sh | 4 +++- ush/get_obs_mrms.sh | 4 +++- ush/get_obs_ndas.sh | 4 +++- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index b971ab3615..fbd751718f 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -31,7 +31,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user nco ; do +for sect in user workflow nco ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 0f3dd248ea..b17dda97e0 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -8,7 +8,9 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} +for sect in user ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} +done set -u #set -x diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 92fc24fa56..a7a47d3f98 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -8,7 +8,9 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} +for sect in user ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} +done set -u #set -x diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 441de7b31d..f4cddfe19a 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -8,7 +8,9 @@ #----------------------------------------------------------------------- # . 
$USHdir/source_util_funcs.sh -source_config_for_task " " ${GLOBAL_VAR_DEFNS_FP} +for sect in user ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} +done set -u #set -x From d8cb3a06e17afe2a42fae78703b2197edc9b168e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 9 Sep 2024 18:12:11 -0600 Subject: [PATCH 074/260] Bug fix to remove crontab line once experiment is complete (regardless of whether it succeeded or failed). --- ush/get_crontab_contents.py | 2 +- ush/launch_FV3LAM_wflow.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ush/get_crontab_contents.py b/ush/get_crontab_contents.py index 6b0548141c..fbdf80dae9 100644 --- a/ush/get_crontab_contents.py +++ b/ush/get_crontab_contents.py @@ -162,7 +162,7 @@ def delete_crontab_line(called_from_cron, machine, crontab_line, debug): crontab_contents = crontab_contents.replace(crontab_line + "\n", "") crontab_contents = crontab_contents.replace(crontab_line, "") else: - print(f"\nWARNING: line not found in crontab, nothing to remove:\n {crontab_line}\n") + print(f"\nWARNING: line not found in crontab, nothing to remove:\n{crontab_line}\n") run_command(f"""echo '{crontab_contents}' | {crontab_cmd}""") diff --git a/ush/launch_FV3LAM_wflow.sh b/ush/launch_FV3LAM_wflow.sh index 7c26511f4f..7a4a16e4b5 100644 --- a/ush/launch_FV3LAM_wflow.sh +++ b/ush/launch_FV3LAM_wflow.sh @@ -353,9 +353,9 @@ script for this experiment: # Remove CRONTAB_LINE from cron table # if [ "${called_from_cron}" = "TRUE" ]; then - python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l="${CRONTAB_LINE}" -c -d + python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l='${CRONTAB_LINE}' -c -d else - python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l="${CRONTAB_LINE}" -d + python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l='${CRONTAB_LINE}' -d fi fi # From f7b77fd8a383b49dd6895a603af2b829e67b404b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 9 Sep 2024 23:27:05 -0600 Subject: [PATCH 075/260] Use SRW's standard way of doing boolean comparisons in bash. --- ush/get_obs_ccpa.sh | 6 +++--- ush/get_obs_mrms.sh | 4 ++-- ush/get_obs_ndas.sh | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index b17dda97e0..21f85e2ff4 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -325,7 +325,7 @@ mv_or_cp="cp" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -368,7 +368,7 @@ arcv_hr = ${arcv_hr}" fi done - if [[ ${do_retrieve} == "TRUE" ]]; then + if [[ $(boolify "${do_retrieve}") == "TRUE" ]]; then # Make sure the raw quarter-daily directory exists because it is used # below as the output directory of the retrieve_data.py script (so if @@ -454,7 +454,7 @@ done # #----------------------------------------------------------------------- # -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then print_info_msg "Removing raw directories and files..." rm -rf ${basedir_raw} || print_err_msg_exit "\ Failed to remove raw directories and files." 
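The boolify comparisons adopted above rely on a helper that is defined
elsewhere in the SRW bash utilities, so the following is only a minimal
sketch of the behavior these comparisons assume (case-insensitive
normalization of common boolean spellings to "TRUE"/"FALSE"); the real
implementation may accept additional spellings:

    boolify() {
      local val
      val=$(printf "%s" "$1" | tr '[:lower:]' '[:upper:]')
      case "${val}" in
        TRUE|YES|1) printf "TRUE" ;;
        *) printf "FALSE" ;;
      esac
    }

    # Any of "true", "True", "YES", or "1" now enables the cleanup branch.
    if [[ $(boolify "${REMOVE_RAW_OBS:-false}") == "TRUE" ]]; then
      echo "raw obs directories will be removed"
    fi
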
diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index a7a47d3f98..d82ba842c0 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -175,7 +175,7 @@ mv_or_cp="cp" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -260,7 +260,7 @@ done # #----------------------------------------------------------------------- # -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then print_info_msg "Removing raw directories and files..." rm -rf ${mrms_basedir_raw} || print_err_msg_exit "\ Failed to remove raw directories and files." diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index f4cddfe19a..b65e3173f1 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -185,7 +185,7 @@ mv_or_cp="cp" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -228,7 +228,7 @@ arcv_hr = ${arcv_hr}" fi done - if [[ ${do_retrieve} == "TRUE" ]]; then + if [[ $(boolify "${do_retrieve}") == "TRUE" ]]; then # Make sure the raw quarter-daily directory exists because it is used # below as the output directory of the retrieve_data.py script (so if @@ -305,7 +305,7 @@ done # #----------------------------------------------------------------------- # -if [[ "${REMOVE_RAW_OBS}" == "TRUE" ]]; then +if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then print_info_msg "Removing raw directories and files..." rm -rf ${basedir_raw} || print_err_msg_exit "\ Failed to remove raw directories and files." From 6425b822ea8b26642d48c07ab6750c8b9d1c8b78 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 10 Sep 2024 01:21:13 -0600 Subject: [PATCH 076/260] Make script more compact. --- scripts/exregional_get_verif_obs.sh | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index fbd751718f..4e981b3958 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -135,21 +135,17 @@ done # #----------------------------------------------------------------------- # -if [[ ${OBTYPE} == "CCPA" ]]; then - $USHdir/get_obs_ccpa.sh -elif [[ ${OBTYPE} == "MRMS" ]]; then - $USHdir/get_obs_mrms.sh -elif [[ ${OBTYPE} == "NDAS" ]]; then - $USHdir/get_obs_ndas.sh -elif [[ ${OBTYPE} == "NOHRSC" ]]; then - $USHdir/get_obs_nohrsc.sh -else +valid_obtypes=("CCPA" "MRMS" "NDAS" "NOHRSC") +if [[ ! ${valid_obtypes[@]} =~ ${OBTYPE} ]]; then print_err_msg_exit "\ -Invalid OBTYPE specified for script: +Invalid observation type (OBTYPE) specified for script: OBTYPE = \"${OBTYPE}\" -Valid options are CCPA, MRMS, NDAS, and NOHRSC. 
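One caveat with the validity check introduced just above: inside [[ ]],
the unquoted ${valid_obtypes[@]} expands to the single string
"CCPA MRMS NDAS NOHRSC", so the =~ test is a regex match against that
string and would also accept partial names such as "DAS". A stricter
exact-match sketch (illustrative only, not part of the patch):

    valid_obtypes=("CCPA" "MRMS" "NDAS" "NOHRSC")
    OBTYPE="DAS"    # invalid, but a substring of "NDAS"

    is_valid="FALSE"
    for obtype in "${valid_obtypes[@]}"; do
      [[ "${obtype}" == "${OBTYPE}" ]] && is_valid="TRUE"
    done
    echo "${OBTYPE} valid: ${is_valid}"    # prints "DAS valid: FALSE"
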
+Valid observation types are: + $(printf "\"%s\" " ${valid_obtypes[@]}) " fi +script_bn="get_obs_$(echo_lowercase ${OBTYPE})" +$USHdir/${script_bn}.sh # #----------------------------------------------------------------------- # @@ -158,9 +154,8 @@ fi # #----------------------------------------------------------------------- # -obtype=$(echo_lowercase ${OBTYPE}) mkdir -p ${WFLOW_FLAG_FILES_DIR} -touch "${WFLOW_FLAG_FILES_DIR}/get_obs_${obtype}_${PDY}_complete.txt" +touch "${WFLOW_FLAG_FILES_DIR}/${script_bn}_${PDY}_complete.txt" # #----------------------------------------------------------------------- # From 24c8c5912e3b6416bfff3623c55445fd87a30fa8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 10 Sep 2024 01:25:32 -0600 Subject: [PATCH 077/260] Bug fixes. --- parm/wflow/verify_pre.yaml | 8 ++++---- ush/get_obs_mrms.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 6c1b8ff2b5..c6e33e4565 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -35,7 +35,7 @@ task_get_obs_ccpa: '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL|length)) %} {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL[i] }} {%- endfor %}' - REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_CCPA' + REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_CCPA }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -49,7 +49,7 @@ task_get_obs_nohrsc: OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' OUTPUT_TIMES_ALL: *output_times_all_cumul - REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_NOHRSC' + REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_NOHRSC }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -67,7 +67,7 @@ task_get_obs_mrms: '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_INST|length)) %} {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_INST[i] }} {%- endfor %}' - REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_MRMS' + REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_MRMS }}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -81,7 +81,7 @@ task_get_obs_ndas: OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' OUTPUT_TIMES_ALL: *output_times_all_inst - REMOVE_RAW_OBS: 'platform.REMOVE_RAW_OBS_NDAS' + REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_NDAS }}' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index d82ba842c0..377ffb25c0 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -262,6 +262,6 @@ done # if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then print_info_msg "Removing raw directories and files..." 
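The verify_pre.yaml fix above is worth a second look: without the {{ }}
delimiters, Jinja treats the value as literal text, so the task would have
received the string "platform.REMOVE_RAW_OBS_CCPA" rather than the setting
itself. A small self-contained demonstration (using the jinja2 package;
the dictionary below is made up for illustration):

    from jinja2 import Template

    platform = {"REMOVE_RAW_OBS_CCPA": False}
    print(Template("'platform.REMOVE_RAW_OBS_CCPA'").render(platform=platform))
    # -> 'platform.REMOVE_RAW_OBS_CCPA'  (the literal text, i.e. the bug)
    print(Template("'{{ platform.REMOVE_RAW_OBS_CCPA }}'").render(platform=platform))
    # -> 'False'  (the intended substitution)
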
-  rm -rf ${mrms_basedir_raw} || print_err_msg_exit "\
+  rm -rf ${basedir_raw} || print_err_msg_exit "\
 Failed to remove raw directories and files."
 fi

From 2622f8b47809449ff797a80f5cdb883b406bd2ee Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Tue, 10 Sep 2024 10:27:39 -0600
Subject: [PATCH 078/260] Get REMOVE_RAW_OBS... variables directly from
 var_defns.yaml file instead of setting them as environment variables in the
 rocoto xml. This makes it possible to change their settings by changing the
 values in var_defns.yaml and rerunning the get_obs_... tasks.

---
 parm/wflow/verify_pre.yaml |  4 ----
 ush/get_obs_ccpa.sh        | 15 +++++++++------
 ush/get_obs_mrms.sh        | 15 +++++++++------
 ush/get_obs_ndas.sh        | 15 +++++++++------
 4 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml
index c6e33e4565..4a9e750c56 100644
--- a/parm/wflow/verify_pre.yaml
+++ b/parm/wflow/verify_pre.yaml
@@ -35,7 +35,6 @@ task_get_obs_ccpa:
       '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL|length)) %}
        {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL[i] }}
        {%- endfor %}'
-    REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_CCPA }}'
     native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}'
     partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}'
     queue: "&QUEUE_HPSS;"
@@ -49,7 +48,6 @@ task_get_obs_nohrsc:
     OBS_DIR: '&NOHRSC_OBS_DIR;'
     OBTYPE: 'NOHRSC'
     OUTPUT_TIMES_ALL: *output_times_all_cumul
-    REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_NOHRSC }}'
     native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}'
     partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}'
     queue: "&QUEUE_HPSS;"
@@ -67,7 +65,6 @@ task_get_obs_mrms:
       '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_INST|length)) %}
        {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_INST[i] }}
        {%- endfor %}'
-    REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_MRMS }}'
     native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}'
     partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}'
     queue: "&QUEUE_HPSS;"
@@ -81,7 +78,6 @@ task_get_obs_ndas:
     OBS_DIR: '&NDAS_OBS_DIR;'
     OBTYPE: 'NDAS'
     OUTPUT_TIMES_ALL: *output_times_all_inst
-    REMOVE_RAW_OBS: '{{ platform.REMOVE_RAW_OBS_NDAS }}'
     queue: "&QUEUE_HPSS;"
     native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}'
     partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}'
diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh
index 21f85e2ff4..6d217c43c2 100755
--- a/ush/get_obs_ccpa.sh
+++ b/ush/get_obs_ccpa.sh
@@ -8,7 +8,7 @@
 #-----------------------------------------------------------------------
 #
 . $USHdir/source_util_funcs.sh
-for sect in user ; do
+for sect in user platform ; do
   source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect}
 done
@@ -322,10 +322,13 @@ fi
 # Whether to move or copy files from raw to processed directories.
 #mv_or_cp="mv"
 mv_or_cp="cp"
+# Whether to remove raw observations after processed directories have
+# been created from them.
+remove_raw_obs="${REMOVE_RAW_OBS_CCPA}" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -450,12 +453,12 @@ done # #----------------------------------------------------------------------- # -# Clean up raw directories. +# Clean up raw obs directories. # #----------------------------------------------------------------------- # -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then - print_info_msg "Removing raw directories and files..." +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + print_info_msg "Removing raw obs directories..." rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw directories and files." +Failed to remove raw obs directories." fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 377ffb25c0..4fbe503b25 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user ; do +for sect in user platform ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -172,10 +172,13 @@ fi # Whether to move or copy files from raw to processed directories. #mv_or_cp="mv" mv_or_cp="cp" +# Whether to remove raw observations after processed directories have +# been created from them. +remove_raw_obs="${REMOVE_RAW_OBS_MRMS}" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -256,12 +259,12 @@ done # #----------------------------------------------------------------------- # -# Clean up raw directories. +# Clean up raw obs directories. # #----------------------------------------------------------------------- # -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then - print_info_msg "Removing raw directories and files..." +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + print_info_msg "Removing raw obs directories..." rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw directories and files." +Failed to remove raw obs directories." fi diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index b65e3173f1..23e0496e35 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user ; do +for sect in user platform ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -182,10 +182,13 @@ fi # Whether to move or copy files from raw to processed directories. #mv_or_cp="mv" mv_or_cp="cp" +# Whether to remove raw observations after processed directories have +# been created from them. +remove_raw_obs="${REMOVE_RAW_OBS_NDAS}" # If the raw directories and files are to be removed at the end of this # script, no need to copy the files since the raw directories are going # to be removed anyway. -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi @@ -301,12 +304,12 @@ done # #----------------------------------------------------------------------- # -# Clean up raw directories. 
+# Clean up raw obs directories. # #----------------------------------------------------------------------- # -if [[ $(boolify "${REMOVE_RAW_OBS}") == "TRUE" ]]; then - print_info_msg "Removing raw directories and files..." +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + print_info_msg "Removing raw obs directories..." rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw directories and files." +Failed to remove raw obs directories." fi From 2a379983419e636149656ac8d84f1fe10250a55e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 11 Sep 2024 16:48:05 -0600 Subject: [PATCH 079/260] Minor fixes to comments and output messages. --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 15 ++++++++------- ush/set_cycle_and_obs_timeinfo.py | 3 ++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index 572f7c68c4..c3e19c8935 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -92,7 +92,8 @@ function eval_METplus_timestr_tmpl() { # if [ -z "${METplus_timestr_tmpl}" ]; then print_err_msg_exit "\ -The specified METplus time string template (METplus_timestr_tmpl) cannot be empty: +The specified METplus time string template (METplus_timestr_tmpl) cannot +be empty: METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" fi @@ -100,21 +101,21 @@ The specified METplus time string template (METplus_timestr_tmpl) cannot be empt if [[ ${init_time} =~ ^[0-9]+$ ]]; then if [ "$len" -ne 10 ] && [ "$len" -ne 12 ] && [ "$len" -ne 14 ]; then print_err_msg_exit "\ -The specified initial time string (init_time) must contain exactly 10, -12, or 14 integers (but contains $len): +The specified initial time string (init_time) must contain 10, 12, or 14 +digits (but contains $len): init_time = \"${init_time}\"" fi else print_err_msg_exit "\ -The specified initial time string (init_time) must consist of only -integers and cannot be empty: +The specified initial time string (init_time) must consist of digits only +and cannot be empty: init_time = \"${init_time}\"" fi if ! [[ $fhr =~ ^[0-9]+$ ]]; then print_err_msg_exit "\ -The specified forecast hour (fhr) must consist of only integers and -cannot be empty: +The specified forecast hour (fhr) must consist of digits only and cannot +be empty: fhr = \"${fhr}\"" fi # diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index c3969d8ef3..36635b643e 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -49,7 +49,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( observations are needed to perform verification (each element of the form 'YYYYMMDD'). The first pair of lists is for instantaneous output fields (e.g. REFC, RETOP, T2m), and the second pair is for cumulative ones (e.g. - APCP or accumulated precipitation). + APCP or accumulated precipitation). The accumulation period for the latter + is the forecast output interval. Args: start_time_first_cycl: From 5160d3ccedf78f36c849846f01d3961500ae4a4e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 12 Sep 2024 14:54:06 -0600 Subject: [PATCH 080/260] Fix typos. 
--- ush/get_obs_ccpa.sh | 2 +- ush/get_obs_mrms.sh | 2 +- ush/get_obs_ndas.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 6d217c43c2..54537b9e97 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -174,7 +174,7 @@ yyyymmdd_task=${PDY} basedir_proc=${OBS_DIR} # The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each) in +# scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to task's script. To have an array-valued variable to diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 4fbe503b25..d4e32a4c54 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -86,7 +86,7 @@ yyyymmdd_task=${PDY} basedir_proc=${OBS_DIR} # The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each) in +# scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to task's script. To have an array-valued variable to diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 23e0496e35..b69d4cb1bc 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -60,7 +60,7 @@ yyyymmdd_task=${PDY} basedir_proc=${OBS_DIR} # The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each) in +# scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to task's script. To have an array-valued variable to From f910f6fcb98ec8e0fe09d7eaeda036dfbf581c3b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 12 Sep 2024 14:56:54 -0600 Subject: [PATCH 081/260] Fix more typos. --- ush/get_obs_ccpa.sh | 2 +- ush/get_obs_mrms.sh | 2 +- ush/get_obs_ndas.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 54537b9e97..1eee37339c 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -177,7 +177,7 @@ basedir_proc=${OBS_DIR} # scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to task's script. To have an array-valued variable to +# from the XML to the task's script. To have an array-valued variable to # work with, here, we create the new variable output_times_all that is # the array-valued counterpart of OUTPUT_TIMES_ALL. output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index d4e32a4c54..6d84141eab 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -89,7 +89,7 @@ basedir_proc=${OBS_DIR} # scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to task's script. 
To have an array-valued variable to +# from the XML to the task's script. To have an array-valued variable to # work with, here, we create the new variable output_times_all that is # the array-valued counterpart of OUTPUT_TIMES_ALL. output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index b69d4cb1bc..595ecc2688 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -63,7 +63,7 @@ basedir_proc=${OBS_DIR} # scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to task's script. To have an array-valued variable to +# from the XML to the task's script. To have an array-valued variable to # work with, here, we create the new variable output_times_all that is # the array-valued counterpart of OUTPUT_TIMES_ALL. output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) From de0b7f8e7a67fdfb5f9de02adba60732d1429409 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Thu, 12 Sep 2024 23:39:02 +0000 Subject: [PATCH 082/260] Convert all uses of log_info in setup.py to use logging module --- ush/setup.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/ush/setup.py b/ush/setup.py index 79bdf2ba8d..adb9ae0926 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -13,7 +13,6 @@ from uwtools.api.config import get_yaml_config from python_utils import ( - log_info, cd_vrfy, date_to_str, mkdir_vrfy, @@ -366,7 +365,7 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): logger = logging.getLogger(__name__) # print message - log_info( + logger.info( f""" ======================================================================== Starting function setup() in \"{os.path.basename(__file__)}\"... 
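The conversion above follows the standard library's recommended pattern: a
module-level logger named after the module, with verbosity controlled by
whatever handler configuration the caller installs. A minimal standalone
sketch (the function name below is invented for illustration):

    import logging

    logger = logging.getLogger(__name__)

    def setup_demo(debug: bool = False) -> None:
        logger.info("Starting setup")
        if debug:
            logger.debug("Detail shown only when DEBUG is enabled")

    if __name__ == "__main__":
        logging.basicConfig(level=logging.DEBUG)
        setup_demo(debug=True)
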
@@ -395,11 +394,11 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): workflow_config = expt_config["workflow"] workflow_id = workflow_config["WORKFLOW_ID"] - log_info(f"""WORKFLOW ID = {workflow_id}""") + logger.info(f"""WORKFLOW ID = {workflow_id}""") debug = workflow_config.get("DEBUG") if debug: - log_info( + logger.info( """ Setting VERBOSE to \"TRUE\" because DEBUG has been set to \"TRUE\"...""" ) @@ -611,7 +610,7 @@ def remove_tag(tasks, tag): if not vx_fields_obstype: for metatask in vx_metatasks_all[obstype]: if metatask in rocoto_config['tasks']: - logging.info(dedent( + logger.info(dedent( f""" Removing verification [meta]task "{metatask}" @@ -884,7 +883,7 @@ def get_location(xcs, fmt, expt_cfg): ) expt_config["grid_params"] = grid_params elif not run_any_coldstart_task: - log_info("No coldstart tasks specified, not setting grid parameters") + logger.warning("No coldstart tasks specified, not setting grid parameters") else: errmsg = dedent( f""" @@ -1444,8 +1443,8 @@ def get_location(xcs, fmt, expt_cfg): if workflow_config["SDF_USES_THOMPSON_MP"]: - logging.debug(f'Selected CCPP suite ({workflow_config["CCPP_PHYS_SUITE"]}) uses Thompson MP') - logging.debug(f'Setting up links for additional fix files') + logger.debug(f'Selected CCPP suite ({workflow_config["CCPP_PHYS_SUITE"]}) uses Thompson MP') + logger.debug(f'Setting up links for additional fix files') # If the model ICs or BCs are not from RAP or HRRR, they will not contain aerosol # climatology data needed by the Thompson scheme, so we need to provide a separate file @@ -1461,8 +1460,8 @@ def get_location(xcs, fmt, expt_cfg): for fix_file in fixed_files["THOMPSON_FIX_FILES"]: fixed_files["CYCLEDIR_LINKS_TO_FIXam_FILES_MAPPING"].append(f"{fix_file} | {fix_file}") - logging.debug(f'New fix file list:\n{fixed_files["FIXgsm_FILES_TO_COPY_TO_FIXam"]=}') - logging.debug(f'New fix file mapping:\n{fixed_files["CYCLEDIR_LINKS_TO_FIXam_FILES_MAPPING"]=}') + logger.debug(f'New fix file list:\n{fixed_files["FIXgsm_FILES_TO_COPY_TO_FIXam"]=}') + logger.debug(f'New fix file mapping:\n{fixed_files["CYCLEDIR_LINKS_TO_FIXam_FILES_MAPPING"]=}') # @@ -1482,11 +1481,11 @@ def get_location(xcs, fmt, expt_cfg): # print content of var_defns if DEBUG=True all_lines = cfg_to_yaml_str(expt_config) - log_info(all_lines, verbose=debug) + logger.debug(all_lines) global_var_defns_fp = workflow_config["GLOBAL_VAR_DEFNS_FP"] # print info message - log_info( + logger.info( f""" Generating the global experiment variable definitions file here: GLOBAL_VAR_DEFNS_FP = '{global_var_defns_fp}' From 84adc80ad6b5268b9f98ede6c2b2dbfd0bd206fd Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Thu, 12 Sep 2024 23:43:12 +0000 Subject: [PATCH 083/260] Simplify ASCII2NC tasks: - only need one task per obtype - Don't need call to set_vx_params, as those variables don't appear in the config file --- parm/wflow/verify_pre.yaml | 3 ++- scripts/exregional_run_met_ascii2nc_obs.sh | 29 ---------------------- 2 files changed, 2 insertions(+), 30 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 250ec2604b..fa3f536e92 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -132,10 +132,11 @@ task_run_MET_Pb2nc_obs: metatask_ASCII2nc_obs: var: METAVAR: '{% for var in verification.VX_FIELDS %}{% if var in ["AOD", "PM25", "PM10"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + METAVAR: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}AOD {% elif var in ["PM25", "PM10"] %}PM25 {% endif 
%}{% endfor %}' METAOBTYPE: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}AERONET {% elif var in ["PM25", "PM10"] %}AIRNOW {% endif %}{% endfor %}' METAOBS_DIR: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}&AERONET_OBS_DIR; {% elif var in ["PM25", "PM10"] %}&AIRNOW_OBS_DIR; {% endif %}{% endfor %}' TASKDEP: '{% for var in verification.VX_FIELDS %}{% if var =="AOD" %}get_obs_aeronet {% elif var in ["PM25", "PM10"] %}get_obs_airnow {% endif %}{% endfor %}' - task_run_MET_ASCII2nc_obs_#METAVAR#: + task_run_MET_ASCII2nc_obs_#METAOBTYPE#: <<: *default_task_verify_pre attrs: cycledefs: forecast diff --git a/scripts/exregional_run_met_ascii2nc_obs.sh b/scripts/exregional_run_met_ascii2nc_obs.sh index 2c3f141eb0..a0ea10f33d 100755 --- a/scripts/exregional_run_met_ascii2nc_obs.sh +++ b/scripts/exregional_run_met_ascii2nc_obs.sh @@ -73,29 +73,6 @@ CDATE="${PDY}${cyc}" # #----------------------------------------------------------------------- # -# Set various verification parameters associated with the field to be -# verified. Not all of these are necessarily used later below but are -# set here for consistency with other verification ex-scripts. -# -#----------------------------------------------------------------------- -# -FIELDNAME_IN_OBS_INPUT="" -FIELDNAME_IN_FCST_INPUT="" -FIELDNAME_IN_MET_OUTPUT="" -FIELDNAME_IN_MET_FILEDIR_NAMES="" - -set_vx_params \ - obtype="${OBTYPE}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - outvarname_grid_or_point="grid_or_point" \ - outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ - outvarname_fieldname_in_fcst_input="FIELDNAME_IN_FCST_INPUT" \ - outvarname_fieldname_in_MET_output="FIELDNAME_IN_MET_OUTPUT" \ - outvarname_fieldname_in_MET_filedir_names="FIELDNAME_IN_MET_FILEDIR_NAMES" -# -#----------------------------------------------------------------------- -# # Set paths and file templates for input to and output from the MET/ # METplus tool to be run as well as other file/directory parameters. # @@ -256,13 +233,7 @@ settings="\ # # Field information. # - 'fieldname_in_obs_input': '${FIELDNAME_IN_OBS_INPUT}' - 'fieldname_in_fcst_input': '${FIELDNAME_IN_FCST_INPUT}' - 'fieldname_in_met_output': '${FIELDNAME_IN_MET_OUTPUT}' - 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' 'obtype': '${OBTYPE}' - 'accum_hh': '${ACCUM_HH:-}' - 'accum_no_pad': '${ACCUM_NO_PAD:-}' " # Render the template to create a METplus configuration file From bb8582c2170182a04fa60db73bc7ee6de9b69902 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Thu, 12 Sep 2024 23:44:55 +0000 Subject: [PATCH 084/260] Same treatment to pb2nc: no need to call set_vx_params --- scripts/exregional_run_met_pb2nc_obs.sh | 29 ------------------------- 1 file changed, 29 deletions(-) diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index ce07f82942..2166992ef3 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -81,29 +81,6 @@ CDATE="${PDY}${cyc}" # #----------------------------------------------------------------------- # -# Set various verification parameters associated with the field to be -# verified. Not all of these are necessarily used later below but are -# set here for consistency with other verification ex-scripts. 
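The metatask var expressions above compress a field-to-obtype mapping into
one line of Jinja. Rendered on its own (with the jinja2 package and a
made-up field list), the METAOBTYPE expression behaves roughly like this:

    from jinja2 import Template

    expr = (
        '{% for var in verification.VX_FIELDS %}'
        '{% if var == "AOD" %}AERONET {% elif var in ["PM25", "PM10"] %}AIRNOW {% endif %}'
        '{% endfor %}'
    )
    fields = {"VX_FIELDS": ["APCP", "AOD", "PM25"]}
    print(Template(expr).render(verification=fields))
    # -> "AERONET AIRNOW " : one space-separated entry per matching field,
    #    which rocoto then pairs with the entries of the other metatask vars.
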
-# -#----------------------------------------------------------------------- -# -FIELDNAME_IN_OBS_INPUT="" -FIELDNAME_IN_FCST_INPUT="" -FIELDNAME_IN_MET_OUTPUT="" -FIELDNAME_IN_MET_FILEDIR_NAMES="" - -set_vx_params \ - obtype="${OBTYPE}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - outvarname_grid_or_point="grid_or_point" \ - outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ - outvarname_fieldname_in_fcst_input="FIELDNAME_IN_FCST_INPUT" \ - outvarname_fieldname_in_MET_output="FIELDNAME_IN_MET_OUTPUT" \ - outvarname_fieldname_in_MET_filedir_names="FIELDNAME_IN_MET_FILEDIR_NAMES" -# -#----------------------------------------------------------------------- -# # Set paths and file templates for input to and output from the MET/ # METplus tool to be run as well as other file/directory parameters. # @@ -266,13 +243,7 @@ settings="\ # # Field information. # - 'fieldname_in_obs_input': '${FIELDNAME_IN_OBS_INPUT}' - 'fieldname_in_fcst_input': '${FIELDNAME_IN_FCST_INPUT}' - 'fieldname_in_met_output': '${FIELDNAME_IN_MET_OUTPUT}' - 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' 'obtype': '${OBTYPE}' - 'accum_hh': '${ACCUM_HH:-}' - 'accum_no_pad': '${ACCUM_NO_PAD:-}' " # Render the template to create a METplus configuration file From aa7188b7897c8497be2817bb0e056358e932c9bc Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 13 Sep 2024 16:53:30 -0600 Subject: [PATCH 085/260] Fix up comments. --- ush/get_obs_ccpa.sh | 10 +++++----- ush/get_obs_mrms.sh | 2 +- ush/get_obs_ndas.sh | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 1eee37339c..79ce882da3 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -235,15 +235,15 @@ fi # Sequence interval must be 6 hours because the archives are 6-hourly. arcv_hr_incr=6 -# Initial guess for starting archive hour. This is set to the hour -# corresponding to the first forecast output time of the day. +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first forecast output time of the day. hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) -# Ending archive hour. This is set to the hour corresponding to the last -# forecast output time of the day. +# Ending archive hour. This is set to the archive hour containing obs at +# the last forecast output time of the day. hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) if [[ ${hr_last} -eq 0 ]]; then @@ -255,7 +255,7 @@ fi # Check whether any obs files already exist on disk. If so, adjust the # starting archive hour. In the process, keep a count of the number of -# files that already exist on disk. +# obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 6d84141eab..6aac0159b3 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -116,7 +116,7 @@ fi # Check whether any obs files already exist on disk. If so, adjust the # starting archive hour. In the process, keep a count of the number of -# files that already exist on disk. +# obs files that already exist on disk. 
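The archive-hour arithmetic documented in the hunks above is easy to
sanity-check by hand. CCPA rounds the first obs hour up to the next
multiple of the (6-hourly) archive interval via the ceil() helper; a
ceil()-free sketch of the same computation (illustrative only):

    arcv_hr_incr=6
    ceil_div() { echo $(( ($1 + $2 - 1) / $2 )); }

    for hr_first in 0 1 6 7 23; do
      arcv_hr_start=$(( $(ceil_div ${hr_first} ${arcv_hr_incr}) * arcv_hr_incr ))
      echo "hr_first=${hr_first} -> arcv_hr_start=${arcv_hr_start}"
    done
    # 0 -> 0, 1 -> 6, 6 -> 6, 7 -> 12, 23 -> 24

Note that the NDAS version shown earlier uses (hr/incr + 1)*incr instead,
which rounds strictly upward (6 -> 12), presumably because an NDAS archive
holds obs for the hours preceding its own timestamp (the tm* files).
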
num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} for (( i=0; i<${num_mrms_fields}; i++ )); do diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 595ecc2688..2954552412 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -102,21 +102,21 @@ fi # Sequence interval must be 6 hours because the archives are 6-hourly. arcv_hr_incr=6 -# Initial guess for starting archive hour. This is set to the hour -# corresponding to the first forecast output time of the day. +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first forecast output time of the day. hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) -# Ending archive hour. This is set to the hour corresponding to the last -# forecast output time of the day. +# Ending archive hour. This is set to the archive hour containing obs at +# the last forecast output time of the day. hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) # Check whether any obs files already exist on disk. If so, adjust the # starting archive hour. In the process, keep a count of the number of -# files that already exist on disk. +# obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) From 3c794f258b1e827ec0b484ab030f14220b20d6a8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 16 Sep 2024 13:41:42 -0600 Subject: [PATCH 086/260] Add new function and clean up existing. --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 232 ++++++++++++++++---- 1 file changed, 194 insertions(+), 38 deletions(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index c3e19c8935..ae4a1c7ebf 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -1,8 +1,9 @@ # #----------------------------------------------------------------------- # -# This file defines a function that evaluates a METplus time-string -# template. +# This function evaluates a METplus time-string template, i.e. a string +# (e.g. a file name template) containing one or more METplus time- +# formatting strings. # #----------------------------------------------------------------------- # @@ -49,7 +50,7 @@ function eval_METplus_timestr_tmpl() { "init_time" \ "fhr" \ "METplus_timestr_tmpl" \ - "outvarname_formatted_time" \ + "outvarname_evaluated_timestr" \ ) process_args valid_args "$@" # @@ -69,16 +70,171 @@ function eval_METplus_timestr_tmpl() { # #----------------------------------------------------------------------- # - local fmt \ - formatted_time \ + local crnt_timefmt \ + crnt_timefmt_esc \ + evaluated_timestr \ + regex_search_tmpl \ + the_time \ + tmpl_remainder +# +#----------------------------------------------------------------------- +# +# Loop over all METplus time-formatting strings in the given METplus +# template and evaluate each using the given initial time (init_time) and +# forecast hour (fhr). +# +# Note that the while-loop below is over all METplus time-formatting +# strings of the form {...} in the template METplus_timestr_tmpl; it +# continues until all such time-formatting strings have been evaluated +# to actual times. 
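A quick way to see what the extraction regex introduced here does (GNU sed
assumed; the template below is made up for illustration):

    regex_search_tmpl="(.*)(\{.*\})(.*)"
    tmpl='prefix_{init?fmt=%Y%m%d%H}_f{lead?fmt=%HHH}.grib2'

    sed -n -r -e "s|${regex_search_tmpl}|\2|p"   <<< "${tmpl}"
    # -> {lead?fmt=%HHH}
    sed -n -r -e "s|${regex_search_tmpl}|\1\3|p" <<< "${tmpl}"
    # -> prefix_{init?fmt=%Y%m%d%H}_f.grib2

Because the leading (.*) is greedy, each pass peels off the last {...}
group; the while-loop then repeats on the remainder until none are left.
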
+# +#----------------------------------------------------------------------- +# +# Regular expression used by the sed utility below to pick out the next +# METplus time-formatting string in the given METplus time-string template. +# + regex_search_tmpl="(.*)(\{.*\})(.*)" +# +# Initialize while-loop variables. +# + evaluated_timestr="${METplus_timestr_tmpl}" + + crnt_timefmt=$( printf "%s" "${METplus_timestr_tmpl}" | \ + $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) + tmpl_remainder=$( printf "%s" "${METplus_timestr_tmpl}" | \ + $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) + + while [ ! -z "${crnt_timefmt}" ]; do + + eval_single_METplus_timefmt \ + init_time="${init_time}" \ + fhr="${fhr}" \ + METplus_timefmt="${crnt_timefmt}" \ + outvarname_evaluated_timefmt="the_time" +# +# Replace the next METplus time string in evaluated_timestr with an actual +# time. +# +# Note that when using sed, we need to escape various characters (question +# mark, closing and opening curly braces, etc) in the METplus template in +# order for the sed command below to work properly. +# + crnt_timefmt_esc=$( echo "${crnt_timefmt}" | \ + $SED -r -e "s/\?/\\\?/g" -e "s/\{/\\\{/g" -e "s/\}/\\\}/g" ) + evaluated_timestr=$( echo "${evaluated_timestr}" | \ + $SED -n -r "s|(.*)(${crnt_timefmt_esc})(.*)|\1${the_time}\3|p" ) +# +# Set up values for the next iteration of the while-loop. +# + crnt_timefmt=$( printf "%s" "${tmpl_remainder}" | \ + $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) + tmpl_remainder=$( printf "%s" "${tmpl_remainder}" | \ + $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) + + done +# +#----------------------------------------------------------------------- +# +# Set output variables. +# +#----------------------------------------------------------------------- +# + if [ ! -z "${outvarname_evaluated_timestr}" ]; then + printf -v ${outvarname_evaluated_timestr} "%s" "${evaluated_timestr}" + fi +# +#----------------------------------------------------------------------- +# +# Restore the shell options saved at the beginning of this script/function. +# +#----------------------------------------------------------------------- +# + { restore_shell_opts; } > /dev/null 2>&1 + +} + +# +#----------------------------------------------------------------------- +# +# This function uses the specified initial forecast time and forecast +# hour to evaluate a single METplus time-formatting string and return +# the corresponding time. +# +#----------------------------------------------------------------------- +# +function eval_single_METplus_timefmt() { +# +#----------------------------------------------------------------------- +# +# Save current shell options (in a global array). Then set new options +# for this script/function. +# +#----------------------------------------------------------------------- +# + { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 +# +#----------------------------------------------------------------------- +# +# Get the full path to the file in which this script/function is located +# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in +# which the file is located (scrfunc_dir). +# +#----------------------------------------------------------------------- +# + local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) + local scrfunc_fn=$( basename "${scrfunc_fp}" ) + local scrfunc_dir=$( dirname "${scrfunc_fp}" ) +# +#----------------------------------------------------------------------- +# +# Get the name of this function. 
+# +#----------------------------------------------------------------------- +# + local func_name="${FUNCNAME[0]}" +# +#----------------------------------------------------------------------- +# +# Specify the set of valid argument names for this script/function. Then +# process the arguments provided to this script/function (which should +# consist of a set of name-value pairs of the form arg1="value1", etc). +# +#----------------------------------------------------------------------- +# + local valid_args=( \ + "init_time" \ + "fhr" \ + "METplus_timefmt" \ + "outvarname_evaluated_timefmt" \ + ) + process_args valid_args "$@" +# +#----------------------------------------------------------------------- +# +# For debugging purposes, print out values of arguments passed to this +# script. Note that these will be printed out only if VERBOSE is set to +# TRUE. +# +#----------------------------------------------------------------------- +# + print_input_args "valid_args" +# +#----------------------------------------------------------------------- +# +# Declare local variables. +# +#----------------------------------------------------------------------- +# + local evaluated_timefmt \ + fmt \ hh_init \ init_time_str \ lead_hrs \ len \ - mn_init \ - METplus_time_fmt \ + METplus_time_codes \ METplus_time_shift \ METplus_time_type \ + mn_init \ regex_search \ ss_init \ valid_time_str \ @@ -90,25 +246,25 @@ function eval_METplus_timestr_tmpl() { # #----------------------------------------------------------------------- # - if [ -z "${METplus_timestr_tmpl}" ]; then + if [ -z "${METplus_timefmt}" ]; then print_err_msg_exit "\ -The specified METplus time string template (METplus_timestr_tmpl) cannot -be empty: - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" +The specified METplus time-formatting string (METplus_timefmt) cannot be +empty: + METplus_timefmt = \"${METplus_timefmt}\"" fi len=${#init_time} if [[ ${init_time} =~ ^[0-9]+$ ]]; then if [ "$len" -ne 10 ] && [ "$len" -ne 12 ] && [ "$len" -ne 14 ]; then print_err_msg_exit "\ -The specified initial time string (init_time) must contain 10, 12, or 14 -digits (but contains $len): +The specified initial time (init_time) must contain 10, 12, or 14 digits +but instead contains $len: init_time = \"${init_time}\"" fi else print_err_msg_exit "\ -The specified initial time string (init_time) must consist of digits only -and cannot be empty: +The specified initial time (init_time) must consist of digits only and +cannot be empty: init_time = \"${init_time}\"" fi @@ -150,11 +306,11 @@ be empty: # regex_search="^\{(init|valid|lead)(\?)(fmt=)([^\?]*)(\?)?(shift=)?([^\?]*)?\}" METplus_time_type=$( \ - printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\1/p" ) - METplus_time_fmt=$( \ - printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\4/p" ) + printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\1/p" ) + METplus_time_codes=$( \ + printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\4/p" ) METplus_time_shift=$( \ - printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\7/p" ) + printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\7/p" ) # #----------------------------------------------------------------------- # @@ -163,9 +319,9 @@ be empty: # #----------------------------------------------------------------------- # - case "${METplus_time_fmt}" in + case "${METplus_time_codes}" in "%Y%m%d%H"|"%Y%m%d"|"%H%M%S") - fmt="${METplus_time_fmt}" + 
fmt="${METplus_time_codes}" ;; "%H") # @@ -178,7 +334,7 @@ be empty: if [ "${METplus_time_type}" = "lead" ]; then fmt="%02.0f" else - fmt="${METplus_time_fmt}" + fmt="${METplus_time_codes}" fi ;; "%HHH") @@ -194,10 +350,10 @@ be empty: ;; *) print_err_msg_exit "\ -Unsupported METplus time format: - METplus_time_fmt = \"${METplus_time_fmt}\" -METplus time string template passed to this function is: - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" +Unsupported METplus time codes: + METplus_time_codes = \"${METplus_time_codes}\" +METplus time-formatting string passed to this function is: + METplus_timefmt = \"${METplus_timefmt}\"" ;; esac # @@ -213,10 +369,10 @@ METplus time string template passed to this function is: # case "${METplus_time_type}" in "init") - formatted_time=$( ${DATE_UTIL} --date="${init_time_str} + ${time_shift_str}" +"${fmt}" ) + evaluated_timefmt=$( ${DATE_UTIL} --date="${init_time_str} + ${time_shift_str}" +"${fmt}" ) ;; "valid") - formatted_time=$( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"${fmt}" ) + evaluated_timefmt=$( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"${fmt}" ) ;; "lead") lead_secs=$(( $( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"%s" ) \ @@ -240,23 +396,23 @@ The remainder (lead_hrs_rem) after dividing the lead_secs by SECS_PER_HOUR # # Get the lead in the proper format. # - formatted_time=$( printf "${fmt}" "${lead_hrs}" ) + evaluated_timefmt=$( printf "${fmt}" "${lead_hrs}" ) ;; *) print_err_msg_exit "\ Unsupported METplus time type: METplus_time_type = \"${METplus_time_type}\" -METplus time string template passed to this function is: - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" +METplus time-formatting string passed to this function is: + METplus_timefmt = \"${METplus_timefmt}\"" ;; esac - if [ -z "${formatted_time}" ]; then + if [ -z "${evaluated_timefmt}" ]; then print_err_msg_exit "\ -The specified METplus time string template (METplus_timestr_tmpl) could -not be evaluated for the given initial time (init_time) and forecast -hour (fhr): - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\" +The specified METplus time-formatting string (METplus_timefmt) could not +be evaluated for the given initial time (init_time) and forecast hour +(fhr): + METplus_timefmt = \"${METplus_timefmt}\" init_time = \"${init_time}\" fhr = \"${fhr}\"" fi @@ -267,8 +423,8 @@ hour (fhr): # #----------------------------------------------------------------------- # - if [ ! -z "${outvarname_formatted_time}" ]; then - printf -v ${outvarname_formatted_time} "%s" "${formatted_time}" + if [ ! -z "${outvarname_evaluated_timefmt}" ]; then + printf -v ${outvarname_evaluated_timefmt} "%s" "${evaluated_timefmt}" fi # #----------------------------------------------------------------------- From ef1cbbb6f97de5ea03709ff89a895f1908aa8325 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 16 Sep 2024 14:12:31 -0600 Subject: [PATCH 087/260] Change variable names and remove unneeded variable. --- ush/get_obs_ccpa.sh | 47 ++++++++++++++++++++++----------------------- ush/get_obs_mrms.sh | 26 ++++++++++++------------- ush/get_obs_ndas.sh | 37 +++++++++++++++++------------------ 3 files changed, 54 insertions(+), 56 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index 79ce882da3..d3ad4c49ce 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -173,31 +173,31 @@ yyyymmdd_task=${PDY} # processing by this script is complete. 
basedir_proc=${OBS_DIR} -# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each in +# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is +# a scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable output_times_all that is -# the array-valued counterpart of OUTPUT_TIMES_ALL. -output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) +# work with, here, we create the new variable fcst_output_times_all that +# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. +fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # APCP (accumulated precipitation) output for the current day. We start # constructing this by extracting from the full list of all forecast APCP # output times (i.e. from all cycles) all elements that contain the current # task's day (in the form YYYYMMDD). -output_times_crnt_day=() -if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fcst_output_times_crnt_day=() +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) fi # If the 0th hour of the current day is in this list (and if it is, it # will be the first element), remove it because for APCP, that time is # considered part of the previous day (because it represents precipitation # that occurred during the last hour of the previous day). -if [[ ${#output_times_crnt_day[@]} -gt 0 ]] && \ - [[ ${output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then - output_times_crnt_day=(${output_times_crnt_day[@]:1}) +if [[ ${#fcst_output_times_crnt_day[@]} -gt 0 ]] && \ + [[ ${fcst_output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then + fcst_output_times_crnt_day=(${fcst_output_times_crnt_day[@]:1}) fi # If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is # one of the output times in the list of all APCP output times, we include @@ -205,14 +205,14 @@ fi # considered part of the current day (because it represents precipitation # that occured during the last hour of the current day). yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) -if [[ ${output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then - output_times_crnt_day+=(${yyyymmdd00_task_p1d}) +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then + fcst_output_times_crnt_day+=(${yyyymmdd00_task_p1d}) fi # If there are no forecast APCP output times on the day of the current # task, exit the script. -num_output_times_crnt_day=${#output_times_crnt_day[@]} -if [[ ${num_output_times_crnt_day} -eq 0 ]]; then +num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} +if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then print_info_msg " None of the forecast APCP output times fall within the day (including the 0th hour of the next day) associated with the current task (yyyymmdd_task): @@ -237,14 +237,14 @@ arcv_hr_incr=6 # Initial guess for starting archive hour. 
This is set to the archive # hour containing obs at the first forecast output time of the day. -hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) +hh_first=$(echo ${fcst_output_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at # the last forecast output time of the day. -hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) +hh_last=$(echo ${fcst_output_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) if [[ ${hr_last} -eq 0 ]]; then arcv_hr_end=24 @@ -257,7 +257,7 @@ fi # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. num_existing_files=0 -for yyyymmddhh in ${output_times_crnt_day[@]}; do +for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}/${yyyymmdd}" @@ -283,7 +283,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_output_times_crnt_day)) +num_needed_files=$((num_fcst_output_times_crnt_day)) if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -361,9 +361,8 @@ arcv_hr = ${arcv_hr}" yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 5 hours ago" +%Y%m%d%H) yyyymmddhh_qrtrday_end=${yyyymmddhh_arcv} do_retrieve="FALSE" - nout=${#output_times_crnt_day[@]} - for (( i=0; i<${nout}; i++ )); do - output_time=${output_times_crnt_day[i]} + for (( i=0; i<${num_fcst_output_times_crnt_day}; i++ )); do + output_time=${fcst_output_times_crnt_day[i]} if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then do_retrieve="TRUE" @@ -416,7 +415,7 @@ arcv_hr = ${arcv_hr}" yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then fn_raw="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" fp_raw="${qrtrday_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}/${yyyymmdd}" @@ -445,7 +444,7 @@ archive are: yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" The forecast output times for APCP are: - output_times_crnt_day = ($(printf "\"%s\" " ${output_times_crnt_day[@]}))" + fcst_output_times_crnt_day = ($(printf "\"%s\" " ${fcst_output_times_crnt_day[@]}))" fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index 6aac0159b3..af3b8ca942 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -85,27 +85,27 @@ yyyymmdd_task=${PDY} # all processing by this script is complete. basedir_proc=${OBS_DIR} -# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each in +# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is +# a scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. 
It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable output_times_all that is -# the array-valued counterpart of OUTPUT_TIMES_ALL. -output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) +# work with, here, we create the new variable fcst_output_times_all that +# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. +fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # output for the current day. We extract this list from the full list of # all forecast output times (i.e. from all cycles). -output_times_crnt_day=() -if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fcst_output_times_crnt_day=() +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) fi # If there are no forecast output times on the day of the current task, # exit the script. -num_output_times_crnt_day=${#output_times_crnt_day[@]} -if [[ ${num_output_times_crnt_day} -eq 0 ]]; then +num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} +if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then print_info_msg " None of the forecast output times fall within the day associated with the current task (yyyymmdd_task): @@ -120,7 +120,7 @@ fi num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} for (( i=0; i<${num_mrms_fields}; i++ )); do - for yyyymmddhh in ${output_times_crnt_day[@]}; do + for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}/${yyyymmdd}" @@ -139,7 +139,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_output_times_crnt_day*num_mrms_fields)) +num_needed_files=$((num_fcst_output_times_crnt_day*num_mrms_fields)) if [[ ${num_existing_files} -eq $((num_needed_files)) ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -246,7 +246,7 @@ $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." # process renaming it) to the processed location. for hr in $(seq 0 1 23); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) - if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then for (( i=0; i<${num_mrms_fields}; i++ )); do python ${USHdir}/mrms_pull_topofhour.py \ --valid_time ${yyyymmddhh} \ diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 2954552412..5562647340 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -59,27 +59,27 @@ yyyymmdd_task=${PDY} # all processing by this script is complete. basedir_proc=${OBS_DIR} -# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a -# scalar string containing all relevant forecast output times (each in +# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is +# a scalar string containing all relevant forecast output times (each in # the form YYYYMMDDHH) separated by spaces. 
It isn't an array of strings # because in ROCOTO, there doesn't seem to be a way to pass a bash array # from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable output_times_all that is -# the array-valued counterpart of OUTPUT_TIMES_ALL. -output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}")) +# work with, here, we create the new variable fcst_output_times_all that +# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. +fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) # List of times (each of the form YYYYMMDDHH) for which there is forecast # output for the current day. We extract this list from the full list of # all forecast output times (i.e. from all cycles). -output_times_crnt_day=() -if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fcst_output_times_crnt_day=() +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) fi # If there are no forecast output times on the day of the current task, # exit the script. -num_output_times_crnt_day=${#output_times_crnt_day[@]} -if [[ ${num_output_times_crnt_day} -eq 0 ]]; then +num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} +if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then print_info_msg " None of the forecast output times fall within the day associated with the current task (yyyymmdd_task): @@ -104,13 +104,13 @@ arcv_hr_incr=6 # Initial guess for starting archive hour. This is set to the archive # hour containing obs at the first forecast output time of the day. -hh_first=$(echo ${output_times_crnt_day[0]} | cut -c9-10) +hh_first=$(echo ${fcst_output_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at # the last forecast output time of the day. -hh_last=$(echo ${output_times_crnt_day[-1]} | cut -c9-10) +hh_last=$(echo ${fcst_output_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) @@ -118,7 +118,7 @@ arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. num_existing_files=0 -for yyyymmddhh in ${output_times_crnt_day[@]}; do +for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}" @@ -143,7 +143,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. 
-num_needed_files=$((num_output_times_crnt_day)) +num_needed_files=$((num_fcst_output_times_crnt_day)) if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -221,9 +221,8 @@ arcv_hr = ${arcv_hr}" yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) yyyymmddhh_qrtrday_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) do_retrieve="FALSE" - nout=${#output_times_crnt_day[@]} - for (( i=0; i<${nout}; i++ )); do - output_time=${output_times_crnt_day[i]} + for (( i=0; i<${num_fcst_output_times_crnt_day}; i++ )); do + output_time=${fcst_output_times_crnt_day[i]} if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then do_retrieve="TRUE" @@ -276,7 +275,7 @@ arcv_hr = ${arcv_hr}" yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - if [[ ${output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" fp_raw="${qrtrday_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}" @@ -296,7 +295,7 @@ are: yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" The forecast output times are: - output_times_crnt_day = ($(printf "\"%s\" " ${output_times_crnt_day[@]}))" + fcst_output_times_crnt_day = ($(printf "\"%s\" " ${fcst_output_times_crnt_day[@]}))" fi From 2d751130084dce588aef9def18f2f487d1dadbc9 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 16 Sep 2024 14:13:24 -0600 Subject: [PATCH 088/260] Fix location of staged forecast input files. 
--- ...ulticyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 2 +- ...ulticyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index 8b840a8ea8..e386d84e3d 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -57,7 +57,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 18558e0d95..9694f9845f 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -59,7 +59,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git 
a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index 5d6929cd4a..067187a216 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -57,7 +57,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml index 7ec2264509..75d706ba1f 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -59,7 +59,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 29427201e7..a55c7c1e0b 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -58,7 +58,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - 
VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index 508d14c7fa..e5f8fc8d1f 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -57,7 +57,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index c838e8581d..6445ebca53 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -59,7 +59,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/det/Gerard.Ketefian/UFS_CAM/forecasts_for_SRW_WE2E_tests' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 
'${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc'

From 830a635c74948fc50d1e6ebc25a25a8a7cd2f8c2 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Tue, 17 Sep 2024 12:58:42 -0600
Subject: [PATCH 089/260] Add check in setup.py to make sure that the
 accumulation periods of accumulated vx fields are less than or equal to the
 forecast length. This check is also made in the workflow yaml files, but if
 all the accumulation periods happen to be greater than the forecast length,
 then the field must be completely removed from verification (i.e. its vx
 tasks must be removed from the workflow), and that can only be done via this
 check in setup.py; it can't be done in the workflow task yaml files.

---
 ush/setup.py | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/ush/setup.py b/ush/setup.py
index d60f8a9154..d6e9e5c2d0 100644
--- a/ush/setup.py
+++ b/ush/setup.py
@@ -546,6 +546,31 @@ def remove_tag(tasks, tag):
     #
     # -----------------------------------------------------------------------
     #
+    # For vx fields that are accumulated, remove those accumulation hours
+    # that are longer than the forecast length. If that leaves the array
+    # of accumulation hours for that field empty, then remove the field
+    # from the list of fields to be verified.
+    #
+    # -----------------------------------------------------------------------
+    #
+    # Get the vx fields specified in the experiment configuration.
+    vx_fields_config = expt_config["verification"]["VX_FIELDS"]
+
+    fcst_len_hrs = workflow_config.get("FCST_LEN_HRS")
+    vx_fields_accum = ["APCP", "ASNOW"]
+    for field in vx_fields_accum:
+        if field in vx_fields_config:
+            accum_periods_array_name = "".join(["VX_", field, "_ACCUMS_HRS"])
+            accum_periods = expt_config["verification"][accum_periods_array_name]
+            accum_periods = [accum for accum in accum_periods if (accum <= fcst_len_hrs)]
+            expt_config["verification"][accum_periods_array_name] = accum_periods
+            if not accum_periods:
+                vx_fields_config.remove(field)
+
+    expt_config["verification"]["VX_FIELDS"] = vx_fields_config
+    #
+    # -----------------------------------------------------------------------
+    #
     # Remove all verification [meta]tasks for which no fields are specified.
     #
     # -----------------------------------------------------------------------
@@ -562,10 +587,10 @@ def remove_tag(tasks, tag):
 
     vx_fields_all["NOHRSC"] = ["ASNOW"]
     vx_metatasks_all["NOHRSC"] = ["task_get_obs_nohrsc",
-                                  "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems",
-                                  "metatask_GridStat_NOHRSC_all_accums_all_mems",
-                                  "metatask_GenEnsProd_EnsembleStat_NOHRSC",
-                                  "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"]
+                                 "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems",
+                                 "metatask_GridStat_NOHRSC_all_accums_all_mems",
+                                 "metatask_GenEnsProd_EnsembleStat_NOHRSC",
+                                 "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"]
 
     vx_fields_all["MRMS"] = ["REFC", "RETOP"]
     vx_metatasks_all["MRMS"] = ["metatask_GridStat_MRMS_all_mems",
@@ -578,9 +603,6 @@ def remove_tag(tasks, tag):
                                 "metatask_GenEnsProd_EnsembleStat_NDAS",
                                 "metatask_PointStat_NDAS_ensmeanprob"]
 
-    # Get the vx fields specified in the experiment configuration.
-    vx_fields_config = expt_config["verification"]["VX_FIELDS"]
-
     # If there are no vx fields specified, remove those tasks that are necessary
     # for all observation types.
if not vx_fields_config: @@ -602,7 +624,6 @@ def remove_tag(tasks, tag): are specified for verification.""" )) rocoto_config['tasks'].pop(metatask) - # # ----------------------------------------------------------------------- # From 407c51b9a183983001f5d33ed25a05372361fe3e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 20 Sep 2024 08:25:06 -0600 Subject: [PATCH 090/260] Fix typo. --- parm/metplus/PcpCombine.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index 3cee69df1d..c2807e6380 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -98,7 +98,7 @@ FCST_PCP_COMBINE_INPUT_LEVELS = A01 # For accumulation variables (which is the only type of variable that we # run PcpCombine on), we add the accumulation period to the variable name # because this is how METplus normally sets names. This is because, -# epending on the settings in the METplus configuration file, it is +# depending on the settings in the METplus configuration file, it is # possible for a single NetCDF output file to contain output for multiple # accumulations, so even though the "level" attribute of each accumulation # variable in the output file will contain the level (e.g. "A1" or "A3"), From e3de6e1386955d36df0117e42cd376b1e829bcc6 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 20 Sep 2024 08:32:02 -0600 Subject: [PATCH 091/260] Fix typo. --- ush/get_obs_ndas.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 5562647340..8b0c87b3eb 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -133,7 +133,7 @@ File already exists on disk: hr=$((10#${hh})) arcv_hr_start=$(( (hr/arcv_hr_incr + 1)*arcv_hr_incr )) print_info_msg " -File does not exists on disk: +File does not exist on disk: fp_proc = \"${fp_proc}\" Setting the hour (since 00) of the first archive to retrieve to: arcv_hr_start = \"${arcv_hr_start}\"" From 399f3378071418f4567fe060b84a0e34dd88f323 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Mon, 23 Sep 2024 20:27:47 +0000 Subject: [PATCH 092/260] Fix convoluted problem with logic that checks for successful retrieval from HPSS --- ush/retrieve_data.py | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py index 5acf9d5ce9..175f210ef5 100755 --- a/ush/retrieve_data.py +++ b/ush/retrieve_data.py @@ -52,7 +52,15 @@ def clean_up_output_dir(expected_subdir, local_archive, output_path, source_path expand_source_paths = [] logging.debug(f"Cleaning up local paths: {source_paths}") for p in source_paths: - expand_source_paths.extend(glob.glob(p.lstrip("/"))) + globbed=glob.glob(p.lstrip("/")) + if globbed: + expand_source_paths.extend(globbed) + else: + logging.warning(f"Input source path {p} did not match any extracted files!") + if unavailable.get("hpss"): + unavailable["hpss"].append(p) + else: + unavailable["hpss"] = [p] # Check to make sure the files exist on disk for file_path in expand_source_paths: @@ -575,10 +583,10 @@ def hpss_requested_files(cla, file_names, store_specs, members=-1, ens_group=-1) existing_archive = hsi_single_file(existing_archive, mode="get") # Grab only the necessary files from the archive - cmd = f'unzip -o {os.path.basename(existing_archive)} {" ".join(source_paths)}' + cmd = f'unzip -o {os.path.basename(existing_archive)} {" ".join(expected)}' else: - cmd = f'htar -xvf 
{existing_archive} {" ".join(source_paths)}' + cmd = f'htar -xvf {existing_archive} {" ".join(expected)}' logging.info(f"Running command \n {cmd}") @@ -598,33 +606,22 @@ def hpss_requested_files(cla, file_names, store_specs, members=-1, ens_group=-1) raise Exception("Error running archive extraction command") # Check that files exist and Remove any data transfer artifacts. - # Returns {'hpss': []}, turn that into a new dict of - # sets. - unavailable[existing_archive] = set( + unavailable = set( clean_up_output_dir( expected_subdir=archive_internal_dir, local_archive=os.path.basename(existing_archive), output_path=output_path, - source_paths=source_paths, + source_paths=list(expected), ).get("hpss", []) ) - # Once we go through all the archives, the union of all - # "unavailable" files should equal the "expected" list of - # files since clean_up_output_dir only reports on those that - # are missing from one of the files attempted. If any - # additional files are reported as unavailable, then - # something has gone wrong. - unavailable = set.union(*unavailable.values()) + # Finally, update the "expected" set, removing files we already found and retrieved. + expected = expected.intersection(unavailable) + - # Break loop if unexpected files were found or if files were found - # A successful file found does not equal the expected file list and - # returns an empty set function. - if not expected == unavailable: - return unavailable - expected - - # If this loop has completed successfully without returning early, then all files have been found - return {} + # Return a set of unavailable/not found files. In our case, this represents the remaining elements + # of the "expected" set, since we removed elements as they were found. + return expected def load_str(arg): @@ -881,6 +878,7 @@ def main(argv): ens_group=ens_group, ) + if not unavailable: # All files are found. Stop looking! # Write a variable definitions file for the data, if requested From 6b49be825aa9d21eeade0b4c7eef4c6a57e4a864 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Mon, 23 Sep 2024 20:40:05 +0000 Subject: [PATCH 093/260] Properly trap error when "--output_path" does not exist --- ush/retrieve_data.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py index 175f210ef5..c0df9def56 100755 --- a/ush/retrieve_data.py +++ b/ush/retrieve_data.py @@ -1072,6 +1072,11 @@ def parse_args(argv): raise argparse.ArgumentTypeError(f"Invalid value '{store}' provided " \ f"for --data_stores; valid values are {valid_data_stores}") + # Check other requirements + if not os.path.isdir(args.output_path): + logging.critical(f"{args.output_path} does not exist or is not a directory") + raise FileNotFoundError(f"Argument `--output_path` must be an existing directory") + return args From c80159ca9f9620ac7d2d22fc6505f26f4cbc8bb5 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Mon, 2 Sep 2024 09:34:02 -0600 Subject: [PATCH 094/260] Add ability in retrieve_data.py to use forecast hour templates as well (e.g. 
{fyyyy}, {fhh}, {fyyyymmdd}, etc) to supplement the current templates based on cycle date

---
 ush/retrieve_data.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py
index c0df9def56..c30415b1ee 100755
--- a/ush/retrieve_data.py
+++ b/ush/retrieve_data.py
@@ -214,6 +214,8 @@ def fill_template(template_str, cycle_date, templates_only=False, **kwargs):
     mem = kwargs.get("mem", "")
 
     # -----
+    # Set some variables to make the format statement more concise
+    f_date = cycle_date + dt.timedelta(hours=fcst_hr)
     cycle_hour = cycle_date.strftime("%H")
 
     # One strategy for binning data files at NCEP is to put them into 6
@@ -233,17 +235,26 @@ def fill_template(template_str, cycle_date, templates_only=False, **kwargs):
         ens_group=ens_group,
         fcst_hr=fcst_hr,
         dd=cycle_date.strftime("%d"),
+        fdd=f_date.strftime("%d"),
         hh=cycle_hour,
+        fhh=f_date.strftime("%H"),
         hh_even=hh_even,
         jjj=cycle_date.strftime("%j"),
+        fjjj=f_date.strftime("%j"),
         mem=mem,
         min=cycle_date.strftime("%M"),
         mm=cycle_date.strftime("%m"),
+        fmm=f_date.strftime("%m"),
         yy=cycle_date.strftime("%y"),
+        fyy=f_date.strftime("%y"),
         yyyy=cycle_date.strftime("%Y"),
+        fyyyy=f_date.strftime("%Y"),
         yyyymm=cycle_date.strftime("%Y%m"),
+        fyyyymm=f_date.strftime("%Y%m"),
         yyyymmdd=cycle_date.strftime("%Y%m%d"),
+        fyyyymmdd=f_date.strftime("%Y%m%d"),
         yyyymmddhh=cycle_date.strftime("%Y%m%d%H"),
+        fyyyymmddhh=f_date.strftime("%Y%m%d%H"),
     )
 
     if templates_only:

From d1a88b2a034c5578e720373ff75f02cf2606cb60 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr"
Date: Tue, 24 Sep 2024 15:31:21 +0000
Subject: [PATCH 095/260] Revert "Fix convoluted problem with logic that
 checks for successful retrieval from HPSS"

This reverts commit 399f3378071418f4567fe060b84a0e34dd88f323.

---
 ush/retrieve_data.py | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py
index c30415b1ee..f557a465a6 100755
--- a/ush/retrieve_data.py
+++ b/ush/retrieve_data.py
@@ -52,15 +52,7 @@ def clean_up_output_dir(expected_subdir, local_archive, output_path, source_path
     expand_source_paths = []
     logging.debug(f"Cleaning up local paths: {source_paths}")
     for p in source_paths:
-        globbed=glob.glob(p.lstrip("/"))
-        if globbed:
-            expand_source_paths.extend(globbed)
-        else:
-            logging.warning(f"Input source path {p} did not match any extracted files!")
-            if unavailable.get("hpss"):
-                unavailable["hpss"].append(p)
-            else:
-                unavailable["hpss"] = [p]
+        expand_source_paths.extend(glob.glob(p.lstrip("/")))
 
     # Check to make sure the files exist on disk
     for file_path in expand_source_paths:
@@ -594,10 +586,10 @@ def hpss_requested_files(cla, file_names, store_specs, members=-1, ens_group=-1)
         existing_archive = hsi_single_file(existing_archive, mode="get")
 
         # Grab only the necessary files from the archive
-        cmd = f'unzip -o {os.path.basename(existing_archive)} {" ".join(expected)}'
+        cmd = f'unzip -o {os.path.basename(existing_archive)} {" ".join(source_paths)}'
 
     else:
-        cmd = f'htar -xvf {existing_archive} {" ".join(expected)}'
+        cmd = f'htar -xvf {existing_archive} {" ".join(source_paths)}'
 
     logging.info(f"Running command \n {cmd}")
 
@@ -617,22 +609,33 @@ def hpss_requested_files(cla, file_names, store_specs, members=-1, ens_group=-1)
             raise Exception("Error running archive extraction command")
 
     # Check that files exist and Remove any data transfer artifacts.
-    unavailable = set(
+    # Returns {'hpss': []}, turn that into a new dict of
+    # sets.
+ unavailable[existing_archive] = set( clean_up_output_dir( expected_subdir=archive_internal_dir, local_archive=os.path.basename(existing_archive), output_path=output_path, - source_paths=list(expected), + source_paths=source_paths, ).get("hpss", []) ) - # Finally, update the "expected" set, removing files we already found and retrieved. - expected = expected.intersection(unavailable) - + # Once we go through all the archives, the union of all + # "unavailable" files should equal the "expected" list of + # files since clean_up_output_dir only reports on those that + # are missing from one of the files attempted. If any + # additional files are reported as unavailable, then + # something has gone wrong. + unavailable = set.union(*unavailable.values()) - # Return a set of unavailable/not found files. In our case, this represents the remaining elements - # of the "expected" set, since we removed elements as they were found. - return expected + # Break loop if unexpected files were found or if files were found + # A successful file found does not equal the expected file list and + # returns an empty set function. + if not expected == unavailable: + return unavailable - expected + + # If this loop has completed successfully without returning early, then all files have been found + return {} def load_str(arg): @@ -889,7 +892,6 @@ def main(argv): ens_group=ens_group, ) - if not unavailable: # All files are found. Stop looking! # Write a variable definitions file for the data, if requested From 670ea0fcc7838765cc68426435b0c8b74de31d15 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Tue, 24 Sep 2024 23:24:35 +0000 Subject: [PATCH 096/260] Add older HPSS file name for NDAS, add AWS locations for AERONET and AIRNOW --- parm/data_locations.yml | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/parm/data_locations.yml b/parm/data_locations.yml index 211d187dac..6c6a220f88 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -351,11 +351,13 @@ NDAS_obs: - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} + - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} archive_file_names: - "com2_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "gpfs_dell1_nco_ops_com_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "com_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "com_obsproc_v1.1_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" + - "com_obsproc_v1.0_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" file_names: obs: - "./nam.t{hh}z.prepbufr.tm*.nr" @@ -391,6 +393,12 @@ AERONET: - "{yyyy}{mm}{dd}.lev15" archive_internal_dir: - ./validation_data/aq/aeronet/ + aws: + protocol: download + url: "https://aeronet.gsfc.nasa.gov/cgi-bin/" + file_names: + obs: + - "print_web_data_v3?year={yyyy}&month={mm}&day={dd}&AOD15=1&AVG=10" AIRNOW: hpss: @@ -400,12 +408,19 @@ AIRNOW: - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} archive_file_names: - - "dcom_prod_{yyyy}{mm}{dd}.tar" - - "dcom_{yyyy}{mm}{dd}.tar" + - "dcom_prod_{yyyymmdd}.tar" + - "dcom_{yyyymmdd}.tar" file_names: obs: - - "HourlyAQObs_{yyyy}{mm}{dd}*.dat" + - "HourlyAQObs_{yyyymmdd}*.dat" - "Monitoring_Site_Locations_V2.dat" archive_internal_dir: - ./airnow/ + aws: + protocol: download + url: "https://s3-us-west-1.amazonaws.com//files.airnowtech.org/airnow/{fyyyy}/{fyyyymmdd}/" + file_names: + 
obs: + - "HourlyData_{fyyyymmdd}{fhh}.dat" + - "Monitoring_Site_Locations_V2.dat" From 34ead8d140aa22fef8a754dc3d047ff649ce2f46 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Tue, 24 Sep 2024 23:26:35 +0000 Subject: [PATCH 097/260] Updates for new online downloads of AERONET and AIRNOW data - New config settings AIRNOW_INPUT_FORMAT and AIRNOW_DATA_STORES - Get Airnow from AWS by default, and set AIRNOW_INPUT_FORMAT appropriately (this controls how METplus reads the file) as well as default filename - Automatically rename weird HTTP AERONET files - Do not remove "raw" directories for observations, since this messes up running multiple cycles. Temporary fix while I wait for Gerard's bugfix bonanza. --- scripts/exregional_get_verif_obs.sh | 17 ++++++++++------- scripts/exregional_run_met_ascii2nc_obs.sh | 15 +++++++++++++-- ush/config_defaults.yaml | 20 ++++++++++++++++++-- 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 2c53dacbf2..e69965d917 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -31,7 +31,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user nco ; do +for sect in user nco verification; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # @@ -203,6 +203,7 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") vyyyymmdd=$(echo ${vdate} | cut -c1-8) + vyyyy=$(echo ${vdate} | cut -c1-4) vhh=$(echo ${vdate} | cut -c9-10) # Calculate valid date + 1 day; this is needed because some obs files @@ -622,7 +623,7 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do --file_set obs \ --config ${PARMdir}/data_locations.yml \ --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ + --data_stores aws hpss \ --data_type AERONET \ --output_path $aeronet_proc/${vyyyymmdd} \ --summary_file ${logfile}" @@ -635,6 +636,10 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do The following command exited with a non-zero exit status: ${cmd} " + # AERONET pulled from http gets weird filenames, rename to standard name + if [[ -f $aeronet_proc/${vyyyymmdd}/print_web_data_v3?year=${vyyyy} ]]; then + mv $aeronet_proc/${vyyyymmdd}/print_web_data_v3?year=${vyyyy} $aeronet_file + fi fi @@ -665,7 +670,7 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do --file_set obs \ --config ${PARMdir}/data_locations.yml \ --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ + --data_stores ${AIRNOW_DATA_STORES} \ --data_type AIRNOW \ --output_path $airnow_proc/${vyyyymmdd} \ --summary_file ${logfile}" @@ -673,14 +678,12 @@ while [[ ${current_fcst} -le ${fcst_length} ]]; do echo "CALLING: ${cmd}" $cmd || print_err_msg_exit "\ - Could not retrieve AIRNOW data from HPSS + Could not retrieve AIRNOW data from AWS or HPSS The following command exited with a non-zero exit status: ${cmd} " - - fi else @@ -696,7 +699,7 @@ done # Clean up raw, unprocessed observation files -rm -rf ${OBS_DIR}/raw +#rm -rf ${OBS_DIR}/raw # #----------------------------------------------------------------------- diff --git a/scripts/exregional_run_met_ascii2nc_obs.sh b/scripts/exregional_run_met_ascii2nc_obs.sh index a0ea10f33d..d139b84c50 100755 --- a/scripts/exregional_run_met_ascii2nc_obs.sh +++ b/scripts/exregional_run_met_ascii2nc_obs.sh @@ -21,7 +21,6 
@@ done
 #-----------------------------------------------------------------------
 #
 . $USHdir/get_metplus_tool_name.sh
-. $USHdir/set_vx_params.sh
 #
 #-----------------------------------------------------------------------
 #
@@ -92,7 +91,19 @@ if [ "${OBTYPE}" = "AERONET" ]; then
 elif [ "${OBTYPE}" = "AIRNOW" ]; then
   OBS_INPUT_FN_TEMPLATE=${OBS_AIRNOW_FN_TEMPLATE}
   OUTPUT_FN_TEMPLATE=${OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT}
-  ASCII2NC_INPUT_FORMAT=airnowhourlyaqobs
+  if [ -z "${AIRNOW_INPUT_FORMAT}" ]; then
+    if [[ "${OBS_AIRNOW_FN_TEMPLATE}" == *"HourlyData"* ]]; then
+      ASCII2NC_INPUT_FORMAT=airnowhourly
+    elif [[ "${OBS_AIRNOW_FN_TEMPLATE}" == *"HourlyAQObs"* ]]; then
+      ASCII2NC_INPUT_FORMAT=airnowhourlyaqobs
+    else
+      print_err_msg_exit "Could not automatically determine format of Airnow observations;\
+check your filenames (OBS_AIRNOW_FN_TEMPLATE=${OBS_AIRNOW_FN_TEMPLATE})
+or manually set variable AIRNOW_INPUT_FORMAT"
+    fi
+  else
+    ASCII2NC_INPUT_FORMAT=${AIRNOW_INPUT_FORMAT}
+  fi
 else
   print_err_msg_exit "\nNo filename template set for OBTYPE \"${OBTYPE}\"!"
 fi
diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml
index 9bbd36e8f4..9fa3966baa 100644
--- a/ush/config_defaults.yaml
+++ b/ush/config_defaults.yaml
@@ -2416,7 +2416,9 @@ verification:
   # File name template for AERONET observation files.
   #
   # OBS_AIRNOW_FN_TEMPLATE:
-  # File name template for AERONET observation files.
+  # File name template for AIRNOW observation files. NOTE: for files retrieved from HPSS (see
+  # AIRNOW_DATA_STORES variable), the default value should be replaced with
+  # '{valid?fmt=%Y%m%d}/HourlyAQObs_{valid?fmt=%Y%m%d%H}.dat'
   #
   OBS_CCPA_APCP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2'
   OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/sfav2_CONUS_${ACCUM_HH}h_{valid?fmt=%Y%m%d%H}_grid184.grb2'
@@ -2424,7 +2426,7 @@ verification:
   OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'
   OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}'
   OBS_AERONET_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/{valid?fmt=%Y%m%d}.lev15'
-  OBS_AIRNOW_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/HourlyAQObs_{valid?fmt=%Y%m%d%H}.dat'
+  OBS_AIRNOW_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/HourlyData_{valid?fmt=%Y%m%d%H}.dat'
   #
   # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
   # Template used to specify the names of the output NetCDF observation
@@ -2449,6 +2451,20 @@ verification:
   #
   OBS_AERONET_FN_TEMPLATE_ASCII2NC_OUTPUT: 'hourly_aeronet_obs_{valid?fmt=%Y%m%d%H}.nc'
   OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT: 'hourly_airnow_obs_{valid?fmt=%Y%m%d%H}.nc'
+  #
+  # AIRNOW_INPUT_FORMAT:
+  # Observation format for ASCII Airnow observations. Valid options can be found in the MET
+  # User's Guide: https://met.readthedocs.io/en/latest/Users_Guide/reformat_point.html#ascii2nc-tool
+  # If not specified or set to a blank string, will attempt to determine its value based on the
+  # value of OBS_AIRNOW_FN_TEMPLATE
+  #
+  # AIRNOW_DATA_STORES:
+  # Location(s) to retrieve AIRNOW data from. Valid values are "aws" and/or "hpss", see
+  # parm/data_locations.yml for info on these data stores.
+  #
+  AIRNOW_INPUT_FORMAT: ""
+  AIRNOW_DATA_STORES: aws
+
   #
   # VX_FCST_MODEL_NAME:
   # String that specifies a descriptive name for the model being verified.
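
Usage sketch (illustrative only; the cycle date and output path below are placeholders, and the flags mirror the retrieve_data.py call added to scripts/exregional_get_verif_obs.sh in the patch above — the exact invocation on a given platform may differ):

    python3 -u ush/retrieve_data.py \
        --file_set obs \
        --config parm/data_locations.yml \
        --cycle_date 2024042918 \
        --data_stores aws \
        --data_type AIRNOW \
        --output_path /path/to/expt/obs_data/airnow/raw/20240429 \
        --summary_file retrieve_airnow.log

With the AWS store, the retrieved file name template contains "HourlyData", so the ascii2nc logic added above auto-selects the airnowhourly reader; the HPSS-style "HourlyAQObs" naming would select airnowhourlyaqobs instead.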
From 1556c209ec488b79aa77268e2c450469a25c12ba Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Wed, 25 Sep 2024 02:22:17 +0000 Subject: [PATCH 098/260] Fix task dependencies for Airnow and Aeronet PointStat tasks --- parm/wflow/verify_det.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 305632dbc0..d5259d471a 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -121,7 +121,7 @@ metatask_PointStat: METAVAR: '{% for var in verification.VX_FIELDS %}{% if var in ["ADPSFC", "ADPUPA", "AOD", "PM25", "PM10"] %}{{ "%s " % var }}{% endif %}{% endfor %}' METAOBTYPE: '{% for var in verification.VX_FIELDS %}{% if var =="ADPSFC" or var =="ADPUPA" %}NDAS {% elif var =="AOD" %}AERONET {% elif var =="PM25" or var =="PM10" %}AIRNOW {% endif %}{% endfor %}' METAOBS_DIR: '{% for var in verification.VX_FIELDS %}{% if var =="ADPSFC" or var =="ADPUPA" %}&NDAS_OBS_DIR; {% elif var =="AOD" %}&AERONET_OBS_DIR; {% elif var =="PM25" or var =="PM10" %}&AIRNOW_OBS_DIR; {% endif %}{% endfor %}' - TASKDEP: '{% for var in verification.VX_FIELDS %}{% if var =="ADPSFC" or var =="ADPUPA" %}run_MET_Pb2nc_obs {% elif var =="AOD" %}run_MET_ASCII2nc_obs_AOD {% elif var =="PM25" or var =="PM10" %}run_MET_ASCII2nc_obs_{{"%s" % var}} {% endif %}{% endfor %}' + TASKDEP: '{% for var in verification.VX_FIELDS %}{% if var =="ADPSFC" or var =="ADPUPA" %}run_MET_Pb2nc_obs {% elif var =="AOD" %}run_MET_ASCII2nc_obs_AERONET {% elif var =="PM25" or var =="PM10" %}run_MET_ASCII2nc_obs_AIRNOW {% endif %}{% endfor %}' metatask_PointStat_#METAVAR#_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' From fc7e257ebc88750bc0d0a1d20c979506f49fc179 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 07:22:44 -0600 Subject: [PATCH 099/260] Changes to allow NOHRSC obs and ASNOW to be accumulated by PcpCombine and verified in GridStat. --- parm/data_locations.yml | 2 +- parm/metplus/GridStat_or_PointStat.conf | 11 ++------ parm/metplus/PcpCombine.conf | 34 ++++++++++++++++--------- parm/metplus/vx_config_det.yaml | 1 + parm/metplus/vx_config_ens.yaml | 1 + 5 files changed, 27 insertions(+), 22 deletions(-) diff --git a/parm/data_locations.yml b/parm/data_locations.yml index a3712a1972..5a30e48774 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -371,6 +371,6 @@ NOHRSC_obs: - "dcom_{yyyy}{mm}{dd}.tar" file_names: obs: - - "sfav2_CONUS_*h_{yyyy}{mm}{dd}{hh}_grid184.grb2" + - "sfav2_CONUS_6h_{yyyy}{mm}{dd}*_grid184.grb2" archive_internal_dir: - ./wgrbbul/nohrsc_snowfall/ diff --git a/parm/metplus/GridStat_or_PointStat.conf b/parm/metplus/GridStat_or_PointStat.conf index 39d34eb24f..7bd0039ab5 100644 --- a/parm/metplus/GridStat_or_PointStat.conf +++ b/parm/metplus/GridStat_or_PointStat.conf @@ -93,7 +93,7 @@ METPLUS_CONF = {{ '{' ~ METPLUS_TOOL_NAME ~ '_OUTPUT_DIR}' }}/metplus_final.{{me {%- if (METPLUS_TOOL_NAME == 'GRID_STAT') %} - {%- if (input_field_group == 'APCP') %} + {%- if input_field_group in ['APCP', 'ASNOW'] %} #{{METPLUS_TOOL_NAME}}_INTERP_FIELD = BOTH #{{METPLUS_TOOL_NAME}}_INTERP_VLD_THRESH = 1.0 @@ -539,15 +539,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. 
- -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index c2807e6380..5bdd09c761 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -79,17 +79,26 @@ FCST_PCP_COMBINE_RUN = False # {{FCST_OR_OBS}}_PCP_COMBINE_METHOD = ADD -{%- if (FCST_OR_OBS == 'FCST') and (input_field_group == 'ASNOW') %} +{%- if (FCST_OR_OBS == 'FCST') %} + {%- if (input_field_group == 'ASNOW') %} # # Specify name of variable for Snowfall Accumulation. -# NOTE: Currently TSNOWP is used which is a constant-density estimate of snowfall accumulation. -# In future RRFS development, a GSL product with variable-density snowfall accumulation -# is planned for UPP. When that is included and turned on in post, this variable may be changed -# to ASNOW. # -FCST_PCP_COMBINE_INPUT_NAMES = TSNOWP - -FCST_PCP_COMBINE_INPUT_LEVELS = A01 +# NOTE: +# For forecasts, currently TSNOWP is used which is a constant-density +# estimate of snowfall accumulation. In future RRFS development, a GSL +# product with variable-density snowfall accumulation is planned for UPP. +# When that is included and turned on in post, this variable may be +# changed to ASNOW. +# +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_NAMES = TSNOWP +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_LEVELS = A{{input_accum_hh}} + {%- endif %} +{%- elif (FCST_OR_OBS == 'OBS') %} + {%- if (input_field_group == 'ASNOW') %} +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_NAMES = ASNOW +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_LEVELS = A{{input_accum_hh}} + {%- endif %} {%- endif %} # # Specify how to name the array in the NetCDF file that PcpCombine @@ -110,18 +119,19 @@ FCST_PCP_COMBINE_INPUT_LEVELS = A01 # the output NetCDF file). # {%- if (input_field_group in ['APCP', 'ASNOW']) %} -{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_NAME = {{fieldname_in_met_output}}_{{accum_hh}} +{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_NAME = {{fieldname_in_met_output}}_{{output_accum_hh}} {%- else %} {{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_NAME = {{fieldname_in_met_output}} {%- endif %} # # Accumulation interval available in the input data. # -{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = 01 +#{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = 01 +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = {{input_accum_hh}} # # Accumulation interval to generate in the output file. # -{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_ACCUM = {{accum_hh}} +{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_ACCUM = {{output_accum_hh}} # # If the output NetCDF file already exists, specify whether or not to # skip the call to PcpCombine. @@ -177,7 +187,7 @@ FCST_PCP_COMBINE_CONSTANT_INIT = True # # Name to identify observation data in output. # -OBTYPE = CCPA +OBTYPE = {{obtype}} {%- endif %} # # Specify file type of input data. 
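Because PcpCombine is run in ADD mode for both forecasts and obs, a 24-hour ASNOW accumulation is assembled from the four 6-hourly NOHRSC files ending at the valid time. The short sketch below is illustrative only (it is not part of the patch); it lists the contributing 6-hourly files for one 24-hour accumulation, using the sfav2 naming convention from data_locations.yml above:

    valid="2023021812"   # end of the 24-hour accumulation (YYYYMMDDHH)
    for hrs_back in 18 12 6 0; do
      t=$(date -d "${valid:0:8} ${valid:8:2} -${hrs_back} hours" +%Y%m%d%H)
      echo "sfav2_CONUS_6h_${t}_grid184.grb2"   # 6-h accumulation ending at this time
    done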
diff --git a/parm/metplus/vx_config_det.yaml b/parm/metplus/vx_config_det.yaml index 8ea3fd5e13..c20e192dcb 100644 --- a/parm/metplus/vx_config_det.yaml +++ b/parm/metplus/vx_config_det.yaml @@ -50,6 +50,7 @@ APCP: ASNOW: ASNOW: A6: ['gt0.0', 'ge2.54', 'ge5.08', 'ge10.16', 'ge20.32'] + A24: ['gt0.0', 'ge2.54', 'ge10.16', 'ge20.32', 'ge30.48'] REFC: REFC%%MergedReflectivityQCComposite: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] diff --git a/parm/metplus/vx_config_ens.yaml b/parm/metplus/vx_config_ens.yaml index 5f55254a4c..2608490565 100644 --- a/parm/metplus/vx_config_ens.yaml +++ b/parm/metplus/vx_config_ens.yaml @@ -14,6 +14,7 @@ APCP: ASNOW: ASNOW: A6: ['gt0.0', 'ge2.54', 'ge5.08', 'ge10.16', 'ge20.32'] + A24: ['gt0.0', 'ge2.54', 'ge10.16', 'ge20.32', 'ge30.48'] REFC: REFC%%MergedReflectivityQCComposite: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] From 75325447c87aa12ccb95f8bd35ccab220e78add6 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 07:28:34 -0600 Subject: [PATCH 100/260] Changes to get_obs/vx WE2E tests to get them working with staged forecast output at new location. --- ...7hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 21 ++++++++----------- ...1hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 21 ++++++++----------- ...24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 21 ++++++++----------- ...24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 21 ++++++++----------- ...24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 21 ++++++++----------- ...24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 21 ++++++++----------- ...96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 21 ++++++++----------- 7 files changed, 63 insertions(+), 84 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index e386d84e3d..41428a7939 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -15,16 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 7 - DATE_FIRST_CYCL: '2024042902' - DATE_LAST_CYCL: '2024043006' - FCST_LEN_HRS: 9 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -44,6 +34,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 7 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043006' + FCST_LEN_HRS: 9 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -57,7 +55,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 9694f9845f..2fae0d6388 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -17,16 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 11 - DATE_FIRST_CYCL: '2024042902' - DATE_LAST_CYCL: '2024043022' - FCST_LEN_HRS: 3 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -46,6 +36,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 11 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043022' + FCST_LEN_HRS: 3 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -59,7 +57,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index 067187a216..e150234a47 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -15,16 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 24 - DATE_FIRST_CYCL: '2024042900' - DATE_LAST_CYCL: '2024043000' - FCST_LEN_HRS: 3 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -44,6 +34,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042900' + DATE_LAST_CYCL: '2024043000' + FCST_LEN_HRS: 3 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -57,7 +55,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml index 75d706ba1f..2e180e2714 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -17,16 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 24 - DATE_FIRST_CYCL: '2024042912' - DATE_LAST_CYCL: '2024043012' - FCST_LEN_HRS: 3 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -46,6 +36,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024043012' + FCST_LEN_HRS: 3 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -59,7 +57,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index a55c7c1e0b..37c3eceb24 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -16,16 +16,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 24 - DATE_FIRST_CYCL: '2024042912' - DATE_LAST_CYCL: '2024050212' - FCST_LEN_HRS: 48 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -45,6 +35,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024050212' + FCST_LEN_HRS: 48 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -58,7 +56,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index e5f8fc8d1f..d8eb349433 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -15,16 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 24 - DATE_FIRST_CYCL: '2024042921' - DATE_LAST_CYCL: '2024043021' - FCST_LEN_HRS: 3 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -44,6 +34,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042921' + DATE_LAST_CYCL: '2024043021' + FCST_LEN_HRS: 3 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -57,7 +55,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'hrrr_ncep' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 6445ebca53..514dbed8d3 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -17,16 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - -workflow: - PREEXISTING_DIR_METHOD: rename - PREDEF_GRID_NAME: RRFS_CONUS_25km - INCR_CYCL_FREQ: 96 - DATE_FIRST_CYCL: '2024042912' - DATE_LAST_CYCL: '2024051112' - FCST_LEN_HRS: 48 - -platform: # Base directories in which to look for obs. If these do not exist and/or # do not contain the requied obs, create and populate them. 
# Note that when performing WE2E tests, the default behavior is not to @@ -46,6 +36,14 @@ platform: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 96 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024051112' + FCST_LEN_HRS: 48 + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' @@ -59,7 +57,6 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' - VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests' - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{{VX_FCST_MODEL_NAME}}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}{% endif %}' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' From 877bf05efad49fb5c15f1c568d72d024c0408e1b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 07:39:42 -0600 Subject: [PATCH 101/260] Add WE2E test to get NOHRSC obs and do vx on 6-hour and 24-hour snowfall accumulation. --- ...c.init_00z_fcstlen_36hr.winter_wx.SRW.yaml | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml new file mode 100644 index 0000000000..6069ce8212 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml @@ -0,0 +1,61 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, NOHRSC, MRMS, and NDAS + observations from HPSS for a single cycle with a relatively long forecast + (36 hours) cycle and then perform deterministic verification, including + first performing vx preprocessing with METplus tools such as PcpCombine + and Pb2Nc. + + The staged forecast data are from the SRW itself. + + This test uses a winter case to ensure that ASNOW is verified correctly + for both 6-hour and 24-hour accumulations. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. 
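+  # (Illustrative note: once the Jinja templates below are expanded, these
+  # point under the experiment directory, e.g. CCPA_OBS_DIR becomes
+  # <EXPTDIR>/obs_data/ccpa.)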
+ CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + +workflow: + PREEXISTING_DIR_METHOD: rename + # This is required in the experiment generation step, although it shouldn't + # since a forecast is not being run. + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2023021700' + DATE_LAST_CYCL: '2023021700' + FCST_LEN_HRS: 36 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'custom_ESGgrid_Michigan_Ontario' + +verification: + VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' + VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/SRW' From 4495813d591ea7e536e073f8fe23f68a2ed2562d Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 07:41:36 -0600 Subject: [PATCH 102/260] Add script to get NOHRSC obs from HPSS. --- ush/get_obs_nohrsc.sh | 475 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 475 insertions(+) create mode 100755 ush/get_obs_nohrsc.sh diff --git a/ush/get_obs_nohrsc.sh b/ush/get_obs_nohrsc.sh new file mode 100755 index 0000000000..910cf3c35a --- /dev/null +++ b/ush/get_obs_nohrsc.sh @@ -0,0 +1,475 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +. $USHdir/source_util_funcs.sh +for sect in user platform verification ; do + source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} +done + +set -u +#set -x +# +#----------------------------------------------------------------------- +# +# This script performs several important tasks for preparing data for +# verification tasks. Depending on the value of the environment variable +# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data +# set. +# +# If data is not available on disk (in the location specified by +# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), +# the script attempts to retrieve the data from HPSS using the retrieve_data.py +# script. Depending on the data set, there are a few strange quirks and/or +# bugs in the way data is organized; see in-line comments for details. +# +# NOHRSC snow accumulation observations +# ---------- +# If data is available on disk, it must be in the following +# directory structure and file name conventions expected by verification +# tasks: +# +# {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 +# +# where AA is the 2-digit accumulation duration in hours: 06 or 24 +# +# METplus is configured to verify snowfall using 06- and 24-h accumulated +# snowfall from 6- and 12-hourly NOHRSC files, respectively. +# +# If data is retrieved from HPSS, it will automatically staged by this +# this script. 
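+#
+# As an illustrative example (not part of the original header): a 6-hour
+# accumulation file valid at 2023021712 would be staged by this script as
+#
+#   sfav2_CONUS_6h_2023021712_grid184.grb2
+#
+# under the processed base directory given by NOHRSC_OBS_DIR.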
+#----------------------------------------------------------------------- +# + +# The day (in the form YYYMMDD) associated with the current task via the +# task's cycledefs attribute in the ROCOTO xml. +yyyymmdd_task=${PDY} + +# Base directory in which the daily subdirectories containing the grib2 +# obs files will appear after this script is done. We refer to this as +# the "processed" base directory because it contains the files after all +# processing by this script is complete. +basedir_proc=${OBS_DIR} +# +#----------------------------------------------------------------------- +# +# Generate a list of forecast output times for the current day. Note +# that if the 0th hour of the next day (i.e. the day after the one +# associated with this task) is one of the forecast output times, we +# include it in the list for the current day because the accumulation +# associated with that hour occurred during the current day. +# +#----------------------------------------------------------------------- +# + +# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is +# a scalar string containing all relevant forecast output times (each in +# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings +# because in ROCOTO, there doesn't seem to be a way to pass a bash array +# from the XML to the task's script. To have an array-valued variable to +# work with, here, we create the new variable fcst_output_times_all that +# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. +fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) + +# List of times (each of the form YYYYMMDDHH) for which there is forecast +# ASNOW (accumulated snow) output for the current day. We start constructing +# this by extracting from the full list of all forecast ASNOW output times +# (i.e. from all cycles) all elements that contain the current task's day +# (in the form YYYYMMDD). +fcst_output_times_crnt_day=() +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then + fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) +fi +# If the 0th hour of the current day is in this list (and if it is, it +# will be the first element), remove it because for ASNOW, that time is +# considered part of the previous day (because it represents snowfall +# that occurred during the last hour of the previous day). +if [[ ${#fcst_output_times_crnt_day[@]} -gt 0 ]] && \ + [[ ${fcst_output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then + fcst_output_times_crnt_day=(${fcst_output_times_crnt_day[@]:1}) +fi +# If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is +# one of the output times in the list of all ASNOW output times, we +# include it in the list for the current day because for ASNOW, that time +# is considered part of the current day (because it represents snowfall +# that occured during the last hour of the current day). +yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) +if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then + fcst_output_times_crnt_day+=(${yyyymmdd00_task_p1d}) +fi + +# If there are no forecast ASNOW output times on the day of the current +# task, exit the script. 
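+# As an illustrative example (not taken from the code): for a single cycle
+# initialized at 2023021700 with a 36-hour forecast and 6-hourly ASNOW
+# output, the task for day 20230217 ends up with the list
+# (2023021706 2023021712 2023021718 2023021800), where the last element is
+# hour 00 of the next day; a task day with no ASNOW output times at all
+# would instead take the early exit below.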
+num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} +if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then + print_info_msg " +None of the forecast ASNOW output times fall within the day (including the +0th hour of the next day) associated with the current task (yyyymmdd_task): + yyyymmdd_task = \"${yyyymmdd_task}\" +Thus, there is no need to retrieve any obs files." + exit +fi +# +#----------------------------------------------------------------------- +# +# Generate a list of all the times at which obs are available for the +# current day, possibly including hour 00 of the next day. +# +#----------------------------------------------------------------------- +# + +# The time interval (in hours) at which the obs are available on HPSS +# must be evenly divisible into 24. Otherwise, different days would +# have obs available at different hours. Make sure this is the case. +remainder=$(( 24 % NOHRSC_OBS_AVAIL_INTVL_HRS )) +if [ ${remainder} -ne 0 ]; then + print_err_msg_exit "\ +The obs availability interval NOHRSC_OBS_AVAIL_INTVL_HRS must divide evenly +into 24 but doesn't: + NOHRSC_OBS_AVAIL_INTVL_HRS = ${NOHRSC_OBS_AVAIL_INTVL_HRS} + mod(24, NOHRSC_OBS_AVAIL_INTVL_HRS) = ${remainder}" +fi + +# Construct the array of times during the current day (and possibly +# during hour 00 of the next day) at which obs are available on HPSS. +# Each element of this array is of the form "YYYYMMDDHH". +num_obs_avail_times=$((24/NOHRSC_OBS_AVAIL_INTVL_HRS)) +obs_avail_times_crnt_day=() +# Note: Start at i=1 because the output for hour 00 of the current day is +# considered part of the previous day (because it represents accumulation +# that occurred during the previous day). +for (( i=1; i<$((num_obs_avail_times+1)); i++ )); do + hrs=$((i*NOHRSC_OBS_AVAIL_INTVL_HRS)) + obs_avail_times_crnt_day+=( $(${DATE_UTIL} --date "${yyyymmdd_task} ${hrs} hours" +%Y%m%d%H) ) +done +# +#----------------------------------------------------------------------- +# +# Generate a list of all the times at which to retrieve obs. This is +# obtained from the intersection of the list of times at which there is +# forecast output and the list of times at which there are obs available. +# Note that if the forecast output is more frequent than the data is +# available, then the forecast values must be accumulated together to +# get values at the times at which the obs are available. This is done +# in another workflow task using the METplus tool PcpCombine. +# +#----------------------------------------------------------------------- +# +obs_retrieve_times_crnt_day=() +for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do + if [[ ${obs_avail_times_crnt_day[@]} =~ ${yyyymmddhh} ]] ; then + obs_retrieve_times_crnt_day+=(${yyyymmddhh}) + fi +done +# +#----------------------------------------------------------------------- +# +# +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times=\( \${${array_name}[@]} \) +echo +echo "QQQQQQQQQQQQQQQQQQQ" +#echo "obs_retrieve_times = |${obs_retrieve_times[@]}|" +echo "obs_retrieve_times =" +echo "|${obs_retrieve_times[@]}|" + +# For testing. 
+#obs_retrieve_times+=('abcd') +#obs_retrieve_times[4]='abcd' + +err_msg=" +The two methods of obtaining the array of obs retrieve times don't match: + obs_retrieve_times_crnt_day = + (${obs_retrieve_times_crnt_day[@]}) + obs_retrieve_times = + (${obs_retrieve_times[@]})" + +n1=${#obs_retrieve_times_crnt_day[@]} +n2=${#obs_retrieve_times[@]} +if [ ${n1} -ne ${n2} ]; then + print_err_msg_exit "${err_msg}" +fi + +for (( i=0; i<${n1}; i++ )); do + elem1=${obs_retrieve_times_crnt_day[$i]} + elem2=${obs_retrieve_times[$i]} + if [ ${elem1} != ${elem2} ]; then + print_err_msg_exit "${err_msg}" + fi +done + +obs_retrieve_times_crnt_day=($( printf "%s " "${obs_retrieve_times[@]}" )) + +echo +echo "RRRRRRRRRRRRRRRRR" +#echo "obs_retrieve_times_crnt_day = |${obs_retrieve_times_crnt_day[@]}|" +echo "obs_retrieve_times_crnt_day =" +echo "|${obs_retrieve_times_crnt_day[@]}|" + +#exit 1 +# +#----------------------------------------------------------------------- +# +# Obs files will be obtained by extracting them from the relevant 24-hourly +# archives. Thus, we need the sequence of archive hours over which to +# loop. In the simplest case, this sequence will be "0 24". This will +# be the case if the forecast output times include all hours of the +# task's day and if none of the obs files for this day already exist on +# disk. In other cases, the sequence we loop over will be a subset of +# "0 24", e.g. just "0" or just "24". +# +# To generate this sequence, we first set its starting and ending values +# as well as the interval. +# +#----------------------------------------------------------------------- +# + +# Sequence interval must be 24 hours because the archives are 24-hourly. +arcv_hr_incr=24 + +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first obs retrieval time of the day. +hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) +hr_first=$((10#${hh_first})) +arcv_hr_start=$(( hr_first/arcv_hr_incr )) +arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) + +# Ending archive hour. This is set to the archive hour containing obs at +# the last obs retrieval time of the day. +hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) +hr_last=$((10#${hh_last})) +if [[ ${hr_last} -eq 0 ]]; then + arcv_hr_end=24 +else + arcv_hr_end=$(( hr_last/arcv_hr_incr )) + arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) +fi + +# Check whether any obs files already exist on disk. If so, adjust the +# starting archive hour. In the process, keep a count of the number of +# obs files that already exist on disk. +num_existing_files=0 +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + day_dir_proc="${basedir_proc}" + fn_proc="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fp_proc="${day_dir_proc}/${fn_proc}" + if [[ -f ${fp_proc} ]]; then + num_existing_files=$((num_existing_files+1)) + print_info_msg " +File already exists on disk: + fp_proc = \"${fp_proc}\"" + else + hr=$((10#${hh})) + arcv_hr_start=$(( hr/arcv_hr_incr )) + arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) + print_info_msg " +File does not exist on disk: + fp_proc = \"${fp_proc}\" +Setting the hour (since 00) of the first archive to retrieve to: + arcv_hr_start = \"${arcv_hr_start}\"" + break + fi +done + +# If the number of obs files that already exist on disk is equal to the +# number of obs files needed, then there is no need to retrieve any files. 
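+# (For instance, with NOHRSC_OBS_AVAIL_INTVL_HRS=6 there are at most four
+# obs retrieval times per day, so at most four processed files are expected
+# here.)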
+num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]}
+if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then
+
+  print_info_msg "
+All obs files needed for the current day (yyyymmdd_task) already exist
+on disk:
+  yyyymmdd_task = \"${yyyymmdd_task}\"
+Thus, there is no need to retrieve any files."
+  exit
+
+# If the number of obs files that already exist on disk is not equal to
+# the number of obs files needed, then we will need to retrieve files.
+# In this case, set the sequence of hours corresponding to the archives
+# from which files will be retrieved.
+else
+
+  arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end}))
+  arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")"
+  print_info_msg "
+At least some obs files needed for the current day (yyyymmdd_task)
+do not exist on disk:
+  yyyymmdd_task = \"${yyyymmdd_task}\"
+The number of obs files needed for the current day (which is equal to the
+number of observation retrieval times for the current day) is:
+  num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day}
+The number of obs files that already exist on disk is:
+  num_existing_files = ${num_existing_files}
+Will retrieve remaining files by looping over archives corresponding to
+the following hours (since 00 of this day):
+  arcv_hrs = ${arcv_hrs_str}
+"
+
+fi
+#
+#-----------------------------------------------------------------------
+#
+# At this point, at least some obs files for the current day need to be
+# retrieved. The NOHRSC data on HPSS are archived by day, with the
+# archive for a given day containing 6-hour as well as 24-hour grib2
+# files. The four 6-hour files are for accumulated snowfall at 00z
+# (which represents accumulation over the last 6 hours of the previous
+# day), 06z, 12z, and 18z, while the two 24-hour files are at 00z (which
+# represents accumulation over all 24 hours of the previous day) and 12z
+# (which represents accumulation over the last 12 hours of the previous
+# day plus the first 12 hours of the current day).
+#
+# Here, we will only obtain the 6-hour files. In other workflow tasks,
+# the values in these 6-hour files will be added as necessary to obtain
+# accumulations over longer periods (e.g. 24 hours). Since the four
+# 6-hour files are in one archive and are relatively small (on the order
+# of kilobytes), we get them all with a single call to the retrieve_data.py
+# script.
+#
+#-----------------------------------------------------------------------
+#
+
+# Whether to move or copy files from raw to processed directories.
+#mv_or_cp="mv"
+mv_or_cp="cp"
+# Whether to remove raw observations after processed directories have
+# been created from them.
+remove_raw_obs="${REMOVE_RAW_OBS_NOHRSC}"
+# If the raw directories and files are to be removed at the end of this
+# script, no need to copy the files since the raw directories are going
+# to be removed anyway.
+if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then
+  mv_or_cp="mv"
+fi
+
+# Base directory that will contain the daily subdirectories in which the
+# NOHRSC grib2 files retrieved from archive (tar) files will be placed.
+# We refer to this as the "raw" base directory because it contains files
+# as they are found in the archives before any processing by this script.
+basedir_raw="${basedir_proc}/raw_${yyyymmdd_task}"
+
+for arcv_hr in ${arcv_hrs[@]}; do
+
+  print_info_msg "
+arcv_hr = ${arcv_hr}"
+
+  # Calculate the time information for the current archive.
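+  # (For example, with yyyymmdd_task=20230217 and arcv_hr=24, this yields
+  # yyyymmddhh_arcv=2023021800, i.e. the archive labeled with hour 00 of
+  # the following day.)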
+  yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H)
+  yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8)
+  hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10)
+
+  # Directory that will contain the grib2 files retrieved from the current
+  # archive file. We refer to this as the "raw" archive directory because
+  # it will contain the files as they are in the archive before any processing
+  # by this script.
+  arcv_dir_raw="${basedir_raw}/${yyyymmdd_arcv}"
+
+  # Check whether any of the obs retrieval times for the day associated with
+  # this task fall in the time interval spanned by the current archive. If
+  # so, set the flag (do_retrieve) to retrieve the files in the current
+  # archive.
+  arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv}" +%Y%m%d%H)
+  hrs=$((arcv_hr_incr - 1))
+  arcv_contents_yyyymmddhh_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H)
+  do_retrieve="FALSE"
+  for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do
+    obs_retrieve_time=${obs_retrieve_times_crnt_day[i]}
+    if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \
+       [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then
+      do_retrieve="TRUE"
+      break
+    fi
+  done
+
+  if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then
+
+    print_info_msg "
+None of the times in the current day (or hour 00 of the next day) at which
+obs need to be retrieved fall in the range spanned by the current ${arcv_hr_incr}-hourly
+archive file. The bounds of the data in the current archive file are:
+  arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\"
+  arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\"
+The times at which obs need to be retrieved are:
+  obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))"
+
+  else
+
+    # Make sure the raw archive directory exists because it is used below as
+    # the output directory of the retrieve_data.py script (so if this directory
+    # doesn't already exist, that script will fail). Creating this directory
+    # also ensures that the raw base directory (basedir_raw) exists before we
+    # change location to it below.
+    mkdir -p ${arcv_dir_raw}
+
+    # The retrieve_data.py script first extracts the contents of the archive
+    # file into the directory it was called from and then moves them to the
+    # specified output location (via the --output_path option). In order to
+    # prevent other get_obs_nohrsc tasks (i.e. those associated with other
+    # days) from interfering with (clobbering) these files (because files
+    # extracted by different get_obs_nohrsc tasks may have the same names or
+    # relative paths), we change location to the base raw directory so that
+    # files with the same names are extracted into different directories.
+    cd ${basedir_raw}
+
+    # Pull obs from HPSS. This will get all the obs files in the current
+    # archive and place them in the raw archive directory.
+    cmd="
+    python3 -u ${USHdir}/retrieve_data.py \
+      --debug \
+      --file_set obs \
+      --config ${PARMdir}/data_locations.yml \
+      --cycle_date ${yyyymmddhh_arcv} \
+      --data_stores hpss \
+      --data_type NOHRSC_obs \
+      --output_path ${arcv_dir_raw} \
+      --summary_file retrieve_data.log"
+
+    print_info_msg "CALLING: ${cmd}"
+    $cmd || print_err_msg_exit "Could not retrieve obs from HPSS."
+
+    # Create the processed NOHRSC grib2 files. This consists of simply copying
+    # or moving them from the raw daily directory to the processed directory.
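+    # (The seq below visits the four 6-hourly valid times contained in the
+    # archive, i.e. offsets of 0, 6, 12, and 18 hours from the archive time;
+    # for an archive at 2023021800 these are 2023021800, 2023021806,
+    # 2023021812, and 2023021818.)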
+ for hrs in $(seq 0 6 18); do + yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H) + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + # Create the processed grib2 obs file from the raw one (by moving, copying, + # or otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. + if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + fn_raw="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fp_raw="${arcv_dir_raw}/${fn_raw}" + day_dir_proc="${basedir_proc}" + mkdir -p ${day_dir_proc} + fn_proc="${fn_raw}" + #fn_proc="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fp_proc="${day_dir_proc}/${fn_proc}" + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi + done + + fi + +done +# +#----------------------------------------------------------------------- +# +# Clean up raw obs directories. +# +#----------------------------------------------------------------------- +# +if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then + print_info_msg "Removing raw obs directories..." + rm -rf ${basedir_raw} || print_err_msg_exit "\ +Failed to remove raw obs directories." +fi From 80f2a1258c4e8053e9806af41f6d6548aea718c1 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 10:44:50 -0600 Subject: [PATCH 103/260] Add new variables specifying the time intervals at which different types of obs are available on NOAA HPSS. Use these new variables in file name templates. --- ush/config_defaults.yaml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index bbddf30874..687463de5f 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2423,6 +2423,13 @@ verification: # METPLUS_VERBOSITY_LEVEL: 2 # + # Time interval (in hours) at which various types of obs are available on + # NOAA's HPSS. + CCPA_OBS_AVAIL_INTVL_HRS: 1 + NOHRSC_OBS_AVAIL_INTVL_HRS: 6 + MRMS_OBS_AVAIL_INTVL_HRS: 1 + NDAS_OBS_AVAIL_INTVL_HRS: 1 + # # Templates for CCPA, MRMS, and NDAS observation files. # # OBS_CCPA_APCP_FN_TEMPLATE: @@ -2448,8 +2455,10 @@ verification: # surface (ADPSFC) or ADP upper air (ADPUPA) fields and then generate # NetCDF versions of these files. # - OBS_CCPA_APCP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2' - OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/sfav2_CONUS_${ACCUM_HH}h_{valid?fmt=%Y%m%d%H}_grid184.grb2' + OBS_CCPA_APCP_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ data_intvl_hrs ~ "h.hrap.conus.gb2" }}' + OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' @@ -2467,7 +2476,10 @@ verification: # METplus Pb2nc tool on NDAS observations. (These files will contain # obs ADPSFC or ADPUPA fields in NetCDF format.) 
# - OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 'ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2_a${ACCUM_HH}h.nc' + OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "ccpa.t{valid?fmt=%H}z." ~ data_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}' + OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}' OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc' # # VX_FCST_MODEL_NAME: From a0a938c0667ddd2c3cdf76ae1768470e43532020 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 10:48:52 -0600 Subject: [PATCH 104/260] Rearrange the settings for the forecast file templates in config_default.sh so that they're more readable. --- ush/config_defaults.yaml | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 687463de5f..56bd15b814 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2547,9 +2547,25 @@ verification: # both for 1 hour and for > 1 hour accumulation periods, in NetCDF # format.) # - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}/postprd{% endif %}' - FCST_FN_TEMPLATE: '${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${NET_default}.t{init?fmt=%H}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc' + FCST_SUBDIR_TEMPLATE: '{%- if user.RUN_ENVIR == "nco" %} + {{- "${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}" }} + {%- else %} + {{- "{init?fmt=%Y%m%d%H?shift=-${time_lag}}" }} + {%- if global.DO_ENSEMBLE %} + {{- "/${ensmem_name}" }} + {%- endif %} + {{- "/postprd" }} + {%- endif %}' + FCST_FN_TEMPLATE: '{{- "${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z" }} + {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} + {{- ".${ensmem_name}" }} + {%- endif %} + {{- ".prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2" }}' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- "${NET_default}.t{init?fmt=%H}z" }} + {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} + {{- ".${ensmem_name}" }} + {%- endif %} + {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc" }}' # # For verification tasks that need observational data, this specifies # the maximum number of observation files that may be missing. If more From 692255598ff94ef260720c9ffbf21684ce5677d9 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 10:51:31 -0600 Subject: [PATCH 105/260] Allow verification of 12 and 18-hourly accumulated snowfall since these are now obtained by adding 6-hourly in the obs (as opposed to before, where we just got the 6 or 24 hourly obs without the option to add the 6-hourly to get 12 and 18). 
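The rationale can be sanity-checked with a line of arithmetic: an accumulation is obtainable by summation exactly when it is an integer multiple of the 6-hour obs availability interval. A throwaway check of that condition (illustrative only, not part of this patch set):

    nohrsc_avail_intvl_hrs=6
    for accum in 6 12 18 24; do
      if (( accum % nohrsc_avail_intvl_hrs == 0 )); then
        echo "${accum}-h ASNOW can be built from $(( accum / nohrsc_avail_intvl_hrs )) 6-h files"
      fi
    done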
--- ush/valid_param_vals.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index 3530b51ae9..1ff5405ffb 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -78,4 +78,4 @@ valid_vals_DO_AQM_SAVE_AIRNOW_HIST: [True, False] valid_vals_COLDSTART: [True, False] valid_vals_VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] valid_vals_VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] -valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] +valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 12, 18, 24 ] From cae50b5d8a3ef9d477f6a771eee29cf04dcd0b50 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 12:31:19 -0600 Subject: [PATCH 106/260] In Pb2NC tasks, use the arrays in var_defns.py that specify the times on a given day at which a given types of obs should be retrieved. --- parm/metplus/Pb2nc_obs.conf | 2 +- scripts/exregional_run_met_pcpcombine.sh | 26 ++++++++++++++++-------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/parm/metplus/Pb2nc_obs.conf b/parm/metplus/Pb2nc_obs.conf index 729bf2ba06..24d469602f 100644 --- a/parm/metplus/Pb2nc_obs.conf +++ b/parm/metplus/Pb2nc_obs.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{leadhr_list}} # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 3d4d0cb9fb..9495031722 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -190,11 +190,13 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) + OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" - OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) + OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" fi @@ -213,22 +215,27 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then base_dir="${FCST_INPUT_DIR}" fn_template="${FCST_INPUT_FN_TEMPLATE}" num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" + subintvl_accum_hrs="${FCST_OUTPUT_INTVL_HRS}" elif [ "${FCST_OR_OBS}" = "OBS" ]; then base_dir="${OBS_INPUT_DIR}" fn_template="${OBS_INPUT_FN_TEMPLATE}" num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" + subintvl_accum_hrs="${OBS_AVAIL_INTVL_HRS}" fi +input_accum_hh=$(printf "%02d" ${subintvl_accum_hrs}) +vx_output_intvl_hrs="$((10#${ACCUM_HH}))" -set_vx_fhr_list \ - cdate="${CDATE}" \ +set_vx_hrs_list \ + yyyymmddhh_init="${CDATE}" \ fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + vx_output_intvl_hrs="${vx_output_intvl_hrs}" \ + field_is_cumul="TRUE" \ + check_subintvl_files="TRUE" \ + subintvl_accum_hrs="${subintvl_accum_hrs}" \ base_dir="${base_dir}" \ fn_template="${fn_template}" \ - check_accum_contrib_files="TRUE" \ 
num_missing_files_max="${num_missing_files_max}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_hrs_list="FHR_LIST" # #----------------------------------------------------------------------- # @@ -358,7 +365,8 @@ settings="\ 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' 'obtype': '${OBTYPE}' 'FCST_OR_OBS': '${FCST_OR_OBS}' - 'accum_hh': '${ACCUM_HH:-}' + 'input_accum_hh': '${input_accum_hh}' + 'output_accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' 'input_field_group': '${VAR:-}' From fffdbd3739e8c5ac542e6fea63e5d8b53f5d907e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 12:39:24 -0600 Subject: [PATCH 107/260] In the get_obs tasks, use the arrays in var_defns.py that specify the times on a given day at which a given types of obs should be retrieved. Remove the arrays that specify the combined forecast output times for all cycles. --- parm/wflow/verify_det.yaml | 4 + parm/wflow/verify_pre.yaml | 8 +- ush/get_obs_ccpa.sh | 215 ++++++++++++++++++------------------- ush/get_obs_mrms.sh | 47 ++++---- ush/get_obs_ndas.sh | 102 ++++++++++-------- ush/get_obs_nohrsc.sh | 177 ++++-------------------------- 6 files changed, 204 insertions(+), 349 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 4f4d4672ce..c4f420f10c 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -42,6 +42,7 @@ metatask_GridStat_CCPA_all_accums_all_mems: OBTYPE: 'CCPA' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.CCPA_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' walltime: 02:00:00 @@ -75,6 +76,7 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: OBTYPE: 'NOHRSC' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.NOHRSC_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' walltime: 02:00:00 @@ -105,6 +107,7 @@ metatask_GridStat_MRMS_all_mems: OBTYPE: 'MRMS' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.MRMS_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'L0' FCST_THRESH: 'all' walltime: 02:00:00 @@ -152,6 +155,7 @@ metatask_PointStat_NDAS_all_mems: ACCUM_HH: '01' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.NDAS_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'all' FCST_THRESH: 'all' walltime: 01:00:00 diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 4a9e750c56..80b70f68d3 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -47,7 +47,6 @@ task_get_obs_nohrsc: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' - OUTPUT_TIMES_ALL: *output_times_all_cumul native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -61,10 +60,6 @@ task_get_obs_mrms: OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' MRMS_FIELDS: 'REFC RETOP' - OUTPUT_TIMES_ALL: &output_times_all_inst - '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_INST|length)) %} - {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_INST[i] }} - 
{%- endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -77,7 +72,6 @@ task_get_obs_ndas: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' - OUTPUT_TIMES_ALL: *output_times_all_inst queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -96,7 +90,6 @@ task_run_MET_Pb2nc_obs: FCST_OR_OBS: OBS OBTYPE: NDAS OBS_DIR: '&NDAS_OBS_DIR;' - OUTPUT_TIMES_ALL: *output_times_all_inst METPLUSTOOLNAME: 'PB2NC' dependency: and: @@ -127,6 +120,7 @@ metatask_PcpCombine_obs: FCST_OR_OBS: OBS OBTYPE: CCPA OBS_DIR: '&CCPA_OBS_DIR;' + OBS_AVAIL_INTVL_HRS: '{{- verification.CCPA_OBS_AVAIL_INTVL_HRS }}' METPLUSTOOLNAME: 'PCPCOMBINE' dependency: and: diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index d3ad4c49ce..aabb55e5a4 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user platform ; do +for sect in user platform verification ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -158,93 +158,71 @@ set -u #----------------------------------------------------------------------- # -# CCPA accumulation period to consider. Here, we only retrieve data for -# 1-hour accumulations. Other accumulations (03h, 06h, 24h) are obtained -# by other tasks in the workflow that add up these hourly values. -accum="01" +# The time interval (in hours) at which the obs are available on HPSS +# must divide evenly into 24. Otherwise, different days would have obs +# available at different hours-of-day. Make sure this is the case. +remainder=$(( 24 % CCPA_OBS_AVAIL_INTVL_HRS )) +if [ ${remainder} -ne 0 ]; then + print_err_msg_exit "\ +The obs availability interval CCPA_OBS_AVAIL_INTVL_HRS must divide evenly +into 24 but doesn't: + CCPA_OBS_AVAIL_INTVL_HRS = ${CCPA_OBS_AVAIL_INTVL_HRS} + mod(24, CCPA_OBS_AVAIL_INTVL_HRS) = ${remainder}" +fi + +# Accumulation period to use when getting obs files. This is simply (a +# properly formatted version of) the obs availability interval. +accum_obs_fmt=$( printf "%02d" "${CCPA_OBS_AVAIL_INTVL_HRS}" ) # The day (in the form YYYMMDD) associated with the current task via the # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the CCPA -# grib2 files will appear after this script is done. We refer to this as +# Base directory in which the daily subdirectories containing the grib2 +# obs files will appear after this script is done. We refer to this as # the "processed" base directory because it contains the files after all # processing by this script is complete. basedir_proc=${OBS_DIR} - -# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is -# a scalar string containing all relevant forecast output times (each in -# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to the task's script. 
To have an array-valued variable to -# work with, here, we create the new variable fcst_output_times_all that -# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. -fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# APCP (accumulated precipitation) output for the current day. We start -# constructing this by extracting from the full list of all forecast APCP -# output times (i.e. from all cycles) all elements that contain the current -# task's day (in the form YYYYMMDD). -fcst_output_times_crnt_day=() -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi -# If the 0th hour of the current day is in this list (and if it is, it -# will be the first element), remove it because for APCP, that time is -# considered part of the previous day (because it represents precipitation -# that occurred during the last hour of the previous day). -if [[ ${#fcst_output_times_crnt_day[@]} -gt 0 ]] && \ - [[ ${fcst_output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then - fcst_output_times_crnt_day=(${fcst_output_times_crnt_day[@]:1}) -fi -# If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is -# one of the output times in the list of all APCP output times, we include -# it in the list for the current day because for APCP, that time is -# considered part of the current day (because it represents precipitation -# that occured during the last hour of the current day). -yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then - fcst_output_times_crnt_day+=(${yyyymmdd00_task_p1d}) -fi - -# If there are no forecast APCP output times on the day of the current -# task, exit the script. -num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} -if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the forecast APCP output times fall within the day (including the -0th hour of the next day) associated with the current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any obs files." - exit -fi - +# +#----------------------------------------------------------------------- +# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) +# +#----------------------------------------------------------------------- +# # Obs files will be obtained by extracting them from the relevant 6-hourly # archives. Thus, we need the sequence of archive hours over which to # loop. In the simplest case, this sequence will be "6 12 18 24". This -# will be the case if the forecast output times include all hours of the -# task's day and if none of the obs files for this day already exist on -# disk. In other cases, the sequence we loop over will be a subset of -# "6 12 18 24". +# will be the case if the observation retrieval times include all hours +# of the task's day and if none of the obs files for this day already +# exist on disk. In other cases, the sequence we loop over will be a +# subset of "6 12 18 24". 
# # To generate this sequence, we first set its starting and ending values # as well as the interval. +# +#----------------------------------------------------------------------- +# # Sequence interval must be 6 hours because the archives are 6-hourly. arcv_hr_incr=6 -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first forecast output time of the day. -hh_first=$(echo ${fcst_output_times_crnt_day[0]} | cut -c9-10) +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first obs retrieval time of the day. +hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at -# the last forecast output time of the day. -hh_last=$(echo ${fcst_output_times_crnt_day[-1]} | cut -c9-10) +# the last obs retrieval time of the day. +hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) if [[ ${hr_last} -eq 0 ]]; then arcv_hr_end=24 @@ -257,11 +235,11 @@ fi # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. num_existing_files=0 -for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}/${yyyymmdd}" - fn_proc="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" + fn_proc="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" fp_proc="${day_dir_proc}/${fn_proc}" if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) @@ -273,7 +251,7 @@ File already exists on disk: arcv_hr_start=$(ceil ${hr} ${arcv_hr_incr}) arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) print_info_msg " -File does not exists on disk: +File does not exist on disk: fp_proc = \"${fp_proc}\" Setting the hour (since 00) of the first archive to retrieve to: arcv_hr_start = \"${arcv_hr_start}\"" @@ -282,32 +260,39 @@ Setting the hour (since 00) of the first archive to retrieve to: done # If the number of obs files that already exist on disk is equal to the -# number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_fcst_output_times_crnt_day)) -if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then +# number of obs files needed, then there is no need to retrieve any files. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then + print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" Thus, there is no need to retrieve any files." exit -# Otherwise, will need to retrieve files. In this case, set the sequence -# of hours corresponding to the archives from which files will be retrieved. + +# If the number of obs files that already exist on disk is not equal to +# the number of obs files needed, then we will need to retrieve files. +# In this case, set the sequence of hours corresponding to the archives +# from which files will be retrieved. 
else + arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" print_info_msg " At least some obs files needed needed for the current day (yyyymmdd_task) do not exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed is: - num_needed_files = ${num_needed_files} +The number of obs files needed for the current day (which is equal to the +number of observation retrieval times for the current day) is: + num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} The number of obs files that already exist on disk is: num_existing_files = ${num_existing_files} Will retrieve remaining files by looping over archives corresponding to the following hours (since 00 of this day): arcv_hrs = ${arcv_hrs_str} " + fi # #----------------------------------------------------------------------- @@ -348,36 +333,48 @@ arcv_hr = ${arcv_hr}" yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - # Directory that will contain the CCPA grib2 files retrieved from the - # current 6-hourly archive file. We refer to this as the "raw" quarter- - # daily directory because it will contain the files as they are in the - # archive before any processing by this script. - qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" + # Directory that will contain the grib2 files retrieved from the current + # archive file. We refer to this as the "raw" archive directory because + # it will contain the files as they are in the archive before any processing + # by this script. + arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - # Check whether any of the forecast APCP output times for the day associated - # with this task fall in the time interval spanned by the current archive. - # If so, set the flag (do_retrieve) to retrieve the files in the current + # Check whether any of the obs retrieval times for the day associated with + # this task fall in the time interval spanned by the current archive. If + # so, set the flag (do_retrieve) to retrieve the files in the current # archive. - yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 5 hours ago" +%Y%m%d%H) - yyyymmddhh_qrtrday_end=${yyyymmddhh_arcv} + hrs_ago=$((arcv_hr_incr - 1)) + arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) + arcv_contents_yyyymmddhh_end=${yyyymmddhh_arcv} do_retrieve="FALSE" - for (( i=0; i<${num_fcst_output_times_crnt_day}; i++ )); do - output_time=${fcst_output_times_crnt_day[i]} - if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ - [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do + obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} + if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ + [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then do_retrieve="TRUE" break fi done - if [[ $(boolify "${do_retrieve}") == "TRUE" ]]; then + if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then + + print_info_msg " +None of the times in the current day (or hour 00 of the next day) at which +obs need to be retrieved fall in the range spanned by the current ${arcv_hr_incr}-hourly +archive file. 
The bounds of the data in the current archive file are: + arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" + arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" +The times at which obs need to be retrieved are: + obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" + + else - # Make sure the raw quarter-daily directory exists because it is used - # below as the output directory of the retrieve_data.py script (so if - # this directory doesn't already exist, that script will fail). Creating - # this directory also ensures that the raw base directory (basedir_raw) - # exists before we change location to it below. - mkdir -p ${qrtrday_dir_raw} + # Make sure the raw archive directory exists because it is used below as + # the output directory of the retrieve_data.py script (so if this directory + # doesn't already exist, that script will fail). Creating this directory + # also ensures that the raw base directory (basedir_raw) exists before we + # change location to it below. + mkdir -p ${arcv_dir_raw} # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the @@ -389,8 +386,8 @@ arcv_hr = ${arcv_hr}" # same names are extracted into different directories. cd ${basedir_raw} - # Pull CCPA data from HPSS. This will get all 6 obs files in the current - # archive and place them in the raw quarter-daily directory. + # Pull obs from HPSS. This will get all the obs files in the current + # archive and place them in the raw archive directory. cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ @@ -399,7 +396,7 @@ arcv_hr = ${arcv_hr}" --cycle_date ${yyyymmddhh_arcv} \ --data_stores hpss \ --data_type CCPA_obs \ - --output_path ${qrtrday_dir_raw} \ + --output_path ${arcv_dir_raw} \ --summary_file retrieve_data.log" print_info_msg "CALLING: ${cmd}" @@ -415,9 +412,12 @@ arcv_hr = ${arcv_hr}" yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - fn_raw="ccpa.t${hh}z.${accum}h.hrap.conus.gb2" - fp_raw="${qrtrday_dir_raw}/${fn_raw}" + # Create the processed grib2 obs file from the raw one (by moving, copying, + # or otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. + if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + fn_raw="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" + fp_raw="${arcv_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}/${yyyymmdd}" mkdir -p ${day_dir_proc} fn_proc="${fn_raw}" @@ -435,17 +435,6 @@ arcv_hr = ${arcv_hr}" fi done - else - - print_info_msg " -None of the current day's forecast APCP output times fall in the range -spanned by the current 6-hourly archive file. The bounds of the current -archive are: - yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" - yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" -The forecast output times for APCP are: - fcst_output_times_crnt_day = ($(printf "\"%s\" " ${fcst_output_times_crnt_day[@]}))" - fi done diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index af3b8ca942..a0d0590667 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . 
$USHdir/source_util_funcs.sh -for sect in user platform ; do +for sect in user platform verification ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -84,31 +84,24 @@ yyyymmdd_task=${PDY} # as the "processed" base directory because it contains the files after # all processing by this script is complete. basedir_proc=${OBS_DIR} +# +#----------------------------------------------------------------------- +# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is -# a scalar string containing all relevant forecast output times (each in -# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable fcst_output_times_all that -# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. -fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# output for the current day. We extract this list from the full list of -# all forecast output times (i.e. from all cycles). -fcst_output_times_crnt_day=() -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi - -# If there are no forecast output times on the day of the current task, -# exit the script. -num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} -if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then +# If there are no observation retrieval times on the day of the current +# task, exit the script. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_obs_retrieve_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast output times fall within the day associated with the -current task (yyyymmdd_task): +None of the observation retrieval times fall within the day associated +with the current task (yyyymmdd_task): yyyymmdd_task = \"${yyyymmdd_task}\" Thus, there is no need to retrieve any obs files." exit @@ -120,7 +113,7 @@ fi num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} for (( i=0; i<${num_mrms_fields}; i++ )); do - for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do + for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}/${yyyymmdd}" @@ -139,7 +132,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_fcst_output_times_crnt_day*num_mrms_fields)) +num_needed_files=$((num_obs_retrieve_times_crnt_day*num_mrms_fields)) if [[ ${num_existing_files} -eq $((num_needed_files)) ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -246,7 +239,7 @@ $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." # process renaming it) to the processed location. 
for hr in $(seq 0 1 23); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) - if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then for (( i=0; i<${num_mrms_fields}; i++ )); do python ${USHdir}/mrms_pull_topofhour.py \ --valid_time ${yyyymmddhh} \ diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 8b0c87b3eb..7ab6fc652b 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -8,7 +8,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user platform ; do +for sect in user platform verification ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -49,6 +49,18 @@ set -u #----------------------------------------------------------------------- # +# The time interval (in hours) at which the obs are available on HPSS +# must divide evenly into 24. Otherwise, different days would have obs +# available at different hours-of-day. Make sure this is the case. +remainder=$(( 24 % NDAS_OBS_AVAIL_INTVL_HRS )) +if [ ${remainder} -ne 0 ]; then + print_err_msg_exit "\ +The obs availability interval NDAS_OBS_AVAIL_INTVL_HRS must divide evenly +into 24 but doesn't: + NDAS_OBS_AVAIL_INTVL_HRS = ${NDAS_OBS_AVAIL_INTVL_HRS} + mod(24, NDAS_OBS_AVAIL_INTVL_HRS) = ${remainder}" +fi + # The day (in the form YYYMMDD) associated with the current task via the # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} @@ -58,31 +70,28 @@ yyyymmdd_task=${PDY} # as the "processed" base directory because it contains the files after # all processing by this script is complete. basedir_proc=${OBS_DIR} +# +#----------------------------------------------------------------------- +# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) + + + -# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is -# a scalar string containing all relevant forecast output times (each in -# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable fcst_output_times_all that -# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. -fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# output for the current day. We extract this list from the full list of -# all forecast output times (i.e. from all cycles). -fcst_output_times_crnt_day=() -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi -# If there are no forecast output times on the day of the current task, -# exit the script. -num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} -if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then +# If there are no observation retrieval times on the day of the current +# task, exit the script. 
+num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_obs_retrieve_times_crnt_day} -eq 0 ]]; then print_info_msg " -None of the forecast output times fall within the day associated with the -current task (yyyymmdd_task): +None of the observation retrieval times fall within the day associated +with the current task (yyyymmdd_task): yyyymmdd_task = \"${yyyymmdd_task}\" Thus, there is no need to retrieve any obs files." exit @@ -91,10 +100,10 @@ fi # Obs files will be obtained by extracting them from the relevant 6-hourly # archives. Thus, we need the sequence of archive hours over which to # loop. In the simplest case, this sequence will be "6 12 18 24". This -# will be the case if the forecast output times include all hours of the -# task's day and if none of the obs files for this day already exist on -# disk. In other cases, the sequence we loop over will be a subset of -# "6 12 18 24". +# will be the case if the observation retrieval times include all hours +# of the task's day and if none of the obs files for this day already +# exist on disk. In other cases, the sequence we loop over will be a +# subset of "6 12 18 24". # # To generate this sequence, we first set its starting and ending values # as well as the interval. @@ -103,14 +112,14 @@ fi arcv_hr_incr=6 # Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first forecast output time of the day. -hh_first=$(echo ${fcst_output_times_crnt_day[0]} | cut -c9-10) +# hour containing obs at the first observation retrieval time of the day. +hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) # Ending archive hour. This is set to the archive hour containing obs at -# the last forecast output time of the day. -hh_last=$(echo ${fcst_output_times_crnt_day[-1]} | cut -c9-10) +# the last observation retrieval time of the day. +hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) @@ -118,7 +127,7 @@ arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. num_existing_files=0 -for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}" @@ -143,7 +152,7 @@ done # If the number of obs files that already exist on disk is equal to the # number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_fcst_output_times_crnt_day)) +num_needed_files=$((num_obs_retrieve_times_crnt_day)) if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist @@ -214,17 +223,18 @@ arcv_hr = ${arcv_hr}" # archive before any processing by this script. qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - # Check whether any of the forecast output times for the day associated - # with this task fall in the time interval spanned by the current archive. - # If so, set the flag (do_retrieve) to retrieve the files in the current + # Check whether any of the observation retrieval times for the day + # associated with this task fall in the time interval spanned by the + # current archive. 
If so, set the flag (do_retrieve) to retrieve the + # files in the current # archive. yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) yyyymmddhh_qrtrday_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) do_retrieve="FALSE" - for (( i=0; i<${num_fcst_output_times_crnt_day}; i++ )); do - output_time=${fcst_output_times_crnt_day[i]} - if [[ "${output_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ - [[ "${output_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do + retrieve_time=${obs_retrieve_times_crnt_day[i]} + if [[ "${retrieve_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ + [[ "${retrieve_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then do_retrieve="TRUE" break fi @@ -275,7 +285,7 @@ arcv_hr = ${arcv_hr}" yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - if [[ ${fcst_output_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" fp_raw="${qrtrday_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}" @@ -289,13 +299,13 @@ arcv_hr = ${arcv_hr}" else print_info_msg " -None of the current day's forecast output times fall in the range spanned -by the current 6-hourly archive file. The bounds of the current archive -are: +None of the current day's observation retrieval times fall in the range +spanned by the current 6-hourly archive file. The bounds of the current +archive are: yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" -The forecast output times are: - fcst_output_times_crnt_day = ($(printf "\"%s\" " ${fcst_output_times_crnt_day[@]}))" +The observation retrieval times are: + obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" fi diff --git a/ush/get_obs_nohrsc.sh b/ush/get_obs_nohrsc.sh index 910cf3c35a..c71266ed07 100755 --- a/ush/get_obs_nohrsc.sh +++ b/ush/get_obs_nohrsc.sh @@ -46,86 +46,9 @@ set -u #----------------------------------------------------------------------- # -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. -yyyymmdd_task=${PDY} - -# Base directory in which the daily subdirectories containing the grib2 -# obs files will appear after this script is done. We refer to this as -# the "processed" base directory because it contains the files after all -# processing by this script is complete. -basedir_proc=${OBS_DIR} -# -#----------------------------------------------------------------------- -# -# Generate a list of forecast output times for the current day. Note -# that if the 0th hour of the next day (i.e. the day after the one -# associated with this task) is one of the forecast output times, we -# include it in the list for the current day because the accumulation -# associated with that hour occurred during the current day. -# -#----------------------------------------------------------------------- -# - -# The environment variable FCST_OUTPUT_TIMES_ALL set in the ROCOTO XML is -# a scalar string containing all relevant forecast output times (each in -# the form YYYYMMDDHH) separated by spaces. 
It isn't an array of strings -# because in ROCOTO, there doesn't seem to be a way to pass a bash array -# from the XML to the task's script. To have an array-valued variable to -# work with, here, we create the new variable fcst_output_times_all that -# is the array-valued counterpart of FCST_OUTPUT_TIMES_ALL. -fcst_output_times_all=($(printf "%s" "${FCST_OUTPUT_TIMES_ALL}")) - -# List of times (each of the form YYYYMMDDHH) for which there is forecast -# ASNOW (accumulated snow) output for the current day. We start constructing -# this by extracting from the full list of all forecast ASNOW output times -# (i.e. from all cycles) all elements that contain the current task's day -# (in the form YYYYMMDD). -fcst_output_times_crnt_day=() -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd_task} ]]; then - fcst_output_times_crnt_day=( $(printf "%s\n" "${fcst_output_times_all[@]}" | grep "^${yyyymmdd_task}") ) -fi -# If the 0th hour of the current day is in this list (and if it is, it -# will be the first element), remove it because for ASNOW, that time is -# considered part of the previous day (because it represents snowfall -# that occurred during the last hour of the previous day). -if [[ ${#fcst_output_times_crnt_day[@]} -gt 0 ]] && \ - [[ ${fcst_output_times_crnt_day[0]} == "${yyyymmdd_task}00" ]]; then - fcst_output_times_crnt_day=(${fcst_output_times_crnt_day[@]:1}) -fi -# If the 0th hour of the next day (i.e. the day after yyyymmdd_task) is -# one of the output times in the list of all ASNOW output times, we -# include it in the list for the current day because for ASNOW, that time -# is considered part of the current day (because it represents snowfall -# that occured during the last hour of the current day). -yyyymmdd00_task_p1d=$(${DATE_UTIL} --date "${yyyymmdd_task} 1 day" +%Y%m%d%H) -if [[ ${fcst_output_times_all[@]} =~ ${yyyymmdd00_task_p1d} ]]; then - fcst_output_times_crnt_day+=(${yyyymmdd00_task_p1d}) -fi - -# If there are no forecast ASNOW output times on the day of the current -# task, exit the script. -num_fcst_output_times_crnt_day=${#fcst_output_times_crnt_day[@]} -if [[ ${num_fcst_output_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the forecast ASNOW output times fall within the day (including the -0th hour of the next day) associated with the current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any obs files." - exit -fi -# -#----------------------------------------------------------------------- -# -# Generate a list of all the times at which obs are available for the -# current day, possibly including hour 00 of the next day. -# -#----------------------------------------------------------------------- -# - # The time interval (in hours) at which the obs are available on HPSS -# must be evenly divisible into 24. Otherwise, different days would -# have obs available at different hours. Make sure this is the case. +# must divide evenly into 24. Otherwise, different days would have obs +# available at different hours-of-day. Make sure this is the case. remainder=$(( 24 % NOHRSC_OBS_AVAIL_INTVL_HRS )) if [ ${remainder} -ne 0 ]; then print_err_msg_exit "\ @@ -135,93 +58,36 @@ into 24 but doesn't: mod(24, NOHRSC_OBS_AVAIL_INTVL_HRS) = ${remainder}" fi -# Construct the array of times during the current day (and possibly -# during hour 00 of the next day) at which obs are available on HPSS. -# Each element of this array is of the form "YYYYMMDDHH". 
-num_obs_avail_times=$((24/NOHRSC_OBS_AVAIL_INTVL_HRS))
-obs_avail_times_crnt_day=()
-# Note: Start at i=1 because the output for hour 00 of the current day is
-# considered part of the previous day (because it represents accumulation
-# that occurred during the previous day).
-for (( i=1; i<$((num_obs_avail_times+1)); i++ )); do
-  hrs=$((i*NOHRSC_OBS_AVAIL_INTVL_HRS))
-  obs_avail_times_crnt_day+=( $(${DATE_UTIL} --date "${yyyymmdd_task} ${hrs} hours" +%Y%m%d%H) )
-done
-#
-#-----------------------------------------------------------------------
-#
-# Generate a list of all the times at which to retrieve obs. This is
-# obtained from the intersection of the list of times at which there is
-# forecast output and the list of times at which there are obs available.
-# Note that if the forecast output is more frequent than the data is
-# available, then the forecast values must be accumulated together to
-# get values at the times at which the obs are available. This is done
-# in another workflow task using the METplus tool PcpCombine.
-#
-#-----------------------------------------------------------------------
-#
-obs_retrieve_times_crnt_day=()
-for yyyymmddhh in ${fcst_output_times_crnt_day[@]}; do
-  if [[ ${obs_avail_times_crnt_day[@]} =~ ${yyyymmddhh} ]] ; then
-    obs_retrieve_times_crnt_day+=(${yyyymmddhh})
-  fi
-done
+# Accumulation period to use when getting obs files. This is simply (a
+# properly formatted version of) the obs availability interval.
+accum_obs_fmt=$( printf "%d" "${NOHRSC_OBS_AVAIL_INTVL_HRS}" )
+
+# The day (in the form YYYYMMDD) associated with the current task via the
+# task's cycledefs attribute in the ROCOTO xml.
+yyyymmdd_task=${PDY}
+
+# Base directory in which the daily subdirectories containing the grib2
+# obs files will appear after this script is done. We refer to this as
+# the "processed" base directory because it contains the files after all
+# processing by this script is complete.
+basedir_proc=${OBS_DIR}
 #
 #-----------------------------------------------------------------------
 #
-#
+# Get the list of all the times in the current day at which to retrieve
+# obs. This is an array with elements having format "YYYYMMDDHH".
 #
 #-----------------------------------------------------------------------
 #
 array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}"
-eval obs_retrieve_times=\( \${${array_name}[@]} \)
-echo
-echo "QQQQQQQQQQQQQQQQQQQ"
-#echo "obs_retrieve_times = |${obs_retrieve_times[@]}|"
-echo "obs_retrieve_times ="
-echo "|${obs_retrieve_times[@]}|"
-
-# For testing.
-#obs_retrieve_times+=('abcd') -#obs_retrieve_times[4]='abcd' - -err_msg=" -The two methods of obtaining the array of obs retrieve times don't match: - obs_retrieve_times_crnt_day = - (${obs_retrieve_times_crnt_day[@]}) - obs_retrieve_times = - (${obs_retrieve_times[@]})" - -n1=${#obs_retrieve_times_crnt_day[@]} -n2=${#obs_retrieve_times[@]} -if [ ${n1} -ne ${n2} ]; then - print_err_msg_exit "${err_msg}" -fi - -for (( i=0; i<${n1}; i++ )); do - elem1=${obs_retrieve_times_crnt_day[$i]} - elem2=${obs_retrieve_times[$i]} - if [ ${elem1} != ${elem2} ]; then - print_err_msg_exit "${err_msg}" - fi -done - -obs_retrieve_times_crnt_day=($( printf "%s " "${obs_retrieve_times[@]}" )) - -echo -echo "RRRRRRRRRRRRRRRRR" -#echo "obs_retrieve_times_crnt_day = |${obs_retrieve_times_crnt_day[@]}|" -echo "obs_retrieve_times_crnt_day =" -echo "|${obs_retrieve_times_crnt_day[@]}|" - -#exit 1 +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) # #----------------------------------------------------------------------- # # Obs files will be obtained by extracting them from the relevant 24-hourly # archives. Thus, we need the sequence of archive hours over which to # loop. In the simplest case, this sequence will be "0 24". This will -# be the case if the forecast output times include all hours of the +# be the case if the observation retrieval times include all hours of the # task's day and if none of the obs files for this day already exist on # disk. In other cases, the sequence we loop over will be a subset of # "0 24", e.g. just "0" or just "24". @@ -261,7 +127,7 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) day_dir_proc="${basedir_proc}" - fn_proc="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fn_proc="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" fp_proc="${day_dir_proc}/${fn_proc}" if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) @@ -447,12 +313,11 @@ The times at which obs need to be retrieved are: # or otherwise) only if the time of the current file in the current archive # also exists in the list of obs retrieval times for the current day. if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - fn_raw="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" + fn_raw="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" fp_raw="${arcv_dir_raw}/${fn_raw}" day_dir_proc="${basedir_proc}" mkdir -p ${day_dir_proc} fn_proc="${fn_raw}" - #fn_proc="sfav2_CONUS_6h_${yyyymmddhh}_grid184.grb2" fp_proc="${day_dir_proc}/${fn_proc}" ${mv_or_cp} ${fp_raw} ${fp_proc} fi From 31a529c569825da84278afe240c06407e98f4278 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 12:50:44 -0600 Subject: [PATCH 108/260] Add file (exregional_run_met_pb2nc_obs.sh) that should have been part of commit hash cae50b5d8, and remove file (exregional_run_met_pcpcombine.sh) that should NOT have been. --- scripts/exregional_run_met_pb2nc_obs.sh | 81 ++++++++++-------------- scripts/exregional_run_met_pcpcombine.sh | 26 +++----- 2 files changed, 41 insertions(+), 66 deletions(-) diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 63d530f370..494ce74a3d 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -74,40 +74,22 @@ to convert NDAS prep buffer observation files to NetCDF format. 
 #
 #-----------------------------------------------------------------------
 #
-#
+# The day (in the form YYYYMMDD) associated with the current task via the
+# task's cycledefs attribute in the ROCOTO xml.
 #
 #-----------------------------------------------------------------------
 #
-# The day (in the form YYYMMDD) associated with the current task via the
-# task's cycledefs attribute in the ROCOTO xml.
 yyyymmdd_task=${PDY}
-
-# The environment variable OUTPUT_TIMES_ALL set in the ROCOTO XML is a
-# scalar string containing all relevant forecast output times (each) in
-# the form YYYYMMDDHH) separated by spaces. It isn't an array of strings
-# because in ROCOTO, there doesn't seem to be a way to pass a bash array
-# from the XML to task's script. To have an array-valued variable to
-# work with, here, we create the new variable output_times_all that is
-# the array-valued counterpart of OUTPUT_TIMES_ALL.
-output_times_all=($(printf "%s" "${OUTPUT_TIMES_ALL}"))
-
-# List of times (each of the form YYYYMMDDHH) for which there is forecast
-# output for the current day. We extract this list from the full list of
-# all forecast output times (i.e. from all cycles).
-output_times_crnt_day=()
-if [[ ${output_times_all[@]} =~ ${yyyymmdd_task} ]]; then
-  output_times_crnt_day=( $(printf "%s\n" "${output_times_all[@]}" | grep "^${yyyymmdd_task}") )
-fi
-
-num_output_times_crnt_day=${#output_times_crnt_day[@]}
-if [[ ${num_output_times_crnt_day} -eq 0 ]]; then
-  print_info_msg "
-None of the forecast output times fall within the day associated with the
-current task (yyyymmdd_task):
-  yyyymmdd_task = \"${yyyymmdd_task}\"
-Thus, there is no need to run ${METPLUSTOOLNAME} on any prepbufr files."
-  exit
-fi
+#
+#-----------------------------------------------------------------------
+#
+# Get the list of all the times in the current day at which to retrieve
+# obs. This is an array with elements having format "YYYYMMDDHH".
+#
+#-----------------------------------------------------------------------
+#
+array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}"
+eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \)
 #
 #-----------------------------------------------------------------------
 #
@@ -159,31 +141,32 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs"
 #
 #-----------------------------------------------------------------------
 #
-# Set the array of forecast hours for which to run the MET/METplus tool.
+# Set the array of lead hours (relative to the date associated with this
+# task) for which to run the MET/METplus tool.
# #----------------------------------------------------------------------- # -FHR_LIST="" +LEADHR_LIST="" num_missing_files=0 -for yyyymmddhh in ${output_times_crnt_day[@]}; do +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) fn="prepbufr.ndas.${yyyymmddhh}" fp="${OBS_INPUT_DIR}/${fn}" if [[ -f "${fp}" ]]; then print_info_msg " -Found ${OBTYPE} obs file corresponding to forecast output time (yyyymmddhh): +Found ${OBTYPE} obs file corresponding to observation retrieval time +(yyyymmddhh): yyyymmddhh = \"${yyyymmddhh}\" fp = \"${fp}\" " hh_noZero=$((10#${hh})) - #FHR_LIST+=("${yyyymmddhh}") - FHR_LIST="${FHR_LIST},${hh_noZero}" + LEADHR_LIST="${LEADHR_LIST},${hh_noZero}" else num_missing_files=$((num_missing_files+1)) print_info_msg " -${OBTYPE} obs file corresponding to forecast output time (yyyymmddhh) does -not exist on disk: +${OBTYPE} obs file corresponding to observation retrieval time (yyyymmddhh) +does not exist on disk: yyyymmddhh = \"${yyyymmddhh}\" fp = \"${fp}\" Removing this time from the list of times to be processed by ${METPLUSTOOLNAME}. @@ -202,12 +185,12 @@ than the maximum allowed number (num_missing_files_max): num_missing_files_max = ${num_missing_files_max}" fi -# Remove leading comma from FHR_LIST. -FHR_LIST=$( echo "${FHR_LIST}" | $SED "s/^,//g" ) +# Remove leading comma from LEADHR_LIST. +LEADHR_LIST=$( echo "${LEADHR_LIST}" | $SED "s/^,//g" ) print_info_msg "$VERBOSE" "\ -Final (i.e. after filtering for missing files) set of forecast hours +Final (i.e. after filtering for missing obs files) set of lead hours (saved in a scalar string variable) is: - FHR_LIST = \"${FHR_LIST}\" + LEADHR_LIST = \"${LEADHR_LIST}\" " # #----------------------------------------------------------------------- @@ -242,15 +225,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + LEADHR_LIST = [${LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -314,10 +297,10 @@ settings="\ 'METPLUS_TOOL_NAME': '${METPLUS_TOOL_NAME}' 'metplus_verbosity_level': '${METPLUS_VERBOSITY_LEVEL}' # -# Date and forecast hour information. +# Date and lead hour information. # 'cdate': '$CDATE' - 'fhr_list': '${FHR_LIST}' + 'leadhr_list': '${LEADHR_LIST}' # # Input and output directory/file information. 
# diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 9495031722..3d4d0cb9fb 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -190,13 +190,11 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" - fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) - OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) + OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" - fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) - OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) + OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" fi @@ -215,27 +213,22 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then base_dir="${FCST_INPUT_DIR}" fn_template="${FCST_INPUT_FN_TEMPLATE}" num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" - subintvl_accum_hrs="${FCST_OUTPUT_INTVL_HRS}" elif [ "${FCST_OR_OBS}" = "OBS" ]; then base_dir="${OBS_INPUT_DIR}" fn_template="${OBS_INPUT_FN_TEMPLATE}" num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" - subintvl_accum_hrs="${OBS_AVAIL_INTVL_HRS}" fi -input_accum_hh=$(printf "%02d" ${subintvl_accum_hrs}) -vx_output_intvl_hrs="$((10#${ACCUM_HH}))" -set_vx_hrs_list \ - yyyymmddhh_init="${CDATE}" \ +set_vx_fhr_list \ + cdate="${CDATE}" \ fcst_len_hrs="${FCST_LEN_HRS}" \ - vx_output_intvl_hrs="${vx_output_intvl_hrs}" \ - field_is_cumul="TRUE" \ - check_subintvl_files="TRUE" \ - subintvl_accum_hrs="${subintvl_accum_hrs}" \ + field="$VAR" \ + accum_hh="${ACCUM_HH}" \ base_dir="${base_dir}" \ fn_template="${fn_template}" \ + check_accum_contrib_files="TRUE" \ num_missing_files_max="${num_missing_files_max}" \ - outvarname_hrs_list="FHR_LIST" + outvarname_fhr_list="FHR_LIST" # #----------------------------------------------------------------------- # @@ -365,8 +358,7 @@ settings="\ 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' 'obtype': '${OBTYPE}' 'FCST_OR_OBS': '${FCST_OR_OBS}' - 'input_accum_hh': '${input_accum_hh}' - 'output_accum_hh': '${ACCUM_HH:-}' + 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' 'input_field_group': '${VAR:-}' From f77d31484dd7eab232250ec8dd7f4b172bb67e60 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 25 Sep 2024 12:56:40 -0600 Subject: [PATCH 109/260] Commit change that should have been part of commit hash fffdbd3. 
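
For reference, the per-day lookup pattern that fffdbd3 moved the get_obs
tasks to is sketched below. This is only an illustration: the array
contents shown are hypothetical, but the array naming and the indirect
expansion are the ones used in the get_obs scripts.

    # Hypothetical per-day array of obs retrieval times (elements have the
    # format "YYYYMMDDHH"), as the workflow generator would define it for
    # CCPA obs on 20240101:
    OBS_RETRIEVE_TIMES_CCPA_20240101=( "2024010101" "2024010102" "2024010103" )

    # Indirect expansion used by the get_obs scripts to select the array
    # for the current obs type and task day:
    OBTYPE="CCPA"
    yyyymmdd_task="20240101"
    array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}"
    eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \)
    echo "${obs_retrieve_times_crnt_day[@]}"  # 2024010101 2024010102 2024010103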
---
 parm/wflow/verify_pre.yaml | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml
index 80b70f68d3..80831f6f29 100644
--- a/parm/wflow/verify_pre.yaml
+++ b/parm/wflow/verify_pre.yaml
@@ -31,10 +31,6 @@ task_get_obs_ccpa:
     <<: *default_vars
     OBS_DIR: '&CCPA_OBS_DIR;'
     OBTYPE: 'CCPA'
-    OUTPUT_TIMES_ALL: &output_times_all_cumul
-      '{%- for i in range(0, (workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL|length)) %}
-        {{- " " ~ workflow.OUTPUT_TIMES_ALL_CYCLES_CUMUL[i] }}
-      {%- endfor %}'
     native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}'
     partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}'
     queue: "&QUEUE_HPSS;"

From 5725b5928eae60ccbfa143e8fd7c15883bf64a84 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr"
Date: Thu, 26 Sep 2024 18:38:59 +0000
Subject: [PATCH 110/260] - Update log file names for some MET tools to
 include the cycle date, so that output from different cycles is no longer
 combined into a single log file that is hard to read
 - Update AirNow staged data case for HPSS file settings
 - Update comment

---
 scripts/exregional_check_post_output.sh | 1 +
 scripts/exregional_run_met_genensprod_or_ensemblestat.sh | 2 +-
 scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh | 2 +-
 scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh | 2 +-
 .../verification/config.MET_verification_smoke_only_vx.yaml | 2 ++
 5 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh
index 95f4c68711..e32651d0b5 100755
--- a/scripts/exregional_check_post_output.sh
+++ b/scripts/exregional_check_post_output.sh
@@ -11,6 +11,7 @@
 # CDATE
 # ENSMEM_INDX
 # GLOBAL_VAR_DEFNS_FP
+# METPLUS_ROOT (used by ush/set_vx_fhr_list.py)
 # VAR
 #
 # Experiment variables
diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh
index 4b76e62faa..802103559c 100755
--- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh
+++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh
@@ -283,7 +283,7 @@ fi
 # First, set the base file names.
 #
 metplus_config_tmpl_bn="${MetplusToolName}"
-metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}"
+metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_${CDATE}"
 metplus_log_bn="${metplus_config_bn}"
 #
 # Add prefixes and suffixes (extensions) to the base file names.
diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh
index e5103b230c..cafe3c45b1 100755
--- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh
+++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh
@@ -236,7 +236,7 @@ fi
 # First, set the base file names.
 #
 metplus_config_tmpl_bn="${MetplusToolName}_ensmean"
-metplus_config_bn="${MetplusToolName}_ensmean_${FIELDNAME_IN_MET_FILEDIR_NAMES}"
+metplus_config_bn="${MetplusToolName}_ensmean_${FIELDNAME_IN_MET_FILEDIR_NAMES}_${CDATE}"
 metplus_log_bn="${metplus_config_bn}"
 #
 # Add prefixes and suffixes (extensions) to the base file names.
diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 1c717b0032..9d953b7003 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -236,7 +236,7 @@ fi # First, set the base file names. # metplus_config_tmpl_bn="${MetplusToolName}_ensprob" -metplus_config_bn="${MetplusToolName}_ensprob_${FIELDNAME_IN_MET_FILEDIR_NAMES}" +metplus_config_bn="${MetplusToolName}_ensprob_${FIELDNAME_IN_MET_FILEDIR_NAMES}_${CDATE}" metplus_log_bn="${metplus_config_bn}" # # Add prefixes and suffixes (extensions) to the base file names. diff --git a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml index 3920697e90..e753f809d1 100644 --- a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml @@ -23,6 +23,8 @@ verification: VX_FCST_INPUT_BASEDIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/smoke_vx/fcst FCST_SUBDIR_TEMPLATE: '{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}' FCST_FN_TEMPLATE: 'rrfs.t{init?fmt=%H?shift=-${time_lag}}z.prslev.f{lead?fmt=%HHH?shift=${time_lag}}.conus_3km.grib2' + OBS_AIRNOW_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/HourlyAQObs_{valid?fmt=%Y%m%d%H}.dat' + AIRNOW_INPUT_FORMAT: airnowhourlyaqobs platform: CCPA_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/CCPA_obs MRMS_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/MRMS_obs From 83508110fa0e5f05973c15fa23e6cec9f2f39544 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Thu, 26 Sep 2024 18:39:51 +0000 Subject: [PATCH 111/260] If set_vx_fhr_list.py is going to fail anyway, print details about failure as script runs for easier debugging --- ush/set_vx_fhr_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/set_vx_fhr_list.py b/ush/set_vx_fhr_list.py index c223c598e0..5a29d7269e 100644 --- a/ush/set_vx_fhr_list.py +++ b/ush/set_vx_fhr_list.py @@ -96,7 +96,7 @@ def set_vx_fhr_list(cdate, fcst_len, field, accum_hh, time_lag, base_dir, filena else: skip_this_fhr = True num_missing_files += 1 - if verbose: + if verbose or num_missing_files > num_missing_files_max: print(f"The file (fp) for the current forecast hour (fhr; relative to the cycle date cdate) is missing:\n fhr = \"{fhr}\"\n cdate = \"{cdate}\"\n fp = \"{fp}\"\nExcluding the current forecast hour from the list of hours passed to the METplus configuration file.") break From 99a87f9442cd3a57a8f92a1d70beb54b8d93b37b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 26 Sep 2024 12:51:30 -0600 Subject: [PATCH 112/260] Refactor the way the vx lead hours (i.e. the hours for which vx will be run) are calculated (and corresponding files checked for), including renaming of the file set_vx_fhr_list.sh to set_leadhrs.sh. 
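
For reviewers, the old and new interfaces are contrasted below. Both calls
are assembled from the exregional_check_post_output.sh hunk in this patch
(indentation adjusted for display only):

    # Old: the lead-hour list was derived from a field/accumulation pair.
    set_vx_fhr_list \
      cdate="${CDATE}" \
      fcst_len_hrs="${FCST_LEN_HRS}" \
      field="$VAR" \
      accum_hh="${ACCUM_HH}" \
      base_dir="${VX_FCST_INPUT_BASEDIR}" \
      fn_template="${FCST_INPUT_FN_TEMPLATE}" \
      check_accum_contrib_files="FALSE" \
      num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \
      outvarname_fhr_list="FHR_LIST"

    # New: the caller passes the lead-hour range and interval explicitly,
    # decoupling the hour list from any particular field.
    set_leadhrs \
      yyyymmddhh_init="${CDATE}" \
      lhr_min="0" \
      lhr_max="${FCST_LEN_HRS}" \
      lhr_intvl="${FCST_OUTPUT_INTVL_HRS}" \
      base_dir="${VX_FCST_INPUT_BASEDIR}" \
      fn_template="${FCST_INPUT_FN_TEMPLATE}" \
      num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \
      outvarname_lhrs_list="FHR_LIST"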
--- parm/metplus/EnsembleStat.conf | 2 +- parm/metplus/GenEnsProd.conf | 2 +- parm/metplus/GridStat_ensmean.conf | 2 +- parm/metplus/GridStat_ensprob.conf | 2 +- parm/metplus/GridStat_or_PointStat.conf | 2 +- parm/metplus/PcpCombine.conf | 2 +- parm/metplus/PointStat_ensmean.conf | 2 +- parm/metplus/PointStat_ensprob.conf | 2 +- scripts/exregional_check_post_output.sh | 31 +- ...onal_run_met_genensprod_or_ensemblestat.sh | 49 +-- ...gional_run_met_gridstat_or_pointstat_vx.sh | 53 +-- ...un_met_gridstat_or_pointstat_vx_ensmean.sh | 51 +-- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 47 +-- scripts/exregional_run_met_pb2nc_obs.sh | 1 - scripts/exregional_run_met_pcpcombine.sh | 86 +++-- ush/{set_vx_fhr_list.sh => set_leadhrs.sh} | 301 ++++++------------ 16 files changed, 301 insertions(+), 334 deletions(-) rename ush/{set_vx_fhr_list.sh => set_leadhrs.sh} (50%) diff --git a/parm/metplus/EnsembleStat.conf b/parm/metplus/EnsembleStat.conf index 2caeda1521..3759d5d8a1 100644 --- a/parm/metplus/EnsembleStat.conf +++ b/parm/metplus/EnsembleStat.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/GenEnsProd.conf b/parm/metplus/GenEnsProd.conf index 6c47cedb0d..17005ecd1a 100644 --- a/parm/metplus/GenEnsProd.conf +++ b/parm/metplus/GenEnsProd.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/GridStat_ensmean.conf b/parm/metplus/GridStat_ensmean.conf index 6bbc20e3f8..0cfaa707bf 100644 --- a/parm/metplus/GridStat_ensmean.conf +++ b/parm/metplus/GridStat_ensmean.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/GridStat_ensprob.conf b/parm/metplus/GridStat_ensprob.conf index a43b8ed340..6c34eb6ba0 100644 --- a/parm/metplus/GridStat_ensprob.conf +++ b/parm/metplus/GridStat_ensprob.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/GridStat_or_PointStat.conf b/parm/metplus/GridStat_or_PointStat.conf index 7bd0039ab5..865f1c8d14 100644 --- a/parm/metplus/GridStat_or_PointStat.conf +++ b/parm/metplus/GridStat_or_PointStat.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are 
not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index 5bdd09c761..de99871bed 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -35,7 +35,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/PointStat_ensmean.conf b/parm/metplus/PointStat_ensmean.conf index b16a481dbd..8637a7501d 100644 --- a/parm/metplus/PointStat_ensmean.conf +++ b/parm/metplus/PointStat_ensmean.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/PointStat_ensprob.conf b/parm/metplus/PointStat_ensprob.conf index 84b9f3954d..885ba121be 100644 --- a/parm/metplus/PointStat_ensprob.conf +++ b/parm/metplus/PointStat_ensprob.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index 320311cc94..433aba1e4e 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -56,7 +56,7 @@ done # #----------------------------------------------------------------------- # -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -104,38 +104,33 @@ user-staged. #----------------------------------------------------------------------- # i="0" -if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then +if [[ $(boolify "${DO_ENSEMBLE}") == "TRUE" ]]; then i=$( bc -l <<< "${ENSMEM_INDX}-1" ) fi time_lag=$( bc -l <<< "${ENS_TIME_LAG_HRS[$i]}*${SECS_PER_HOUR}" ) # #----------------------------------------------------------------------- # -# Get the list of forecast hours for which there is a post-processed -# output file. Note that: -# -# 1) CDATE (in YYYYMMDDHH format) is already available via the call to -# the job_preamble.sh script in the j-job of this ex-script. -# 2) VAR is set to "APCP" and ACCUM_HH is set to "01" because we assume -# the output files are hourly, so these settings will result in the -# function set_vx_fhr_list checking for existence of hourly post output -# files. +# Check to ensure that all the expected post-processed forecast output +# files are present on disk. This is done by the set_leadhrs function +# below. 
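Stepping back to the .conf changes above: {{vx_leadhr_list}} is a placeholder that the workflow's template-filling step substitutes when each METplus configuration file is rendered from the settings block seen later in these scripts. A rough sketch of that substitution (the real workflow uses a Jinja-based fill-template utility, so this shows only the idea, not the actual mechanism):

    conf_tmpl = "LEAD_SEQ = {{vx_leadhr_list}}\n"
    settings = {"vx_leadhr_list": "6, 12, 18, 24"}

    conf = conf_tmpl
    for key, val in settings.items():
        conf = conf.replace("{{" + key + "}}", val)

    print(conf, end="")   # -> LEAD_SEQ = 6, 12, 18, 24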
Note that CDATE (in YYYYMMDDHH format) is already available via +# the call to the job_preamble.sh script in the j-job of this ex-script. # #----------------------------------------------------------------------- # ensmem_indx=$(printf "%0${VX_NDIGITS_ENSMEM_NAMES}d" $(( 10#${ENSMEM_INDX}))) ensmem_name="mem${ensmem_indx}" FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE} ) -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + +set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="0" \ + lhr_max="${FCST_LEN_HRS}" \ + lhr_intvl="${FCST_OUTPUT_INTVL_HRS}" \ base_dir="${VX_FCST_INPUT_BASEDIR}" \ fn_template="${FCST_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="FHR_LIST" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 583178d3ad..73d98754b4 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -220,23 +220,34 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + if [ "${MetplusToolName}" = "GenEnsProd" ]; then - set_vx_fhr_list_no_missing \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - outvarname_fhr_list_no_missing="FHR_LIST" + set_leadhrs_no_missing \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ + outvarname_lhrs_list_no_missing="VX_LEADHR_LIST" elif [ "${MetplusToolName}" = "EnsembleStat" ]; then - set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ base_dir="${OBS_INPUT_DIR}" \ fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="VX_LEADHR_LIST" fi # #----------------------------------------------------------------------- @@ -271,15 +282,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. 
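The case blocks introduced above all encode the same rule: cumulative obs types (CCPA precipitation, NOHRSC snowfall) have nothing to verify at lead hour 0, so their lead hours start one accumulation interval into the forecast and step by that interval, while instantaneous fields start at hour 0 and step by the forecast output interval. A hedged Python restatement (the function name is illustrative, not project code):

    def vx_lead_window(obtype, accum_hh, fcst_output_intvl_hrs, fcst_len_hrs):
        # Cumulative obs types start one accumulation interval in; all other
        # (instantaneous) fields start at hour 0.
        if obtype in ("CCPA", "NOHRSC"):
            intvl = int(accum_hh)   # int() strips a leading zero, e.g. "06" -> 6
            start = intvl
        else:
            intvl = fcst_output_intvl_hrs
            start = 0
        return start, intvl, fcst_len_hrs

    print(vx_lead_window("CCPA", "06", 1, 24))   # -> (6, 6, 24)
    print(vx_lead_window("MRMS", None, 1, 24))   # -> (0, 1, 24)

The bash counterpart of that int() call is the $((10#${ACCUM_HH})) idiom above, which forces base-10 arithmetic so that values like "08" are not parsed as invalid octal.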
# #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -342,7 +353,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index dd3fcd495b..263d22053f 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -95,10 +95,12 @@ FIELDNAME_IN_FCST_INPUT="" FIELDNAME_IN_MET_OUTPUT="" FIELDNAME_IN_MET_FILEDIR_NAMES="" +# Note that ACCUM_HH will not be defined for the REFC, RETOP, ADPSFC, and +# ADPUPA field groups. set_vx_params \ obtype="${OBTYPE}" \ field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + accum_hh="${ACCUM_HH:-}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ outvarname_fieldname_in_fcst_input="FIELDNAME_IN_FCST_INPUT" \ @@ -173,8 +175,8 @@ if [ "${grid_or_point}" = "grid" ]; then FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; @@ -209,23 +211,34 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. +# Set the lead hours for which to run the MET/METplus tool. This is done +# by starting with the full list of lead hours for which we expect to +# find forecast output and then removing from that list any hours for +# which there is no corresponding observation data. 
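The set_leadhrs calls in these scripts filter the candidate lead hours by whether the corresponding obs file actually exists. A simplified sketch of that filtering (it swaps the METplus {valid?fmt=...} filename templates, which the real helper evaluates via eval_METplus_timestr_tmpl, for a plain strftime pattern; the example path is a placeholder):

    import os
    from datetime import datetime, timedelta

    def filter_leadhrs_by_obs(yyyymmddhh_init, leadhrs, base_dir, strftime_tmpl):
        # Keep only the lead hours whose obs file exists on disk.
        init = datetime.strptime(yyyymmddhh_init, "%Y%m%d%H")
        kept = []
        for lhr in leadhrs:
            valid = init + timedelta(hours=lhr)
            fp = os.path.join(base_dir, valid.strftime(strftime_tmpl))
            if os.path.isfile(fp):
                kept.append(lhr)
        return kept

    # e.g. filter_leadhrs_by_obs("2024042900", range(25), "/path/to/obs",
    #                            "%Y%m%d/obsfile_%Y%m%d%H.grb2")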
# #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ base_dir="${OBS_INPUT_DIR}" \ fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="VX_LEADHR_LIST" # #----------------------------------------------------------------------- # @@ -259,15 +272,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -330,7 +343,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 5fdafb20d1..5ad0560f28 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -127,8 +127,8 @@ if [ "${grid_or_point}" = "grid" ]; then OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" @@ -157,23 +157,34 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensmean" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. +# Set the lead hours for which to run the MET/METplus tool. This is done +# by starting with the full list of lead hours for which we expect to +# find forecast output and then removing from that list any hours for +# which there is no corresponding observation data. 
# #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ base_dir="${OBS_INPUT_DIR}" \ fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="VX_LEADHR_LIST" # #----------------------------------------------------------------------- # @@ -216,15 +227,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -287,7 +298,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 201e67ccf1..9a8c35d1cb 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -156,23 +156,34 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensprob" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. +# Set the lead hours for which to run the MET/METplus tool. This is done +# by starting with the full list of lead hours for which we expect to +# find forecast output and then removing from that list any hours for +# which there is no corresponding observation data. 
# #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +set_leadhrs \ + yyyymmddhh_init="${CDATE}" \ + lhr_min="${vx_hr_start}" \ + lhr_max="${vx_hr_end}" \ + lhr_intvl="${vx_intvl}" \ base_dir="${OBS_INPUT_DIR}" \ fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + outvarname_lhrs_list="VX_LEADHR_LIST" # #----------------------------------------------------------------------- # @@ -215,15 +226,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -286,7 +297,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 494ce74a3d..fbf3ec1689 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -21,7 +21,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 3d4d0cb9fb..97d156aa62 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -22,7 +22,7 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh +. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -190,45 +190,76 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) + OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" - OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) + OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" fi # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. 
-# This is done by starting with the full list of forecast hours for which
-# there is forecast output and then removing from that list any forecast
-# hours for which there is no corresponding observation data (if combining
-# observed APCP) or forecast data (if combining forecast APCP).
+# Set the array of lead hours for which to run the MET/METplus tool.
+#
+#-----------------------------------------------------------------------
+#
+vx_intvl="$((10#${ACCUM_HH}))"
+set_leadhrs_no_missing \
+  lhr_min="${vx_intvl}" \
+  lhr_max="${FCST_LEN_HRS}" \
+  lhr_intvl="${vx_intvl}" \
+  outvarname_lhrs_list_no_missing="VX_LEADHR_LIST"
+#
+#-----------------------------------------------------------------------
+#
+# Check for the presence of files (either from observations or forecasts)
+# needed to create the required accumulation given by ACCUM_HH.
 #
 #-----------------------------------------------------------------------
 #
 if [ "${FCST_OR_OBS}" = "FCST" ]; then
   base_dir="${FCST_INPUT_DIR}"
   fn_template="${FCST_INPUT_FN_TEMPLATE}"
-  num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}"
+  subintvl="${FCST_OUTPUT_INTVL_HRS}"
 elif [ "${FCST_OR_OBS}" = "OBS" ]; then
   base_dir="${OBS_INPUT_DIR}"
   fn_template="${OBS_INPUT_FN_TEMPLATE}"
-  num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}"
+  subintvl="${OBS_AVAIL_INTVL_HRS}"
 fi
+num_missing_files_max="0"
+input_accum_hh=$(printf "%02d" ${subintvl})
+#
+# Convert the list of hours at which the PcpCombine tool will be run to
+# an array.  This represents the hours at which each accumulation period
+# ends.  Then use it to check the presence of all files required to build
+# the accumulations from the sub-accumulations.
+#
+subintvl_end_hrs=($( echo ${VX_LEADHR_LIST} | $SED "s/,//g" ))
+for hr_end in ${subintvl_end_hrs[@]}; do
+  hr_start=$((hr_end - vx_intvl + subintvl))
+  print_info_msg "
+Checking for the presence of files that will contribute to the ${vx_intvl}-hour
+accumulation ending at lead hour ${hr_end} (relative to ${CDATE})...
+"
+  set_leadhrs \
+    yyyymmddhh_init="${CDATE}" \
+    lhr_min="${hr_start}" \
+    lhr_max="${hr_end}" \
+    lhr_intvl="${subintvl}" \
+    base_dir="${base_dir}" \
+    fn_template="${fn_template}" \
+    num_missing_files_max="${num_missing_files_max}" \
+    outvarname_lhrs_list="tmp"
+done
 
-set_vx_fhr_list \
-  cdate="${CDATE}" \
-  fcst_len_hrs="${FCST_LEN_HRS}" \
-  field="$VAR" \
-  accum_hh="${ACCUM_HH}" \
-  base_dir="${base_dir}" \
-  fn_template="${fn_template}" \
-  check_accum_contrib_files="TRUE" \
-  num_missing_files_max="${num_missing_files_max}" \
-  outvarname_fhr_list="FHR_LIST"
+print_info_msg "
+${MetplusToolName} will be run for the following lead hours (relative to ${CDATE}):
+  VX_LEADHR_LIST = ${VX_LEADHR_LIST}
+"
 #
 #-----------------------------------------------------------------------
 #
@@ -262,15 +293,15 @@ export LOGDIR
 #
 #-----------------------------------------------------------------------
 #
-# Do not run METplus if there isn't at least one valid forecast hour for
-# which to run it.
+# Do not run METplus if there isn't at least one lead hour for which to
+# run it.
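The loop added above walks each accumulation ending hour and demands that every contributing sub-interval file exist (num_missing_files_max is zero here, unlike the more tolerant checks elsewhere). The hr_start arithmetic is easiest to see in a small sketch (hypothetical function name):

    def contributing_hours(accum_end_hrs, accum_intvl, sub_intvl):
        # For each lead hour at which an accumulation ends, the lead hours of
        # the sub-interval files that must all exist to build it.
        out = {}
        for hr_end in accum_end_hrs:
            hr_start = hr_end - accum_intvl + sub_intvl
            out[hr_end] = list(range(hr_start, hr_end + 1, sub_intvl))
        return out

    # A 6-hour accumulation built from hourly files: the file valid at hour 1
    # holds the 0-1 h accumulation, so hours 1..6 are all required.
    print(contributing_hours([6, 12], accum_intvl=6, sub_intvl=1))
    # -> {6: [1, 2, 3, 4, 5, 6], 12: [7, 8, 9, 10, 11, 12]}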
# #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -330,7 +361,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' - 'fhr_list': '${FHR_LIST}' + 'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # @@ -358,7 +389,8 @@ settings="\ 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' 'obtype': '${OBTYPE}' 'FCST_OR_OBS': '${FCST_OR_OBS}' - 'accum_hh': '${ACCUM_HH:-}' + 'input_accum_hh': '${input_accum_hh}' + 'output_accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' 'input_field_group': '${VAR:-}' diff --git a/ush/set_vx_fhr_list.sh b/ush/set_leadhrs.sh similarity index 50% rename from ush/set_vx_fhr_list.sh rename to ush/set_leadhrs.sh index 8101e927e5..aa3b4b338f 100644 --- a/ush/set_vx_fhr_list.sh +++ b/ush/set_leadhrs.sh @@ -1,20 +1,21 @@ # #----------------------------------------------------------------------- # -# This file defines functions used to generate sets of forecast hours for +# This file defines functions used to generate sets of lead hours for # which verification will be performed. # #----------------------------------------------------------------------- # -function set_vx_fhr_list_no_missing() { +function set_leadhrs_no_missing() { # #----------------------------------------------------------------------- # -# This function sets the forecast hours for which verification will be -# performed under the assumption that that the data file (which may be -# a forecast output file or an observation file) for each hour is available -# (i.e. that there are no missing files). +# This function sets the lead hours (relative to some unspecified initial/ +# reference time) for which verification will be performed under the +# assumption that the data file (which may be a forecast output file or +# an observation file) for each hour is available (i.e. it assumes that +# there are no missing files). # #----------------------------------------------------------------------- # @@ -58,10 +59,10 @@ function set_vx_fhr_list_no_missing() { #----------------------------------------------------------------------- # local valid_args=( \ - "fcst_len_hrs" \ - "field" \ - "accum_hh" \ - "outvarname_fhr_list_no_missing" \ + "lhr_min" \ + "lhr_max" \ + "lhr_intvl" \ + "outvarname_lhrs_list_no_missing" \ ) process_args valid_args "$@" # @@ -81,69 +82,21 @@ function set_vx_fhr_list_no_missing() { # #----------------------------------------------------------------------- # - local fhr_array \ - fhr_list \ - fhr_int \ - fhr_min \ - fhr_max -# -#----------------------------------------------------------------------- -# -# Create the array of forecast hours. 
-# -#----------------------------------------------------------------------- -# - case "${field}" in - "APCP") - fhr_min="${accum_hh}" - fhr_int="${accum_hh}" - ;; - "ASNOW") - if [ "${accum_hh}" = "24" ]; then - fhr_min="24" - fhr_int="12" - else - fhr_min="${accum_hh}" - fhr_int="${accum_hh}" - fi - ;; - "REFC") - fhr_min="00" - fhr_int="01" - ;; - "RETOP") - fhr_min="00" - fhr_int="01" - ;; - "ADPSFC") - fhr_min="00" - fhr_int="01" - ;; - "ADPUPA") - fhr_min="00" - fhr_int="06" - ;; - *) - print_err_msg_exit "\ -A method for setting verification parameters has not been specified for -this field (field): - field = \"${field}\"" - ;; - esac - fhr_max="${fcst_len_hrs}" - - fhr_array=($( seq ${fhr_min} ${fhr_int} ${fhr_max} )) - - # Express the forecast hour array as a (scalar) string containing a comma - # (and space) separated list of the elements of fhr_array. - fhr_list=$( printf "%s, " "${fhr_array[@]}" ) - fhr_list=$( echo "${fhr_list}" | $SED "s/, $//g" ) + local lhrs_array \ + lhrs_list +# +#----------------------------------------------------------------------- +# +# Create the array of lead hours. +# +#----------------------------------------------------------------------- +# + lhrs_array=($( seq ${lhr_min} ${lhr_intvl} ${lhr_max} )) - print_info_msg "$VERBOSE" "\ -Initial (i.e. before filtering for missing files) set of forecast hours -(saved in a scalar string variable) is: - fhr_list = \"${fhr_list}\" -" + # Express the array of lead hours as a (scalar) string containing a comma + # (and space) separated list of the elements of lhrs_array. + lhrs_list=$( printf "%s, " "${lhrs_array[@]}" ) + lhrs_list=$( echo "${lhrs_list}" | $SED "s/, $//g" ) # #----------------------------------------------------------------------- # @@ -151,8 +104,8 @@ Initial (i.e. before filtering for missing files) set of forecast hours # #----------------------------------------------------------------------- # - if [ ! -z "${outvarname_fhr_list_no_missing}" ]; then - printf -v ${outvarname_fhr_list_no_missing} "%s" "${fhr_list}" + if [ ! -z "${outvarname_lhrs_list_no_missing}" ]; then + printf -v ${outvarname_lhrs_list_no_missing} "%s" "${lhrs_list}" fi # #----------------------------------------------------------------------- @@ -166,22 +119,18 @@ Initial (i.e. before filtering for missing files) set of forecast hours } - # #----------------------------------------------------------------------- # -# This function generates a list of forecast hours such that for each -# such hour, there exists a corresponding data file with a name of the -# form specified by the template fn_template. Depending on fn_template, -# this file may contain forecast or observation data. This function -# generates this forecast hour list by first generating a set of hours -# under the assumption that there is a corresponding data file for each -# hour and then removing from that list any hour for which there is no -# data file. +# This function generates a list of lead hours (relative to an initial or +# reference time yyyymmddhh_init) such that for each such hour, there +# exists a corresponding data file with a name of the form specified by +# the template fn_template. Depending on fn_template, this file may +# contain forecast or observation data. 
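The case statement deleted above hard-wired per-field defaults for the first lead hour and the interval between lead hours; under the refactor, the calling scripts supply those values instead. Conceptually the old table was just the following mapping (a reconstruction of the deleted logic for reference, not project code):

    # field -> (first lead hour, interval), given the accumulation in hours
    FHR_DEFAULTS = {
        "APCP":   lambda accum: (accum, accum),
        "ASNOW":  lambda accum: (24, 12) if accum == 24 else (accum, accum),
        "REFC":   lambda accum: (0, 1),
        "RETOP":  lambda accum: (0, 1),
        "ADPSFC": lambda accum: (0, 1),
        "ADPUPA": lambda accum: (0, 6),
    }

    print(FHR_DEFAULTS["ASNOW"](24))   # -> (24, 12)

Note that the 24-hour ASNOW special case (12-hourly steps starting at hour 24) has no direct counterpart in the new generic interface.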
 #
 #-----------------------------------------------------------------------
 #
-function set_vx_fhr_list() {
+function set_leadhrs() {
 #
 #-----------------------------------------------------------------------
 #
@@ -221,15 +170,14 @@ function set_vx_fhr_list() {
 #-----------------------------------------------------------------------
 #
   local valid_args=( \
-    "cdate" \
-    "fcst_len_hrs" \
-    "field" \
-    "accum_hh" \
+    "yyyymmddhh_init" \
+    "lhr_min" \
+    "lhr_max" \
+    "lhr_intvl" \
     "base_dir" \
     "fn_template" \
-    "check_accum_contrib_files" \
     "num_missing_files_max" \
-    "outvarname_fhr_list" \
+    "outvarname_lhrs_list" \
    )
   process_args valid_args "$@"
 #
@@ -251,155 +199,102 @@ function set_vx_fhr_list() {
 #
   local crnt_tmpl \
         crnt_tmpl_esc \
-        fhr \
-        fhr_array \
-        fhr_list \
         fn \
         fp \
         i \
-        num_fcst_hrs \
+        lhr \
+        lhrs_array \
+        lhrs_list \
+        num_hrs \
         num_missing_files \
-        regex_search_tmpl \
         remainder \
-        skip_this_fhr
+        skip_this_hour
 #
 #-----------------------------------------------------------------------
 #
 # For the specified field, generate the set of lead hours at which
 # verification will be performed under the assumption that for each such
-# hour, the corresponding forecast and/or observation files exists.  Thus,
-# this set of forecast hours is an initial guess for the hours at which
-# vx will be performed.
+# hour, the corresponding forecast or observation file exists.  Thus, this
+# set is an initial guess for the lead hours at which vx will be performed.
 #
 #-----------------------------------------------------------------------
 #
-  set_vx_fhr_list_no_missing \
-    fcst_len_hrs="${fcst_len_hrs}" \
-    field="${field}" \
-    accum_hh="${accum_hh}" \
-    outvarname_fhr_list_no_missing="fhr_list_no_missing"
+  set_leadhrs_no_missing \
+    lhr_min="${lhr_min}" \
+    lhr_max="${lhr_max}" \
+    lhr_intvl="${lhr_intvl}" \
+    outvarname_lhrs_list_no_missing="lhrs_list_no_missing"
 
-  # For convenience, save the scalar variable fhr_list_no_missing to a bash
-  # array.
-  fhr_array=$( printf "%s" "${fhr_list_no_missing}" | $SED "s/,//g" )
-  fhr_array=( ${fhr_array} )
+  # For convenience, save the scalar variable lhrs_list_no_missing to a
+  # bash array.
+  lhrs_array=($( printf "%s" "${lhrs_list_no_missing}" | $SED "s/,//g" ))
 
   print_info_msg "$VERBOSE" "\
-Initial (i.e. before filtering for missing files) set of forecast hours
-is:
-  fhr_array = ( $( printf "\"%s\" " "${fhr_array[@]}" ))
+Initial (i.e. before filtering for missing files) set of lead hours
+(relative to ${yyyymmddhh_init}) is:
  lhrs_array = ( $( printf "\"%s\" " "${lhrs_array[@]}" ))
 "
 #
 #-----------------------------------------------------------------------
 #
-# Loop through all forecast hours.  For each one for which a corresponding
-# file exists, add the forecast hour to fhr_list.  fhr_list will be a
-# scalar containing a comma-separated list of forecast hours for which
-# corresponding files exist.  Also, use the variable num_missing_files
-# to keep track of the number of files that are missing.
+# Loop through the array of lead hours generated above and construct the
+# variable lhrs_list that will be a scalar (string) containing a comma-
+# separated list of hours for which corresponding forecast or observation
+# files have been confirmed to exist.  Also, use the variable
+# num_missing_files to keep track of the number of files that are missing.
# #----------------------------------------------------------------------- # - fhr_list="" + lhrs_list="" num_missing_files="0" - num_fcst_hrs=${#fhr_array[@]} - for (( i=0; i<${num_fcst_hrs}; i++ )); do - - fhr_orig="${fhr_array[$i]}" + num_hrs=${#lhrs_array[@]} + for (( i=0; i<${num_hrs}; i++ )); do - if [ "${check_accum_contrib_files}" = "TRUE" ]; then - fhr=$(( ${fhr_orig} - ${accum_hh} + 1 )) - num_back_hrs=${accum_hh} - else - fhr=${fhr_orig} - num_back_hrs=1 - fi - - skip_this_fhr="FALSE" - for (( j=0; j<${num_back_hrs}; j++ )); do -# -# Use the provided template to set the name of/relative path to the file -# Note that the while-loop below is over all METplus time string templates -# of the form {...} in the template fn_template; it continues until all -# such templates have been evaluated to actual time strings. -# - fn="${fn_template}" - regex_search_tmpl="(.*)(\{.*\})(.*)" - crnt_tmpl=$( printf "%s" "${fn_template}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) - remainder=$( printf "%s" "${fn_template}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) - while [ ! -z "${crnt_tmpl}" ]; do - - eval_METplus_timestr_tmpl \ - init_time="$cdate" \ - fhr="$fhr" \ - METplus_timestr_tmpl="${crnt_tmpl}" \ - outvarname_formatted_time="actual_value" -# -# Replace METplus time templates in fn with actual times. Note that -# when using sed, we need to escape various characters (question mark, -# closing and opening curly braces, etc) in the METplus template in -# order for the sed command below to work properly. -# - crnt_tmpl_esc=$( echo "${crnt_tmpl}" | \ - $SED -r -e "s/\?/\\\?/g" -e "s/\{/\\\{/g" -e "s/\}/\\\}/g" ) - fn=$( echo "${fn}" | \ - $SED -n -r "s|(.*)(${crnt_tmpl_esc})(.*)|\1${actual_value}\3|p" ) -# -# Set up values for the next iteration of the while-loop. -# - crnt_tmpl=$( printf "%s" "${remainder}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) - remainder=$( printf "%s" "${remainder}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) - - done + lhr="${lhrs_array[$i]}" + skip_this_hour="FALSE" +# +# Evaluate the METplus file name template containing METplus timestrings +# for the specified yyyymmddhh_init and current hour (lhr) to obtain the +# name of the current file (including possibly a relative directory). +# + eval_METplus_timestr_tmpl \ + init_time="${yyyymmddhh_init}" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${fn_template}" \ + outvarname_evaluated_timestr="fn" # # Get the full path to the file and check if it exists. # - fp="${base_dir}/${fn}" - - if [ -f "${fp}" ]; then - print_info_msg "\ -Found file (fp) for the current forecast hour (fhr; relative to the cycle -date cdate): - fhr = \"$fhr\" - cdate = \"$cdate\" + fp="${base_dir}/${fn}" + if [ -f "${fp}" ]; then + print_info_msg "\ +Found file (fp) for lead hour ${lhr} (relative to ${yyyymmddhh_init}): fp = \"${fp}\" " - else - skip_this_fhr="TRUE" - num_missing_files=$(( ${num_missing_files} + 1 )) - print_info_msg "\ -The file (fp) for the current forecast hour (fhr; relative to the cycle -date cdate) is missing: - fhr = \"$fhr\" - cdate = \"$cdate\" + else + skip_this_hour="TRUE" + num_missing_files=$(( ${num_missing_files} + 1 )) + print_info_msg "\ +The file (fp) for lead hour ${lhr} (relative to ${yyyymmddhh_init}) is MISSING: fp = \"${fp}\" -Excluding the current forecast hour from the list of hours passed to the -METplus configuration file. +Excluding this hour from the list of lead hours to return. 
" - break - fi - - fhr=$(( $fhr + 1 )) - - done + break + fi - if [ "${skip_this_fhr}" != "TRUE" ]; then - fhr_list="${fhr_list},${fhr_orig}" + if [[ ! $(boolify "${skip_this_hour}") == "TRUE" ]]; then + lhrs_list="${lhrs_list},${lhr}" fi done # -# Remove leading comma from fhr_list. +# Remove leading comma from lhrs_list. # - fhr_list=$( echo "${fhr_list}" | $SED "s/^,//g" ) + lhrs_list=$( echo "${lhrs_list}" | $SED "s/^,//g" ) print_info_msg "$VERBOSE" "\ -Final (i.e. after filtering for missing files) set of forecast hours -(saved in a scalar string variable) is: - fhr_list = \"${fhr_list}\" +Final (i.e. after filtering for missing files) set of lead hours relative +to ${yyyymmddhh_init} (saved in a scalar string variable) is: + lhrs_list = \"${lhrs_list}\" " # #----------------------------------------------------------------------- @@ -424,8 +319,8 @@ maximum allowed number (num_missing_files_max): # #----------------------------------------------------------------------- # - if [ ! -z "${outvarname_fhr_list}" ]; then - printf -v ${outvarname_fhr_list} "%s" "${fhr_list}" + if [ ! -z "${outvarname_lhrs_list}" ]; then + printf -v ${outvarname_lhrs_list} "%s" "${lhrs_list}" fi # #----------------------------------------------------------------------- From da81dbb0680c7973a662a8dc51c520fbd182dfb0 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 26 Sep 2024 14:46:19 -0600 Subject: [PATCH 113/260] Modify variable names for clarity. --- ush/set_cycle_and_obs_timeinfo.py | 64 +++++++++++++++---------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 36635b643e..9f9fbe9820 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -34,7 +34,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl): while cdate <= start_time_last_cycl: cyc = datetime.strftime(cdate, "%Y%m%d%H") all_cdates.append(cyc) - cdate += cycl_intvl + cdate += cycl_intvl return all_cdates @@ -69,7 +69,7 @@ def set_fcst_output_times_and_obs_days_all_cycles( Time interval between forecast output times; a timedelta object. Returns: - output_times_all_cycles_inst: + fcst_output_times_all_cycles_inst: List of forecast output times over all cycles of instantaneous fields. Each element is a string of the form 'YYYYMMDDHH'. @@ -78,7 +78,7 @@ def set_fcst_output_times_and_obs_days_all_cycles( perform verification) over all cycles of instantaneous fields. Each element is a string of the form 'YYYYMMDD'. - output_times_all_cycles_cumul: + fcst_output_times_all_cycles_cumul: List of forecast output times over all cycles of cumulative fields. Each element is a string of the form 'YYYYMMDDHH'. @@ -98,34 +98,34 @@ def set_fcst_output_times_and_obs_days_all_cycles( cycle_start_times = [datetime.strptime(yyyymmddhh, "%Y%m%d%H") for yyyymmddhh in cycle_start_times_str] # Get the number of forecast output times per cycle/forecast. - num_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) + num_fcst_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) # Initialize sets that will contain the various forecast output and obs # day information. 
- output_times_all_cycles_inst = set() + fcst_output_times_all_cycles_inst = set() obs_days_all_cycles_inst = set() - output_times_all_cycles_cumul = set() + fcst_output_times_all_cycles_cumul = set() obs_days_all_cycles_cumul = set() for i, start_time_crnt_cycle in enumerate(cycle_start_times): # Create a list of forecast output times of instantaneous fields for the # current cycle. - output_times_crnt_cycle_inst \ + fcst_output_times_crnt_cycle_inst \ = [start_time_crnt_cycle + i*fcst_output_intvl - for i in range(0,num_output_times_per_cycle)] - # Include the output times of instantaneous fields for the current cycle + for i in range(0,num_fcst_output_times_per_cycle)] + # Include the output times of instantaneous fields for the current cycle # in the set of all such output times over all cycles. - output_times_all_cycles_inst \ - = output_times_all_cycles_inst | set(output_times_crnt_cycle_inst) + fcst_output_times_all_cycles_inst \ + = fcst_output_times_all_cycles_inst | set(fcst_output_times_crnt_cycle_inst) # Create a list of instantaneous field obs days (i.e. days on which # observations of instantaneous fields are needed for verification) for # the current cycle. We do this by dropping the hour-of-day from each # element of the list of forecast output times and keeping only unique # elements. - tmp = [datetime_obj.date() for datetime_obj in output_times_crnt_cycle_inst] + tmp = [datetime_obj.date() for datetime_obj in fcst_output_times_crnt_cycle_inst] obs_days_crnt_cycl_inst = sorted(set(tmp)) - # Include the obs days for instantaneous fields for the current cycle + # Include the obs days for instantaneous fields for the current cycle # in the set of all such obs days over all cycles. obs_days_all_cycles_inst = obs_days_all_cycles_inst | set(obs_days_crnt_cycl_inst) @@ -133,12 +133,12 @@ def set_fcst_output_times_and_obs_days_all_cycles( # current cycle. This is simply the list of forecast output times for # instantaneous fields but with the first time dropped (because nothing # has yet accumulated at the starting time of the cycle). - output_times_crnt_cycle_cumul = output_times_crnt_cycle_inst - output_times_crnt_cycle_cumul.pop(0) + fcst_output_times_crnt_cycle_cumul = fcst_output_times_crnt_cycle_inst + fcst_output_times_crnt_cycle_cumul.pop(0) # Include the obs days for cumulative fields for the current cycle in the # set of all such obs days over all cycles. - output_times_all_cycles_cumul \ - = output_times_all_cycles_cumul | set(output_times_crnt_cycle_cumul) + fcst_output_times_all_cycles_cumul \ + = fcst_output_times_all_cycles_cumul | set(fcst_output_times_crnt_cycle_cumul) # Create a list of cumulative field obs days (i.e. days on which # observations of cumulative fields are needed for verification) for @@ -150,8 +150,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( # the scripts/tasks that get observations of cumulative fields, the # zeroth hour of a day is considered part of the previous day (because # it represents accumulation that occurred on the previous day). 
- tmp = output_times_crnt_cycle_cumul - last_output_time_cumul = output_times_crnt_cycle_cumul[-1] + tmp = fcst_output_times_crnt_cycle_cumul + last_output_time_cumul = fcst_output_times_crnt_cycle_cumul[-1] if last_output_time_cumul.hour == 0: tmp.pop() tmp = [datetime_obj.date() for datetime_obj in tmp] @@ -162,9 +162,9 @@ def set_fcst_output_times_and_obs_days_all_cycles( # Convert the set of output times of instantaneous fields over all cycles # to a sorted list of strings of the form 'YYYYMMDDHH'. - output_times_all_cycles_inst = sorted(output_times_all_cycles_inst) - output_times_all_cycles_inst = [datetime.strftime(output_times_all_cycles_inst[i], "%Y%m%d%H") - for i in range(len(output_times_all_cycles_inst))] + fcst_output_times_all_cycles_inst = sorted(fcst_output_times_all_cycles_inst) + fcst_output_times_all_cycles_inst = [datetime.strftime(fcst_output_times_all_cycles_inst[i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles_inst))] # Convert the set of obs days for instantaneous fields over all cycles # to a sorted list of strings of the form 'YYYYMMDD'. @@ -174,9 +174,9 @@ def set_fcst_output_times_and_obs_days_all_cycles( # Convert the set of output times of cumulative fields over all cycles to # a sorted list of strings of the form 'YYYYMMDDHH'. - output_times_all_cycles_cumul = sorted(output_times_all_cycles_cumul) - output_times_all_cycles_cumul = [datetime.strftime(output_times_all_cycles_cumul[i], "%Y%m%d%H") - for i in range(len(output_times_all_cycles_cumul))] + fcst_output_times_all_cycles_cumul = sorted(fcst_output_times_all_cycles_cumul) + fcst_output_times_all_cycles_cumul = [datetime.strftime(fcst_output_times_all_cycles_cumul[i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles_cumul))] # Convert the set of obs days for cumulative fields over all cycles to a # sorted list of strings of the form 'YYYYMMDD'. @@ -184,8 +184,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( obs_days_all_cycles_cumul = [datetime.strftime(obs_days_all_cycles_cumul[i], "%Y%m%d") for i in range(len(obs_days_all_cycles_cumul))] - return output_times_all_cycles_inst, obs_days_all_cycles_inst, \ - output_times_all_cycles_cumul, obs_days_all_cycles_cumul + return fcst_output_times_all_cycles_inst, obs_days_all_cycles_inst, \ + fcst_output_times_all_cycles_cumul, obs_days_all_cycles_cumul def set_cycledefs_for_obs_days(obs_days_all_cycles): @@ -195,17 +195,17 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): list of days must be increasing in time, but the days do not have to be consecutive, i.e. there may be gaps between days that are greater than one day. - + Each cycledef string in the output list represents a set of consecutive days in the input string (when used inside a tag in a ROCOTO - XML). Thus, when the cycledef strings in the output string are all + XML). Thus, when the cycledef strings in the output string are all assigned to the same cycledef group in a ROCOTO XML, that group will represent all the days on which observations are needed. Args: obs_days_all_cycles: A list of strings of the form 'YYYYMMDD', with each string representing - a day on which observations are needed. Note that the list must be + a day on which observations are needed. Note that the list must be sorted, i.e. the days must be increasing in time, but there may be gaps between days. 
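The pop() guarded by last_output_time_cumul.hour == 0 above implements the convention described in the comment: a cumulative-field output time at 00Z represents accumulation over the previous day, so it must not drag the new day into the obs-day list. A sketch of just that rule (6-hourly output is an example value; names are illustrative):

    from datetime import datetime, timedelta

    def cumul_obs_days(output_times):
        # Dates needing cumulative-field obs; a trailing 00Z output time
        # belongs to the previous day, which is already in the list.
        times = list(output_times)
        if times and times[-1].hour == 0:
            times.pop()
        return sorted({t.date() for t in times})

    times = [datetime(2024, 4, 29, 6) + timedelta(hours=6 * i) for i in range(4)]
    print(times[-1].date())       # 2024-04-30, but the data are for Apr 29
    print(cumul_obs_days(times))  # -> [datetime.date(2024, 4, 29)]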
@@ -218,7 +218,7 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles):
     where {yyyymmdd_start} is the starting day of the first cycle in the
     cycledef, and {yyyymmdd_end} is the starting day of the last cycle (note
-    that the minutes and hours in these cycledef stirngs are always set to
+    that the minutes and hours in these cycledef strings are always set to
     '00').  Thus, one of the elements of the output list may be as follows:
 
       '202404290000 202405010000 24:00:00'
@@ -229,7 +229,7 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles):
     # list of datetime objects.
     tmp = [datetime.strptime(yyyymmdd, "%Y%m%d")
            for yyyymmdd in obs_days_all_cycles]
-    # Initialize the variable that in the loop below contains the date of
+    # Initialize the variable that in the loop below contains the date of
     # the previous day.  This is just the first element of the list of
     # datetime objects constructed above.  Then use it to initialize the
     # list (contin_obs_day_lists) that will contain lists of consecutive

From fb3e7f42fa992c47618bf6fc9865d90b1827762d Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Fri, 27 Sep 2024 16:36:15 -0600
Subject: [PATCH 114/260] Check for the form of accum_hh (accumulation) only
 if it's going to be used.

---
 ush/set_vx_params.sh | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/ush/set_vx_params.sh b/ush/set_vx_params.sh
index 9b67e36d22..267cd6902f 100644
--- a/ush/set_vx_params.sh
+++ b/ush/set_vx_params.sh
@@ -3,8 +3,9 @@
 #
 # This file defines a function that sets various parameters needed when
 # performing verification.  The way these parameters are set depends on
-# the field being verified and, if the field is accumulated precipitation,
-# the accumulation period (both of which are inputs to this function).
+# the field being verified and, if the field is cumulative (e.g.
+# accumulated precipitation or snowfall), the accumulation period
+# (both of which are inputs to this function).
 #
 # As of 20220928, the verification tasks in the SRW App workflow use the
 # MET/METplus software (MET = Model Evaluation Tools) developed at the
@@ -91,10 +92,14 @@ function set_vx_params() {
 #
 #-----------------------------------------------------------------------
 #
-  if [[ ! "${accum_hh}" =~ ^[0-9]{2}$ ]]; then
-    print_err_msg_exit "\
-The accumulation (accum_hh) must be a 2-digit integer:
+  if [ "${obtype}" = "CCPA" ] || [ "${obtype}" = "NOHRSC" ]; then
+    if [[ ! "${accum_hh}" =~ ^[0-9]{2}$ ]]; then
+      print_err_msg_exit "\
+For the given observation type (obtype), the accumulation (accum_hh) must
+be a 2-digit integer:
+  obtype = \"${obtype}\"
   accum_hh = \"${accum_hh}\""
+    fi
   fi
 #
 #-----------------------------------------------------------------------

From 8f043fc749421191f029f95a284c9d32a156e9c2 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Fri, 27 Sep 2024 16:37:46 -0600
Subject: [PATCH 115/260] Changes that allow checks on the verification
 parameters and generation of new vx config parameters to allow the workflow
 to have obs-day-based as well as cycle-based tasks.
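Before the diff of this commit: the first three of the temporal constraints it enforces (the fourth, on hours-of-day, is illustrated further below) are pure arithmetic on the accumulation interval. A hedged distillation:

    def accum_is_usable(accum_hrs, fcst_len_hrs, obs_avail_intvl_hrs,
                        fcst_output_intvl_hrs):
        # The accumulation must fit inside the forecast, and both the obs
        # availability interval and the forecast output interval must divide
        # it evenly (so sub-accumulations can be summed on both sides).
        return (accum_hrs <= fcst_len_hrs
                and accum_hrs % obs_avail_intvl_hrs == 0
                and accum_hrs % fcst_output_intvl_hrs == 0)

    # 24-h snowfall from 6-hourly NOHRSC obs and 3-hourly forecast output:
    print(accum_is_usable(24, 36, 6, 3))   # True
    # ...but a 3-h accumulation cannot be built from 6-hourly obs:
    print(accum_is_usable(3, 36, 6, 3))    # False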
---
 ush/set_cycle_and_obs_timeinfo.py | 644 +++++++++++++++++++++++++-----
 ush/setup.py                      | 207 ++++++----
 2 files changed, 673 insertions(+), 178 deletions(-)

diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py
index 9f9fbe9820..8f45e60f2a 100644
--- a/ush/set_cycle_and_obs_timeinfo.py
+++ b/ush/set_cycle_and_obs_timeinfo.py
@@ -2,14 +2,20 @@
 
 from datetime import datetime, timedelta, date
 from pprint import pprint
+from textwrap import dedent
 from python_utils import print_input_args, print_err_msg_exit
+import logging
 
-def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl):
-    """This file defines a function that, given the start and end dates
-    as date time objects, and a cycling frequency, returns an array of
-    cycle date-hours whose elements have the form YYYYMMDDHH.  Here,
-    YYYY is a four-digit year, MM is a two- digit month, DD is a
-    two-digit day of the month, and HH is a two-digit hour of the day.
+
+def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl,
+                    return_type='string'):
+    """
+    This function returns a list containing the starting times of all the
+    cycles in the experiment.
+
+    If return_type is set to "string" (the default value), the returned list
+    contains strings in the format 'YYYYMMDDHH'.  If it is set to "datetime",
+    the returned list contains datetime objects.
 
     Args:
         start_time_first_cycl:
@@ -21,38 +27,85 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl):
         cycl_intvl:
             Time interval between cycle starting times; a timedelta object.
 
+        return_type:
+            String that specifies the type of the returned list.
+
     Returns:
-        A list of strings containing cycle starting times in the format
-        'YYYYMMDDHH'
+        all_cdates:
+            Either a list of strings in the format 'YYYYMMDDHH' or a list of
+            datetime objects containing the cycle starting times.
     """
 
     print_input_args(locals())
+
+    valid_values = ['string', 'datetime']
+    if return_type not in valid_values:
+        msg = dedent(f"""
+            Invalid value for optional argument "return_type":
+              return_type = {return_type}
+            Valid values are:
+              valid_values = {valid_values}
+            """)
+        raise ValueError(msg)
 
     # iterate over cycles
     all_cdates = []
    cdate = start_time_first_cycl
     while cdate <= start_time_last_cycl:
-        cyc = datetime.strftime(cdate, "%Y%m%d%H")
-        all_cdates.append(cyc)
+        all_cdates.append(cdate)
         cdate += cycl_intvl
+
+    if return_type == "string":
+        all_cdates = [datetime.strftime(cdate, "%Y%m%d%H") for cdate in all_cdates]
+
     return all_cdates
 
-def set_fcst_output_times_and_obs_days_all_cycles(
-        start_time_first_cycl, start_time_last_cycl, cycl_intvl, fcst_len, fcst_output_intvl):
-    """Given the starting time of the first cycle of an SRW App experiment, the
-    starting time of the last cycle, the interval between cycle start times,
-    the forecast length, and the forecast output interval, this function
-    returns two pairs of lists: the first of each pair is a list of strings
-    of forecast output times over all cycles (each element of the form
-    'YYYYMMDDHH'), and the second is a list of days over all cycles on which
-    observations are needed to perform verification (each element of the form
-    'YYYYMMDD').  The first pair of lists is for instantaneous output fields
-    (e.g. REFC, RETOP, T2m), and the second pair is for cumulative ones (e.g.
-    APCP or accumulated precipitation).  The accumulation period for the latter
-    is the forecast output interval.
+def check_temporal_consistency_cumul_fields(
+    vx_config,
+    start_time_first_cycl, start_time_last_cycl, cycl_intvl,
+    fcst_len, fcst_output_intvl):
+    """
+    This function reads in a subset of the parameters in the verification
+    configuration dictionary and ensures that certain temporal constraints on
+    these parameters are satisfied.  It then returns an updated version of
+    the verification configuration dictionary that satisfies these constraints.
+
+    The constraints are on the accumulation intervals associated with the
+    cumulative forecast fields and corresponding observation type pairs that
+    are to be verified.  The constraints on each such accumulation interval
+    are as follows:
+
+    1) The accumulation interval is less than or equal to the forecast length
+       (since otherwise, the forecast field cannot be accumulated over that
+       interval).
+
+    2) The obs availability interval evenly divides the accumulation interval.
+       This ensures that the obs can be added together to obtain accumulated
+       values of the obs field, e.g. the 6-hourly NOHRSC obs can be added
+       to obtain 24-hour observed snowfall accumulations.
+
+    3) The forecast output interval evenly divides the accumulation interval.
+       This ensures that the forecast output can be added together to obtain
+       accumulated values of the forecast field, e.g. if the forecast output
+       interval is 3 hours, the resulting 3-hourly APCP outputs from the
+       forecast can be added to obtain 6-hourly forecast APCP.
+
+    4) The hours-of-day at which the accumulated forecast values will be
+       available are a subset of the ones at which the accumulated obs
+       values are available.  This ensures that the accumulated fields
+       from the obs and forecast are valid at the same times and thus can
+       be compared in the verification.
+
+    If for a given field-accumulation combination any of these constraints
+    is violated, that accumulation is removed from the list of accumulations
+    to verify for that field.
+
+    Args:
+        vx_config:
+            The verification configuration dictionary.
+
+        start_time_first_cycl:
+            Starting time of first cycle; a datetime object.
+
+        start_time_last_cycl:
+            Starting time of last cycle; a datetime object.
+
+        cycl_intvl:
+            Time interval between cycle starting times; a timedelta object.
+
+        fcst_len:
+            The length of each forecast; a timedelta object.
+
+        fcst_output_intvl:
+            Time interval between forecast output times; a timedelta object.
+
+    Returns:
+        vx_config:
+            An updated version of the verification configuration dictionary.
+
+        fcst_obs_matched_times_all_cycles_cumul:
+            Dictionary containing the times (in YYYYMMDDHH string format) at
+            which various field/accumulation combinations are output and at
+            which the corresponding obs type is also available.
+    """
+    # Set dictionary containing all cumulative fields (i.e. whether or not
+    # they are to be verified).  The keys are the observation types and the
+    # values are the field names in the forecasts.
+    vx_cumul_fields_all = {"CCPA": "APCP", "NOHRSC": "ASNOW"}
+
+    # Convert from datetime.timedelta objects to integers.
+    one_hour = timedelta(hours=1)
+    fcst_len_hrs = int(fcst_len/one_hour)
+    fcst_output_intvl_hrs = int(fcst_output_intvl/one_hour)
+
+    # Generate a list containing the starting times of the cycles.  This will
+    # be needed in checking that the hours-of-day of the forecast output match
+    # those of the observations.
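The first thing the function body below does with each obs type is derive the hours of the day at which that type is available, anchored at 00Z. In isolation:

    def obs_avail_hrs_of_day(obs_avail_intvl_hrs):
        # Hours-of-day at which obs are assumed available, starting at 00Z.
        # The interval must divide 24 evenly for the pattern to repeat daily.
        if 24 % obs_avail_intvl_hrs != 0:
            raise ValueError(f"{obs_avail_intvl_hrs} does not divide 24 evenly")
        return [f"{hr:02d}" for hr in range(0, 24, obs_avail_intvl_hrs)]

    print(obs_avail_hrs_of_day(6))   # -> ['00', '06', '12', '18']

(The 6-hour interval here is only an example value; each obs type's actual interval comes from the <OBTYPE>_OBS_AVAIL_INTVL_HRS configuration variables read below.)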
+    cycle_start_times \
+    = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl,
+                      return_type='datetime')
+
+    # Initialize one of the variables that will be returned to an empty
+    # dictionary.
+    fcst_obs_matched_times_all_cycles_cumul = dict()
+
+    for obtype, field_fcst in vx_cumul_fields_all.items():
+
+        # If the current cumulative field is not in the list of fields to be
+        # verified, just skip to the next field.
+        if field_fcst not in vx_config["VX_FIELDS"]:
+            continue
+
+        # Initialize a sub-dictionary in one of the dictionaries to be returned.
+        fcst_obs_matched_times_all_cycles_cumul.update({field_fcst: {}})
+
+        #
+        # Get the availability interval of the current observation type from the
+        # verification configuration dictionary and use it to calculate the hours-
+        # of-day at which the obs will be available.
+        #
+        # Get the obs availability interval.
+        config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"])
+        obs_avail_intvl_hrs = vx_config[config_var_name]
+        # Ensure that the obs availability interval evenly divides into 24.
+        remainder = 24 % obs_avail_intvl_hrs
+        if remainder != 0:
+            msg = dedent(f"""
+                The obs availability interval for obs of type {obtype} must divide evenly
+                into 24 but doesn't:
+                  obs_avail_intvl_hrs = {obs_avail_intvl_hrs}
+                  24 % obs_avail_intvl_hrs = {remainder}
+                """)
+            raise Exception(msg)
+        # Assume that the obs are available at hour 0 of the day regardless
+        # of obs type.
+        obs_avail_hr_start = 0
+        obs_avail_hr_end = obs_avail_hr_start + 24
+        # Construct list of obs availability hours-of-day.
+        obs_avail_hrs_of_day = [hr for hr in range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)]
+        obs_avail_hrs_of_day_str = ['%02d' % int(hr) for hr in obs_avail_hrs_of_day]
+        #
+        # Get the array of accumulation intervals for the current cumulative field.
+        # Then loop over them to ensure that the constraints listed above are
+        # satisfied.  If for a given accumulation one or more of the constraints
+        # is not satisfied, remove that accumulation from the list of accumulations
+        # for the current field.
+        #
+        accum_intvls_array_name = "".join(["VX_", field_fcst, "_ACCUMS_HRS"])
+        accum_intvls_hrs = vx_config[accum_intvls_array_name]
+        #
+        # Loop through the accumulation intervals and check the temporal constraints
+        # listed above.
+        #
+        for accum_hrs in accum_intvls_hrs.copy():
+
+            accum_hh = f"{accum_hrs:02d}"
+            # Initialize a sub-sub-dictionary in one of the dictionaries to be returned.
+            fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = []
+            #
+            # Check that accumulation intervals are not longer than the forecast length.
+            #
+            if accum_hrs > fcst_len_hrs:
+                msg = dedent(f"""
+                    The accumulation interval (accum_hrs) for the current cumulative forecast
+                    field (field_fcst) and corresponding observation type (obtype) is greater
+                    than the forecast length (fcst_len_hrs):
+                      field_fcst = {field_fcst}
+                      obtype = {obtype}
+                      accum_hrs = {accum_hrs}
+                      fcst_len_hrs = {fcst_len_hrs}
+                    Thus, this forecast field cannot be accumulated over this interval.
+                    Will remove this accumulation interval from the list of accumulation
+                    intervals to verify for this field/obtype.
+                    """)
+                logging.info(msg)
+                accum_intvls_hrs.remove(accum_hrs)
+            #
+            # Check that accumulation intervals are evenly divisible by the observation
+            # availability interval.
+            #
+            if accum_hrs in accum_intvls_hrs:
+                rem_obs = accum_hrs % obs_avail_intvl_hrs
+                if rem_obs != 0:
+                    msg = dedent(f"""
+                        The accumulation interval (accum_hrs) for the current cumulative forecast
+                        field (field_fcst) and corresponding observation type (obtype) is not
+                        evenly divisible by the observation type's availability interval
+                        (obs_avail_intvl_hrs):
+                          field_fcst = {field_fcst}
+                          obtype = {obtype}
+                          accum_hrs = {accum_hrs}
+                          obs_avail_intvl_hrs = {obs_avail_intvl_hrs}
+                          accum_hrs % obs_avail_intvl_hrs = {rem_obs}
+                        Thus, this observation type cannot be accumulated over this interval.
+                        Will remove this accumulation interval from the list of accumulation
+                        intervals to verify for this field/obtype.
+                        """)
+                    logging.info(msg)
+                    accum_intvls_hrs.remove(accum_hrs)
+            #
+            # Check that accumulation intervals are evenly divisible by the forecast
+            # output interval.
+            #
+            if accum_hrs in accum_intvls_hrs:
+                rem_fcst = accum_hrs % fcst_output_intvl_hrs
+                if rem_fcst != 0:
+                    msg = dedent(f"""
+                        The accumulation interval (accum_hrs) for the current cumulative forecast
+                        field (field_fcst) and corresponding observation type (obtype) is not
+                        evenly divisible by the forecast output interval (fcst_output_intvl):
+                          field_fcst = {field_fcst}
+                          obtype = {obtype}
+                          accum_hrs = {accum_hrs} hr
+                          fcst_output_intvl_hrs = {fcst_output_intvl_hrs} hr
+                          accum_hrs % fcst_output_intvl_hrs = {rem_fcst}
+                        Thus, this forecast field cannot be accumulated over this interval.
+                        Will remove this accumulation interval from the list of accumulation
+                        intervals to verify for this field/obtype.
+                        """)
+                    logging.info(msg)
+                    accum_intvls_hrs.remove(accum_hrs)
+            #
+            # Check that the hours-of-day at which the current cumulative field will
+            # be output are a subset of the hours-of-day at which the corresponding
+            # obs type is available.
+            #
+            if accum_hrs in accum_intvls_hrs:
+
+                # Initialize sets that will contain the forecast output times of the
+                # current cumulative field over all cycles.
+                fcst_output_times_all_cycles = set()
+
+                # Calculate the forecast output times of the current cumulative field
+                # for the current cycle and include them in the set of such times
+                # over all cycles.
+                accum = timedelta(hours=accum_hrs)
+                num_fcst_output_times_per_cycle = int(fcst_len/accum)
+                for i, start_time_crnt_cycle in enumerate(cycle_start_times):
+                    fcst_output_times_crnt_cycle \
+                    = [start_time_crnt_cycle + (i+1)*accum
+                       for i in range(0, num_fcst_output_times_per_cycle)]
+                    fcst_output_times_all_cycles \
+                    = fcst_output_times_all_cycles | set(fcst_output_times_crnt_cycle)
+
+                # Get all the hours-of-day at which the current cumulative field will be
+                # output by the forecast.
+                fcst_output_times_all_cycles = sorted(fcst_output_times_all_cycles)
+                fcst_output_times_all_cycles_str \
+                = [datetime.strftime(dt_object, "%Y%m%d%H")
+                   for dt_object in fcst_output_times_all_cycles]
+                fcst_output_hrs_of_day_str = [yyyymmddhh[8:10] for yyyymmddhh in fcst_output_times_all_cycles_str]
+                fcst_output_hrs_of_day_str.sort()
+
+                # Check that all the forecast output hours-of-day are a subset of the obs
+                # availability hours-of-day.  If not, remove the current accumulation
+                # interval from the list of intervals to verify.
+                if not set(fcst_output_hrs_of_day_str) <= set(obs_avail_hrs_of_day_str):
+                    msg = dedent(f"""
+                        The accumulation interval (accum_hrs) for the current cumulative forecast
+                        field (field_fcst) is such that the forecast will output the field on at
+                        least one hour-of-day on which the corresponding observation type is
+                        not available:
+                          field_fcst = {field_fcst}
+                          obtype = {obtype}
+                          accum_hrs = {accum_hrs} hr
+                        The forecast output hours-of-day for this field/accumulation interval
+                        combination are:
+                          fcst_output_hrs_of_day_str = {fcst_output_hrs_of_day_str}
+                        The hours-of-day at which the obs are available are:
+                          obs_avail_hrs_of_day_str = {obs_avail_hrs_of_day_str}
+                        Thus, at least some of the forecast output cannot be verified.
+                        Will remove this accumulation interval from the list of accumulation
+                        intervals to verify for this field/obtype.
+                        """)
+                    logging.info(msg)
+                    accum_intvls_hrs.remove(accum_hrs)
+                else:
+                    fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = fcst_output_times_all_cycles_str
+        #
+        # Update the value in the experiment configuration dictionary of the list
+        # of accumulation intervals to verify for this cumulative field (since
+        # some accumulation intervals may have been removed after the checks above).
+        #
+        vx_config[accum_intvls_array_name] = accum_intvls_hrs
+        #
+        # If the updated list of accumulations for the current cumulative field
+        # is empty, remove the field from the list of fields to verify in the
+        # verification configuration dictionary.
+        #
+        if not accum_intvls_hrs:
+            vx_config["VX_FIELDS"].remove(field_fcst)
+            msg = dedent(f"""
+                The list of accumulation intervals (accum_intvls_hrs) for the current
+                cumulative field to verify (field_fcst) is empty:
+                  field_fcst = {field_fcst}
+                  accum_intvls_hrs = {accum_intvls_hrs}
+                Removing this field from the list of fields to verify.  The updated list
+                is:
+                  {vx_config["VX_FIELDS"]}
+                """)
+            logging.info(msg)
+
+    return vx_config, fcst_obs_matched_times_all_cycles_cumul
+
+
+def set_fcst_output_times_and_obs_days_all_cycles(
+    start_time_first_cycl, start_time_last_cycl, cycl_intvl,
+    fcst_len, fcst_output_intvl):
+    """
+    This function returns forecast output times and observation days (i.e.
+    days on which obs are needed because there is forecast output on those
+    days) for both instantaneous (e.g. REFC, RETOP, T2m) and cumulative (e.g.
+    APCP) fields that need to be verified.  Note that for cumulative fields,
+    the only accumulation interval considered is the forecast output interval.
+    Accumulation intervals larger than this are considered elsewhere (and
+    accumulation intervals smaller than this are obviously not allowed).
+
+    Args:
+        start_time_first_cycl:
+        Starting time of first cycle; a datetime object.
+
+        start_time_last_cycl:
+        Starting time of last cycle; a datetime object.
+
+        cycl_intvl:
+        Time interval between cycle starting times; a timedelta object.
 
-        obs_days_all_cycles_inst:
-        List of observation days (i.e.
days on which observations are needed to
-        perform verification) over all cycles of instantaneous fields.  Each
-        element is a string of the form 'YYYYMMDD'.
+        fcst_len:
+        The length of each forecast; a timedelta object.
 
-        fcst_output_times_all_cycles_cumul:
-        List of forecast output times over all cycles of cumulative fields.  Each
-        element is a string of the form 'YYYYMMDDHH'.
+        fcst_output_intvl:
+        Time interval between forecast output times; a timedelta object.
 
-        obs_days_all_cycles_cumul:
-        List of observation days (i.e. days on which observations are needed to
-        perform verification) over all cycles of cumulative fields.  Each element
-        is a string of the form 'YYYYMMDD'.
+    Returns:
+        fcst_output_times_all_cycles:
+        Dictionary containing a list of forecast output times over all cycles for
+        instantaneous fields and a second analogous list for cumulative fields.
+        Each element of these lists is a string of the form 'YYYYMMDDHH'.
 
+        obs_days_all_cycles:
+        Dictionary containing a list of observation days (i.e. days on which
+        observations are needed to perform verification) over all cycles for
+        instantaneous fields and a second analogous list for cumulative fields.
+        Each element of these lists is a string of the form 'YYYYMMDD'.
     """
     # Get the list containing the starting times of the cycles.  Each element
-    # of the list is a string of the form 'YYYYMMDDHH'.
-    cycle_start_times_str \
-    = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl)
-
-    # Convert cycle_start_times_str to a list of datetime objects.
-    cycle_start_times = [datetime.strptime(yyyymmddhh, "%Y%m%d%H") for yyyymmddhh in cycle_start_times_str]
+    # of the list will be a datetime object.
+    cycle_start_times \
+    = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl,
+                      return_type='datetime')
 
     # Get the number of forecast output times per cycle/forecast.
     num_fcst_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1)
 
-    # Initialize sets that will contain the various forecast output and obs
-    # day information.
-    fcst_output_times_all_cycles_inst = set()
-    obs_days_all_cycles_inst = set()
-    fcst_output_times_all_cycles_cumul = set()
-    obs_days_all_cycles_cumul = set()
+    # Initialize dictionaries that will contain the various forecast output
+    # time and obs day information.  Note that we initialize the contents of
+    # these dictionaries as sets because that better suits the data manipulation
+    # we will need to do, but these sets will later be converted to lists.
+    fcst_output_times_all_cycles = dict()
+    fcst_output_times_all_cycles['inst'] = set()
+    fcst_output_times_all_cycles['cumul'] = set()
+    obs_days_all_cycles = dict()
+    obs_days_all_cycles['inst'] = set()
+    obs_days_all_cycles['cumul'] = set()
 
     for i, start_time_crnt_cycle in enumerate(cycle_start_times):
         # Create a list of forecast output times of instantaneous fields for the
@@ -115,8 +420,8 @@ def set_fcst_output_times_and_obs_days_all_cycles(
            for i in range(0,num_fcst_output_times_per_cycle)]
         # Include the output times of instantaneous fields for the current cycle
         # in the set of all such output times over all cycles.
-        fcst_output_times_all_cycles_inst \
-        = fcst_output_times_all_cycles_inst | set(fcst_output_times_crnt_cycle_inst)
+        fcst_output_times_all_cycles['inst'] \
+        = fcst_output_times_all_cycles['inst'] | set(fcst_output_times_crnt_cycle_inst)
 
         # Create a list of instantaneous field obs days (i.e. days on which
         # observations of instantaneous fields are needed for verification) for
@@ -127,7 +432,7 @@ def set_fcst_output_times_and_obs_days_all_cycles(
         obs_days_crnt_cycl_inst = sorted(set(tmp))
         # Include the obs days for instantaneous fields for the current cycle
         # in the set of all such obs days over all cycles.
-        obs_days_all_cycles_inst = obs_days_all_cycles_inst | set(obs_days_crnt_cycl_inst)
+        obs_days_all_cycles['inst'] = obs_days_all_cycles['inst'] | set(obs_days_crnt_cycl_inst)
 
         # Create a list of forecast output times of cumulative fields for the
        # current cycle.
This is simply the list of forecast output times for @@ -137,8 +442,8 @@ def set_fcst_output_times_and_obs_days_all_cycles( fcst_output_times_crnt_cycle_cumul.pop(0) # Include the obs days for cumulative fields for the current cycle in the # set of all such obs days over all cycles. - fcst_output_times_all_cycles_cumul \ - = fcst_output_times_all_cycles_cumul | set(fcst_output_times_crnt_cycle_cumul) + fcst_output_times_all_cycles['cumul'] \ + = fcst_output_times_all_cycles['cumul'] | set(fcst_output_times_crnt_cycle_cumul) # Create a list of cumulative field obs days (i.e. days on which # observations of cumulative fields are needed for verification) for @@ -158,49 +463,57 @@ def set_fcst_output_times_and_obs_days_all_cycles( obs_days_crnt_cycl_cumul = sorted(set(tmp)) # Include the obs days for cumulative fields for the current cycle in the # set of all such obs days over all cycles. - obs_days_all_cycles_cumul = obs_days_all_cycles_cumul | set(obs_days_crnt_cycl_cumul) + obs_days_all_cycles['cumul'] = obs_days_all_cycles['cumul'] | set(obs_days_crnt_cycl_cumul) # Convert the set of output times of instantaneous fields over all cycles # to a sorted list of strings of the form 'YYYYMMDDHH'. - fcst_output_times_all_cycles_inst = sorted(fcst_output_times_all_cycles_inst) - fcst_output_times_all_cycles_inst = [datetime.strftime(fcst_output_times_all_cycles_inst[i], "%Y%m%d%H") - for i in range(len(fcst_output_times_all_cycles_inst))] + fcst_output_times_all_cycles['inst'] = sorted(fcst_output_times_all_cycles['inst']) + fcst_output_times_all_cycles['inst'] \ + = [datetime.strftime(fcst_output_times_all_cycles['inst'][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles['inst']))] # Convert the set of obs days for instantaneous fields over all cycles # to a sorted list of strings of the form 'YYYYMMDD'. - obs_days_all_cycles_inst = sorted(obs_days_all_cycles_inst) - obs_days_all_cycles_inst = [datetime.strftime(obs_days_all_cycles_inst[i], "%Y%m%d") - for i in range(len(obs_days_all_cycles_inst))] + obs_days_all_cycles['inst'] = sorted(obs_days_all_cycles['inst']) + obs_days_all_cycles['inst'] \ + = [datetime.strftime(obs_days_all_cycles['inst'][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles['inst']))] # Convert the set of output times of cumulative fields over all cycles to # a sorted list of strings of the form 'YYYYMMDDHH'. - fcst_output_times_all_cycles_cumul = sorted(fcst_output_times_all_cycles_cumul) - fcst_output_times_all_cycles_cumul = [datetime.strftime(fcst_output_times_all_cycles_cumul[i], "%Y%m%d%H") - for i in range(len(fcst_output_times_all_cycles_cumul))] + fcst_output_times_all_cycles['cumul'] = sorted(fcst_output_times_all_cycles['cumul']) + fcst_output_times_all_cycles['cumul'] \ + = [datetime.strftime(fcst_output_times_all_cycles['cumul'][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles['cumul']))] # Convert the set of obs days for cumulative fields over all cycles to a # sorted list of strings of the form 'YYYYMMDD'. 
- obs_days_all_cycles_cumul = sorted(obs_days_all_cycles_cumul) - obs_days_all_cycles_cumul = [datetime.strftime(obs_days_all_cycles_cumul[i], "%Y%m%d") - for i in range(len(obs_days_all_cycles_cumul))] + obs_days_all_cycles['cumul'] = sorted(obs_days_all_cycles['cumul']) + obs_days_all_cycles['cumul'] \ + = [datetime.strftime(obs_days_all_cycles['cumul'][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles['cumul']))] - return fcst_output_times_all_cycles_inst, obs_days_all_cycles_inst, \ - fcst_output_times_all_cycles_cumul, obs_days_all_cycles_cumul + return fcst_output_times_all_cycles, obs_days_all_cycles -def set_cycledefs_for_obs_days(obs_days_all_cycles): - """Given a list of days on which obs are needed, this function generates a - list of ROCOTO-style cycledef strings that together span the days (over - all cycles of an SRW App experiment) on which obs are needed. The input - list of days must be increasing in time, but the days do not have to be - consecutive, i.e. there may be gaps between days that are greater than - one day. +def set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles): + """ + Given a list of days on which observations are needed (because there is + forecast output on those days), this function generates a list of ROCOTO- + style cycledef strings that together span the days (over all cycles of an + SRW App experiment) on which obs are needed. The input list of days must + be increasing in time, but the days do not have to be consecutive, i.e. + there may be gaps between days that are greater than one day. Each cycledef string in the output list represents a set of consecutive days in the input string (when used inside a tag in a ROCOTO XML). Thus, when the cycledef strings in the output string are all assigned to the same cycledef group in a ROCOTO XML, that group will - represent all the days on which observations are needed. + represent all the days on which observations are needed. This allows + the ROCOTO workflow to define a single set of non-consecutive days on + which obs are needed and define tasks (e.g. get_obs) only for those + days, thereby avoiding the redundant creation of these tasks for any + in-between days on which obs are not needed. Args: obs_days_all_cycles: @@ -210,16 +523,16 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): gaps between days. Returns: - cycledef_all_obs_days: + cycledefs_all_obs_days: A list of strings, with each string being a ROCOTO-style cycledef of the form '{yyyymmdd_start}0000 {yyyymmdd_end}0000 24:00:00' where {yyyymmdd_start} is the starting day of the first cycle in the - cycledef, and {yyyymmdd_end} is the starting day of the last cycle (note + cycledef and {yyyymmdd_end} is the starting day of the last cycle (note that the minutes and hours in these cycledef stirngs are always set to - '00'). Thus, one of the elements of the output list may be as follows: + '00'). For example, an element of the output list may be: '202404290000 202405010000 24:00:00' """ @@ -232,16 +545,17 @@ def set_cycledefs_for_obs_days(obs_days_all_cycles): # Initialize the variable that in the loop below contains the date of # the previous day. This is just the first element of the list of # datetime objects constructed above. Then use it to initialize the - # list (contin_obs_day_lists) that will contain lists of consecutive + # list (consec_obs_days_lists) that will contain lists of consecutive # observation days. 
Thus, after its construction is complete, each
-    # element of contin_obs_day_lists will itself be a list containing
-    # datetime objects that are 24 hours apart.
+    # element of consec_obs_days_lists will itself be a list containing
+    # datetime objects that represent consecutive days (i.e. are guaranteed
+    # to be 24 hours apart).
     day_prev = tmp[0]
-    contin_obs_day_lists = list()
-    contin_obs_day_lists.append([day_prev])
+    consec_obs_days_lists = list()
+    consec_obs_days_lists.append([day_prev])
 
     # Remove the first element of the list of obs days since it has already
-    # been used initiliaze contin_obs_day_lists.
+    # been used to initialize consec_obs_days_lists.
     tmp.pop(0)
 
     # Loop over the remaining list of obs days and construct the list of
@@ -250,14 +564,14 @@
     for day_crnt in tmp:
         # If the current obs day comes 24 hours after the previous obs day, i.e.
         # if it is the next day of the previous obs day, append it to the last
-        # existing list in contin_obs_day_lists.
+        # existing list in consec_obs_days_lists.
         if day_crnt == day_prev + one_day:
-            contin_obs_day_lists[-1].append(day_crnt)
+            consec_obs_days_lists[-1].append(day_crnt)
         # If the current obs day is NOT the next day of the previous obs day,
-        # append a new element to contin_obs_day_lists and initialize it as a
+        # append a new element to consec_obs_days_lists and initialize it as a
         # list containing a single element -- the current obs day.
         else:
-            contin_obs_day_lists.append([day_crnt])
+            consec_obs_days_lists.append([day_crnt])
         # Update the value of the previous day in preparation for the next
         # iteration of the loop.
         day_prev = day_crnt
@@ -267,13 +581,149 @@
     # obs days when included in a <cycledef> tag in a ROCOTO XML.  Each
     # string in this new list corresponds to a series of consecutive days on
     # which observations are needed (where by "consecutive" we mean no days
-    # are skipped), and there is at least a one day gap between each such
+    # are skipped), and there is at least a one-day gap between each such
     # series.  These cycledefs together represent all the days (i.e. over all
     # cycles of the experiment) on which observations are needed.
-    cycledef_all_obs_days = list()
-    for contin_obs_day_list in contin_obs_day_lists:
-        cycledef_start = contin_obs_day_list[0].strftime('%Y%m%d%H%M')
-        cycledef_end = contin_obs_day_list[-1].strftime('%Y%m%d%H%M')
-        cycledef_all_obs_days.append(' '.join([cycledef_start, cycledef_end, '24:00:00']))
+    cycledefs_all_obs_days = list()
+    for consec_obs_days_list in consec_obs_days_lists:
+        cycledef_start = consec_obs_days_list[0].strftime('%Y%m%d%H%M')
+        cycledef_end = consec_obs_days_list[-1].strftime('%Y%m%d%H%M')
+        cycledefs_all_obs_days.append(' '.join([cycledef_start, cycledef_end, '24:00:00']))
+
+    return cycledefs_all_obs_days
+
+
+def get_obs_retrieve_times_by_day(
+    vx_config, fcst_output_times_all_cycles, obs_days_all_cycles):
+    """
+    This function generates a dictionary of dictionaries that, for each
+    combination of obs type needed and each obs day, contains a string list
+    of the times at which that type of observation is needed on that day.
+    The elements of each list are formatted as 'YYYYMMDDHH'.
+
+    Args:
+        vx_config:
+        The verification configuration dictionary.
+
+        fcst_output_times_all_cycles:
+        Dictionary containing a list of forecast output times over all cycles for
+        instantaneous fields and a second analogous list for cumulative fields.
+        Each element of these lists is a string of the form 'YYYYMMDDHH'.
+
+        obs_days_all_cycles:
+        Dictionary containing a list of observation days (i.e. days on which
+        observations are needed to perform verification) over all cycles for
+        instantaneous fields and a second analogous list for cumulative fields.
+        Each element of these lists is a string of the form 'YYYYMMDD'.
+
+    Returns:
+        obs_retrieve_times_by_day:
+        Dictionary of dictionaries containing times at which each type of obs is
+        needed on each obs day.
+    """
+    # Convert string contents of input dictionaries to datetime objects.
+    for time_type in ['cumul', 'inst']:
+        fcst_output_times_all_cycles[time_type] \
+        = [datetime.strptime(fcst_output_times_all_cycles[time_type][i], "%Y%m%d%H")
+           for i in range(len(fcst_output_times_all_cycles[time_type]))]
+        obs_days_all_cycles[time_type] \
+        = [datetime.strptime(obs_days_all_cycles[time_type][i], "%Y%m%d")
+           for i in range(len(obs_days_all_cycles[time_type]))]
+
+    # Get list of forecast fields to be verified.
+    vx_fields = vx_config['VX_FIELDS']
+
+    # Define dictionary containing information about all fields that may
+    # possibly be verified.  This information includes their temporal
+    # characteristics (cumulative vs. instantaneous) and the mapping between
+    # the observation type and the forecast field.
+    vx_field_info = {'cumul': [{'obtype': 'CCPA', 'fcst_fields': ['APCP']},
+                               {'obtype': 'NOHRSC', 'fcst_fields': ['ASNOW']}],
+                     'inst': [{'obtype': 'MRMS', 'fcst_fields': ['REFC', 'RETOP']},
+                              {'obtype': 'NDAS', 'fcst_fields': ['ADPSFC', 'ADPUPA']}]
+                    }
+
+    # Keep only those items in the dictionary above that have forecast fields
+    # that appear in the list of forecast fields to be verified.
+    for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.copy().items():
+        for obtypes_to_fcst_fields_dict in obtypes_to_fcst_fields_dict_list.copy():
+            obtype = obtypes_to_fcst_fields_dict['obtype']
+            fcst_fields = obtypes_to_fcst_fields_dict['fcst_fields']
+            fcst_fields = [field for field in fcst_fields if field in vx_fields]
+            obtypes_to_fcst_fields_dict['fcst_fields'] = fcst_fields
+            if not fcst_fields: obtypes_to_fcst_fields_dict_list.remove(obtypes_to_fcst_fields_dict)
+        if not obtypes_to_fcst_fields_dict_list: vx_field_info.pop(obs_time_type)
+
+    # Create dictionary containing the temporal characteristics as keys and
+    # a string list of obs types to verify as the values.
+    obs_time_type_to_obtypes_dict = dict()
+    for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.items():
+        obtype_list = [the_dict['obtype'] for the_dict in obtypes_to_fcst_fields_dict_list]
+        obs_time_type_to_obtypes_dict[obs_time_type] = obtype_list
+
+    # Initialize the return variable.
+    obs_retrieve_times_by_day = dict()
+
+    # Define timedelta object representing a single day.
+    one_day = timedelta(days=1)
 
-    return cycledef_all_obs_days
+    # Loop over all obs types to be verified (by looping over the temporal
+    # type and the specific obs under that type).  For each obs type, loop
+    # over each obs day and find the times within that day at which the obs
+    # need to be retrieved.
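A minimal sketch of the per-day matching performed in the loop below, with assumed values rather than output from an actual experiment: given hourly forecast output and 6-hourly obs availability, the retrieval times for a day are the sorted intersection of the two time sets.

    # Illustrative values only; the real code builds these lists from the
    # experiment's cycle and obs-availability settings.
    fcst_times = {f"20240501{hh:02d}" for hh in range(24)}
    obs_times = {f"20240501{hh:02d}" for hh in (0, 6, 12, 18)}
    print(sorted(fcst_times & obs_times))
    # ['2024050100', '2024050106', '2024050112', '2024050118']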
+    for obs_time_type, obtypes in obs_time_type_to_obtypes_dict.items():
+
+        fcst_output_times_all_cycles_crnt_ttype = fcst_output_times_all_cycles[obs_time_type]
+        obs_days_all_cycles_crnt_ttype = obs_days_all_cycles[obs_time_type]
+
+        for obtype in obtypes:
+
+            obs_retrieve_times_by_day[obtype] = dict()
+
+            # Get the availability interval for the current observation type from the
+            # verification configuration dictionary.  Then make sure it divides evenly
+            # into 24.
+            config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"])
+            obs_avail_intvl_hrs = vx_config[config_var_name]
+            remainder = 24 % obs_avail_intvl_hrs
+            if remainder != 0:
+                msg = dedent(f"""
+                    The obs availability interval for obs of type {obtype} must divide evenly
+                    into 24 but doesn't:
+                      obs_avail_intvl_hrs = {obs_avail_intvl_hrs}
+                      24 % obs_avail_intvl_hrs = {remainder}
+                    """)
+                raise Exception(msg)
+            obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs)
+            num_obs_avail_times_per_day = int(24/obs_avail_intvl_hrs)
+
+            # Loop over all obs days over all cycles (for the current obs type).  For
+            # each such day, get the list of forecast output times and the list of obs
+            # availability times.  Finally, set the times (on that day) that obs need
+            # to be retrieved to the intersection of these two lists.
+            for obs_day in obs_days_all_cycles_crnt_ttype:
+
+                next_day = obs_day + one_day
+                if obs_time_type == "cumul":
+                    fcst_output_times_crnt_day \
+                    = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day < time <= next_day]
+                elif obs_time_type == "inst":
+                    fcst_output_times_crnt_day \
+                    = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day <= time < next_day]
+                fcst_output_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in fcst_output_times_crnt_day]
+
+                if obs_time_type == "cumul":
+                    obs_avail_times_crnt_day \
+                    = [obs_day + (i+1)*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)]
+                elif obs_time_type == "inst":
+                    obs_avail_times_crnt_day \
+                    = [obs_day + i*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)]
+                obs_avail_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in obs_avail_times_crnt_day]
+
+                obs_retrieve_times_crnt_day = list(set(fcst_output_times_crnt_day) & set(obs_avail_times_crnt_day))
+                obs_retrieve_times_crnt_day.sort()
+
+                obs_day_str = datetime.strftime(obs_day, "%Y%m%d")
+                obs_retrieve_times_by_day[obtype][obs_day_str] = obs_retrieve_times_crnt_day
+
+    return obs_retrieve_times_by_day
diff --git a/ush/setup.py b/ush/setup.py
index d6e9e5c2d0..8aaec0ef90 100644
--- a/ush/setup.py
+++ b/ush/setup.py
@@ -42,7 +42,9 @@
 from set_cycle_and_obs_timeinfo import \
     set_cycle_dates, set_fcst_output_times_and_obs_days_all_cycles, \
-    set_cycledefs_for_obs_days
+    set_rocoto_cycledefs_for_obs_days, \
+    check_temporal_consistency_cumul_fields, \
+    get_obs_retrieve_times_by_day
 from set_predef_grid_params import set_predef_grid_params
 from set_gridparams_ESGgrid import set_gridparams_ESGgrid
 from set_gridparams_GFDLgrid import set_gridparams_GFDLgrid
@@ -546,32 +548,106 @@ def remove_tag(tasks, tag):
     #
     # -----------------------------------------------------------------------
     #
-    # For vx fields that are accumulated, remove those accumulation hours
-    # that are longer than the forecast length.  If that leaves the array
-    # of accumulation hours for that field empty, then remove the field
-    # from the list of fields to be verified.
+    # Set some variables needed for running checks on and creating new
+    # (derived) configuration variables for the verification.
     #
     # -----------------------------------------------------------------------
     #
-    # Get the vx fields specified in the experiment configuration.
-    vx_fields_config = expt_config["verification"]["VX_FIELDS"]
-
+    date_first_cycl = workflow_config.get("DATE_FIRST_CYCL")
+    date_last_cycl = workflow_config.get("DATE_LAST_CYCL")
+    incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ"))
     fcst_len_hrs = workflow_config.get("FCST_LEN_HRS")
-    vx_fields_accum = ["APCP", "ASNOW"]
-    for field in vx_fields_accum:
-        if field in vx_fields_config:
-            accum_periods_array_name = "".join(["VX_", field, "_ACCUMS_HRS"])
-            accum_periods = expt_config["verification"][accum_periods_array_name]
-            accum_periods = [accum for accum in accum_periods if (accum <= fcst_len_hrs)]
-            expt_config["verification"][accum_periods_array_name] = accum_periods
-            if not accum_periods:
-                vx_fields_config.remove(field)
-
-    expt_config["verification"]["VX_FIELDS"] = vx_fields_config
+
+    # Set the forecast output interval.  Ideally, this should be obtained
+    # from the SRW App's configuration file, but such a variable doesn't
+    # yet exist in that file.
+    fcst_output_intvl_hrs = 1
+    workflow_config['FCST_OUTPUT_INTVL_HRS'] = fcst_output_intvl_hrs
+
+    # To enable arithmetic with dates and times, convert various time
+    # intervals from integer to datetime.timedelta objects.
+    cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq)
+    fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs)
+    fcst_output_intvl_dt = datetime.timedelta(hours=fcst_output_intvl_hrs)
+    #
+    # -----------------------------------------------------------------------
+    #
+    # Ensure that the configuration parameters associated with cumulative
+    # fields (e.g. APCP) in the verification section of the experiment
+    # dictionary are temporally consistent, e.g. that accumulation intervals
+    # are less than or equal to the forecast length.  Update the verification
+    # section of the dictionary to remove inconsistencies.
+    #
+    # -----------------------------------------------------------------------
+    #
+    vx_config = expt_config["verification"]
+    vx_config, fcst_obs_matched_times_all_cycles_cumul \
+    = check_temporal_consistency_cumul_fields(
+          vx_config,
+          date_first_cycl, date_last_cycl, cycl_intvl_dt,
+          fcst_len_dt, fcst_output_intvl_dt)
+    expt_config["verification"] = vx_config
+    #
+    # -----------------------------------------------------------------------
+    #
+    # Generate a list of forecast output times and a list of obs days (i.e.
+    # days on which observations are needed to perform verification because
+    # there is forecast output on those days) over all cycles, both for
+    # instantaneous fields (e.g. T2m, REFC, RETOP) and for cumulative ones
+    # (e.g. APCP).  Then add these lists to the dictionary containing workflow
+    # configuration variables.  These will be needed in generating the ROCOTO
+    # XML.
+    #
+    # -----------------------------------------------------------------------
+    #
+    fcst_output_times_all_cycles, obs_days_all_cycles \
+    = set_fcst_output_times_and_obs_days_all_cycles(
+          date_first_cycl, date_last_cycl, cycl_intvl_dt,
+          fcst_len_dt, fcst_output_intvl_dt)
+
+    workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst']
+    workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul']
+    #
+    # -----------------------------------------------------------------------
+    #
+    # Generate lists of ROCOTO cycledef strings corresponding to the obs days
+    # for instantaneous fields and those for cumulative ones.  Then save the
+    # lists of cycledefs in the dictionary containing values needed to
+    # construct the ROCOTO XML.
     #
     # -----------------------------------------------------------------------
     #
-    # Remove all verification [meta]tasks for which no fields are specified.
+    cycledefs_obs_days_inst = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['inst'])
+    cycledefs_obs_days_cumul = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['cumul'])
+
+    rocoto_config['cycledefs']['cycledefs_obs_days_inst'] = cycledefs_obs_days_inst
+    rocoto_config['cycledefs']['cycledefs_obs_days_cumul'] = cycledefs_obs_days_cumul
+    #
+    # -----------------------------------------------------------------------
+    #
+    # Generate dictionary of dictionaries that, for each combination of obs
+    # type needed and obs day, contains a string list of the times at which
+    # that type of observation is needed on that day.  The elements of each
+    # list are formatted as 'YYYYMMDDHH'.  This information is used by the
+    # day-based get_obs tasks in the workflow to get obs only at those times
+    # at which they are needed (as opposed to for the whole day).
+    #
+    # -----------------------------------------------------------------------
+    #
+    vx_config = expt_config["verification"]
+    obs_retrieve_times_by_day \
+    = get_obs_retrieve_times_by_day(
+          vx_config, fcst_output_times_all_cycles, obs_days_all_cycles)
+
+    for obtype, obs_days_dict in obs_retrieve_times_by_day.items():
+        for obs_day, obs_retrieve_times in obs_days_dict.items():
+            array_name = '_'.join(["OBS_RETRIEVE_TIMES", obtype, obs_day])
+            vx_config[array_name] = obs_retrieve_times
+    expt_config["verification"] = vx_config
+    #
+    # -----------------------------------------------------------------------
+    #
+    # Remove all verification (meta)tasks for which no fields are specified.
# # ----------------------------------------------------------------------- # @@ -579,7 +655,8 @@ def remove_tag(tasks, tag): vx_metatasks_all = {} vx_fields_all["CCPA"] = ["APCP"] - vx_metatasks_all["CCPA"] = ["metatask_PcpCombine_obs", + vx_metatasks_all["CCPA"] = ["task_get_obs_ccpa", + "metatask_PcpCombine_obs_CCPA_all_accums", "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", "metatask_GridStat_CCPA_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_CCPA", @@ -587,33 +664,38 @@ def remove_tag(tasks, tag): vx_fields_all["NOHRSC"] = ["ASNOW"] vx_metatasks_all["NOHRSC"] = ["task_get_obs_nohrsc", + "metatask_PcpCombine_obs_NOHRSC_all_accums", "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", "metatask_GridStat_NOHRSC_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_NOHRSC", "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] vx_fields_all["MRMS"] = ["REFC", "RETOP"] - vx_metatasks_all["MRMS"] = ["metatask_GridStat_MRMS_all_mems", + vx_metatasks_all["MRMS"] = ["task_get_obs_mrms", + "metatask_GridStat_MRMS_all_mems", "metatask_GenEnsProd_EnsembleStat_MRMS", "metatask_GridStat_MRMS_ensprob"] vx_fields_all["NDAS"] = ["ADPSFC", "ADPUPA"] - vx_metatasks_all["NDAS"] = ["task_run_MET_Pb2nc_obs", + vx_metatasks_all["NDAS"] = ["task_get_obs_ndas", + "task_run_MET_Pb2nc_obs_NDAS", "metatask_PointStat_NDAS_all_mems", "metatask_GenEnsProd_EnsembleStat_NDAS", "metatask_PointStat_NDAS_ensmeanprob"] # If there are no vx fields specified, remove those tasks that are necessary # for all observation types. - if not vx_fields_config: + vx_config = expt_config["verification"] + vx_fields = vx_config["VX_FIELDS"] + if not vx_fields: metatask = "metatask_check_post_output_all_mems" rocoto_config['tasks'].pop(metatask) # If for a given obstype no fields are specified, remove all vx metatasks # for that obstype. for obstype in vx_fields_all: - vx_fields_obstype = [field for field in vx_fields_config if field in vx_fields_all[obstype]] - if not vx_fields_obstype: + vx_fields_by_obstype = [field for field in vx_fields if field in vx_fields_all[obstype]] + if not vx_fields_by_obstype: for metatask in vx_metatasks_all[obstype]: if metatask in rocoto_config['tasks']: logging.info(dedent( @@ -627,6 +709,24 @@ def remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # The "cycled_from_second" cycledef in the default workflow configuration + # file (default_workflow.yaml) requires the starting date of the second + # cycle. That is difficult to calculate in the yaml file itself because + # currently, there are no utilities to perform arithmetic with dates. + # Thus, we calculate it here and save it as a variable in the workflow + # configuration dictionary. Note that correct functioning of the default + # workflow yaml file also requires that DATE_[FIRST|SECOND|LAST]_CYCL all + # be strings, not datetime objects. We perform those conversions here. 
+ # + # ----------------------------------------------------------------------- + # + date_second_cycl = date_first_cycl + cycl_intvl_dt + workflow_config['DATE_FIRST_CYCL'] = datetime.datetime.strftime(date_first_cycl, "%Y%m%d%H") + workflow_config['DATE_SECOND_CYCL'] = datetime.datetime.strftime(date_second_cycl, "%Y%m%d%H") + workflow_config['DATE_LAST_CYCL'] = datetime.datetime.strftime(date_last_cycl, "%Y%m%d%H") + # + # ----------------------------------------------------------------------- + # # ICS and LBCS settings and validation # # ----------------------------------------------------------------------- @@ -775,61 +875,6 @@ def get_location(xcs, fmt, expt_cfg): run_envir = expt_config["user"].get("RUN_ENVIR", "") - fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") - date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") - date_last_cycl = workflow_config.get("DATE_LAST_CYCL") - incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) - - # Set the forecast output interval. Ideally, this should be obtained - # from the SRW App's configuration file, but such a variable doesn't - # yet exist in that file. - fcst_output_intvl_hrs = 1 - - # To enable arithmetic with dates and times, convert various time - # intervals from integer to datetime.timedelta objects. - cycl_intvl = datetime.timedelta(days=0, hours=incr_cycl_freq, minutes=0, seconds=0) - fcst_len = datetime.timedelta(days=0, hours=fcst_len_hrs, minutes=0, seconds=0) - fcst_output_intvl = datetime.timedelta(days=0, hours=fcst_output_intvl_hrs, minutes=0, seconds=0) - - # Generate a list of forecast output times and a list of obs days (i.e. - # days on which observations are needed to perform verification) over all - # cycles, both for instantaneous fields (e.g. T2m, REFC, RETOP) and for - # cumulative ones (e.g. APCP). - output_times_all_cycles_inst, obs_days_all_cycles_inst, \ - output_times_all_cycles_cumul, obs_days_all_cycles_cumul \ - = set_fcst_output_times_and_obs_days_all_cycles( \ - date_first_cycl, date_last_cycl, cycl_intvl, fcst_len, fcst_output_intvl) - - # Add the list generated above to the dictionary containing workflow - # configuration variables. These will be needed in generating the ROCOTO - # XML. - workflow_config['OUTPUT_TIMES_ALL_CYCLES_INST'] = output_times_all_cycles_inst - workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles_inst - workflow_config['OUTPUT_TIMES_ALL_CYCLES_CUMUL'] = output_times_all_cycles_cumul - workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles_cumul - - # Generate lists of ROCOTO cycledef strings corresonding to the obs days - # for instantaneous fields and those for cumulative ones. - cycledef_obs_days_inst = set_cycledefs_for_obs_days(obs_days_all_cycles_inst) - cycledef_obs_days_cumul = set_cycledefs_for_obs_days(obs_days_all_cycles_cumul) - # Save the lists of cycledefs in the dictionary containing values needed - # to construct the ROCOTO XML. - rocoto_config['cycledefs']['cycledef_obs_days_inst'] = cycledef_obs_days_inst - rocoto_config['cycledefs']['cycledef_obs_days_cumul'] = cycledef_obs_days_cumul - - # The "cycled_from_second" cycledef in the default workflow configuration - # file (default_workflow.yaml) requires the starting date of the second - # cycle. That is difficult to calculate in the yaml file itself because - # currently, there are no utilities to perform arithmetic with dates. - # Thus, we calculate it here and save it as a variable in the workflow - # configuration dictionary. 
Note that correct functioning of the default - # workflow yaml file also requires that DATE_[FIRST|SECOND|LAST]_CYCL all - # be strings, not datetime objects. We perform those conversions here. - date_second_cycl = date_first_cycl + cycl_intvl - workflow_config['DATE_FIRST_CYCL'] = datetime.datetime.strftime(date_first_cycl, "%Y%m%d%H") - workflow_config['DATE_SECOND_CYCL'] = datetime.datetime.strftime(date_second_cycl, "%Y%m%d%H") - workflow_config['DATE_LAST_CYCL'] = datetime.datetime.strftime(date_last_cycl, "%Y%m%d%H") - # set varying forecast lengths only when fcst_len_hrs=-1 if fcst_len_hrs == -1: fcst_len_cycl = workflow_config.get("FCST_LEN_CYCL") From ab1332d6b6a4d310de55ea8592727d5a94e672ff Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:40:19 -0600 Subject: [PATCH 116/260] Bug fixes. --- ...ulticyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 2 +- ...ulticyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index 41428a7939..418e47e95e 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -57,4 +57,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 2fae0d6388..913d5093bb 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -59,4 +59,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: 
'${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index e150234a47..a859a03ac8 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -57,4 +57,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index d8eb349433..563b8852a8 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -57,4 +57,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%HHH?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' From d50b4a0ce7fe8eff7d3341b506b5b360636cd4db Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:47:24 -0600 Subject: [PATCH 117/260] Change name of cycledefs for obs days as was done in other files (python scripts) in previous commits. 
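For reference, each string in these renamed cycledef groups is rendered inside a <cycledef> tag of the generated ROCOTO XML. A sketch with assumed dates, not taken from an actual experiment:

    <cycledef group="cycledefs_obs_days_inst">202404290000 202405010000 24:00:00</cycledef>
    <cycledef group="cycledefs_obs_days_cumul">202404300000 202405010000 24:00:00</cycledef>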
--- parm/wflow/verify_pre.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 80831f6f29..3ce65da55d 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -1,7 +1,7 @@ default_task_verify_pre: &default_task_verify_pre account: '&ACCOUNT;' attrs: - cycledefs: cycledef_obs_days_inst + cycledefs: cycledefs_obs_days_inst maxtries: '1' envars: &default_vars GLOBAL_VAR_DEFNS_FP: '&GLOBAL_VAR_DEFNS_FP;' @@ -24,7 +24,7 @@ default_task_verify_pre: &default_task_verify_pre task_get_obs_ccpa: <<: *default_task_verify_pre attrs: - cycledefs: cycledef_obs_days_cumul + cycledefs: cycledefs_obs_days_cumul maxtries: '1' command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: @@ -76,7 +76,7 @@ task_get_obs_ndas: task_run_MET_Pb2nc_obs: <<: *default_task_verify_pre attrs: - cycledefs: cycledef_obs_days_inst + cycledefs: cycledefs_obs_days_inst maxtries: '2' command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' envars: From 5b5e71f22e7290e1695c9cb9f382487a22ad908a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:49:38 -0600 Subject: [PATCH 118/260] Increase walltime for PcpCombine_fcst tasks since some WE2E tests are running out of time. --- parm/wflow/verify_pre.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 3ce65da55d..102eb9dafa 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -246,7 +246,7 @@ metatask_PcpCombine_fcst_APCP_all_accums_all_mems: attrs: age: 00:00:00:30 text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' - walltime: 00:10:00 + walltime: 00:30:00 metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: var: @@ -274,4 +274,4 @@ metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: attrs: age: 00:00:00:30 text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' - walltime: 00:10:00 + walltime: 00:30:00 From 2abd9df2df01426924de5641cf0b1cd7281035ee Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:53:02 -0600 Subject: [PATCH 119/260] Remove unneeded environment variables from some tasks. 
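Before dropping an environment variable from a task definition, one way to confirm it is unused is to check where the shared ex-script consumes it; for example (a spot check, assuming the repository layout used above):

    grep -n 'ACCUM_HH' scripts/exregional_run_met_gridstat_or_pointstat_vx.sh

Any remaining references that apply only to accumulated fields (APCP, ASNOW) indicate the variable is not needed by the MRMS and NDAS tasks.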
--- parm/wflow/verify_det.yaml | 2 -- parm/wflow/verify_pre.yaml | 1 - 2 files changed, 3 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index c4f420f10c..47be6bb42b 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -102,7 +102,6 @@ metatask_GridStat_MRMS_all_mems: <<: *default_vars OBS_DIR: '&MRMS_OBS_DIR;' VAR: '#VAR#' - ACCUM_HH: '01' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'MRMS' ENSMEM_INDX: "#mem#" @@ -152,7 +151,6 @@ metatask_PointStat_NDAS_all_mems: VAR: '#VAR#' METPLUSTOOLNAME: 'POINTSTAT' OBTYPE: 'NDAS' - ACCUM_HH: '01' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' OBS_AVAIL_INTVL_HRS: '{{- verification.NDAS_OBS_AVAIL_INTVL_HRS }}' diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 102eb9dafa..6561954d8f 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -154,7 +154,6 @@ metatask_check_post_output_all_mems: envars: <<: *default_vars VAR: APCP - ACCUM_HH: '01' ENSMEM_INDX: '#mem#' dependency: # This "or" checks that the necessary stand-alone post tasks or forecast From 97f0a9c43a4e6a743c5a53d1d32aab352a7f2c90 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 27 Sep 2024 19:58:52 -0600 Subject: [PATCH 120/260] Bug fix for PcpCombine path. --- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 7 +++++-- scripts/exregional_run_met_pcpcombine.sh | 9 +++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 263d22053f..aca2795018 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -144,6 +144,7 @@ ensmem_name="mem${ensmem_indx}" if [ "${RUN_ENVIR}" = "nco" ]; then slash_cdate_or_null="" slash_ensmem_subdir_or_null="" + slash_obs_or_null="" else slash_cdate_or_null="/${CDATE}" # @@ -160,8 +161,10 @@ else # if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then slash_ensmem_subdir_or_null="/${ensmem_name}" + slash_obs_or_null="/obs" else slash_ensmem_subdir_or_null="" + slash_obs_or_null="" fi fi @@ -169,13 +172,13 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 97d156aa62..89d375b7c9 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -147,6 +147,7 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then if [ "${RUN_ENVIR}" = "nco" ]; then 
slash_cdate_or_null="" slash_ensmem_subdir_or_null="" + slash_obs_or_null="" else slash_cdate_or_null="/${CDATE}" # @@ -169,7 +170,11 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then fi elif [ "${FCST_OR_OBS}" = "OBS" ]; then slash_cdate_or_null="/${CDATE}" - slash_ensmem_subdir_or_null="/obs" + if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then + slash_obs_or_null="/obs" + else + slash_obs_or_null="" + fi fi OBS_INPUT_DIR="" @@ -193,7 +198,7 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) - OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" + OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) From 1c20ad4f40962cd35f8f13af3654ed01b80a5649 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 28 Sep 2024 08:36:22 -0600 Subject: [PATCH 121/260] Modify metatask and task names for clarity; fix paths; add pcpcombine task for NOHRSC; fix task dependencies. --- ...C_OBS => JREGIONAL_RUN_MET_PB2NC_OBS_NDAS} | 2 +- parm/wflow/verify_det.yaml | 12 ++--- parm/wflow/verify_ens.yaml | 27 +++++----- parm/wflow/verify_pre.yaml | 50 +++++++++++++++++-- ...h => exregional_run_met_pb2nc_obs_ndas.sh} | 9 ++-- scripts/exregional_run_met_pcpcombine.sh | 17 +++---- ush/setup.py | 4 +- 7 files changed, 78 insertions(+), 43 deletions(-) rename jobs/{JREGIONAL_RUN_MET_PB2NC_OBS => JREGIONAL_RUN_MET_PB2NC_OBS_NDAS} (98%) rename scripts/{exregional_run_met_pb2nc_obs.sh => exregional_run_met_pb2nc_obs_ndas.sh} (98%) diff --git a/jobs/JREGIONAL_RUN_MET_PB2NC_OBS b/jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS similarity index 98% rename from jobs/JREGIONAL_RUN_MET_PB2NC_OBS rename to jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS index 89c9bb73f4..a6ed90a1a3 100755 --- a/jobs/JREGIONAL_RUN_MET_PB2NC_OBS +++ b/jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS @@ -76,7 +76,7 @@ NDAS observations. # #----------------------------------------------------------------------- # -$SCRIPTSdir/exregional_run_met_pb2nc_obs.sh || \ +$SCRIPTSdir/exregional_run_met_pb2nc_obs_ndas.sh || \ print_err_msg_exit "\ Call to ex-script corresponding to J-job \"${scrfunc_fn}\" failed." 
# diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 47be6bb42b..a08fe69e3e 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -50,7 +50,7 @@ metatask_GridStat_CCPA_all_accums_all_mems: and: taskdep_pcpcombine_obs: attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA taskdep_pcpcombine_fcst: attrs: task: run_MET_PcpCombine_fcst_APCP#ACCUM_HH#h_mem#mem# @@ -82,9 +82,9 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: walltime: 02:00:00 dependency: and: - taskdep_get_obs_nohrsc: + taskdep_pcpcombine_obs: attrs: - task: get_obs_nohrsc + task: run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC taskdep_pcpcombine_fcst: attrs: task: run_MET_PcpCombine_fcst_ASNOW#ACCUM_HH#h_mem#mem# @@ -159,7 +159,7 @@ metatask_PointStat_NDAS_all_mems: walltime: 01:00:00 dependency: and: - datadep_all_pb2nc_obs_complete: + datadep_all_pb2nc_obs_ndas_complete: attrs: age: 00:00:00:30 # Check that the flag files that indicate that the Pb2NC tasks are @@ -170,11 +170,11 @@ metatask_PointStat_NDAS_all_mems: {%- for n in range(0, num_obs_days) %} {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} {%- if n == 0 %} - {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} {%- else %} {{- indent ~ "\n" }} {{- indent ~ "\n" }} - {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} {%- endif %} {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 046849e126..f92aef4c60 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -48,9 +48,9 @@ metatask_GenEnsProd_EnsembleStat_CCPA: FCST_THRESH: 'none' dependency: and: - taskdep_pcpcombine_obs: &taskdep_pcpcombine_obs + taskdep_pcpcombine_obs_ccpa: &taskdep_pcpcombine_obs_ccpa attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h @@ -83,9 +83,9 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: FCST_THRESH: 'none' dependency: and: - taskdep: + taskdep_pcpcombine_obs_nohrsc: &taskdep_pcpcombine_obs_nohrsc attrs: - task: get_obs_nohrsc + task: run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h @@ -169,7 +169,7 @@ metatask_GenEnsProd_EnsembleStat_NDAS: walltime: 01:00:00 dependency: and: - datadep_all_pb2nc_obs_complete: &all_pb2nc_obs_complete + datadep_all_pb2nc_obs_ndas_complete: &all_pb2nc_obs_ndas_complete attrs: age: 00:00:00:30 # Check that the flag files that indicate that the Pb2NC tasks are @@ -180,11 +180,11 @@ metatask_GenEnsProd_EnsembleStat_NDAS: {%- for n in range(0, num_obs_days) %} {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} {%- if n == 0 %} - {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} {%- else %} {{- indent ~ "\n" }} {{- indent ~ "\n" }} - {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_" ~ yyyymmdd ~ "_complete.txt" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ 
"_complete.txt" }} {%- endif %} {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' @@ -213,8 +213,8 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: FCST_THRESH: 'all' dependency: and: - taskdep_pcpcombine_obs: - <<: *taskdep_pcpcombine_obs + taskdep_pcpcombine_obs_ccpa: + <<: *taskdep_pcpcombine_obs_ccpa taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h @@ -240,9 +240,8 @@ metatask_GridStat_NOHRSC_ensmeanprob_all_accums: FCST_THRESH: 'all' dependency: and: - taskdep: - attrs: - task: get_obs_nohrsc + taskdep_pcpcombine_obs_nohrsc: + <<: *taskdep_pcpcombine_obs_nohrsc taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h @@ -291,8 +290,8 @@ metatask_PointStat_NDAS_ensmeanprob: FCST_THRESH: 'all' dependency: and: - datadep_all_pb2nc_obs_complete: - <<: *all_pb2nc_obs_complete + datadep_all_pb2nc_obs_ndas_complete: + <<: *all_pb2nc_obs_ndas_complete taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_#VAR# diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 6561954d8f..220b029412 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -73,12 +73,12 @@ task_get_obs_ndas: partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' walltime: 02:00:00 -task_run_MET_Pb2nc_obs: +task_run_MET_Pb2nc_obs_NDAS: <<: *default_task_verify_pre attrs: cycledefs: cycledefs_obs_days_inst maxtries: '2' - command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' + command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS"' envars: <<: *default_vars VAR: ADPSFC @@ -100,10 +100,10 @@ task_run_MET_Pb2nc_obs: attrs: task: get_obs_ndas -metatask_PcpCombine_obs: +metatask_PcpCombine_obs_APCP_all_accums_CCPA: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_PcpCombine_obs_APCP#ACCUM_HH#h: + task_run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -142,6 +142,48 @@ metatask_PcpCombine_obs: {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' +metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC: + var: + ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' + task_run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC: + <<: *default_task_verify_pre + attrs: + cycledefs: forecast + maxtries: '2' + command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' + envars: + <<: *default_vars + VAR: ASNOW + ACCUM_HH: '#ACCUM_HH#' + FCST_OR_OBS: OBS + OBTYPE: NOHRSC + OBS_DIR: '&NOHRSC_OBS_DIR;' + OBS_AVAIL_INTVL_HRS: '{{- verification.NOHRSC_OBS_AVAIL_INTVL_HRS }}' + METPLUSTOOLNAME: 'PCPCOMBINE' + dependency: + and: + datadep: + text: "&NOHRSC_OBS_DIR;" + datadep_all_get_obs_nohrsc_complete: + attrs: + age: 00:00:00:30 + # Check that the flag files that indicate that the get_obs_nohrsc tasks + # are complete are all present before launching any PcpCombine task. 
+ text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_CUMUL|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_CUMUL[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_nohrsc_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_nohrsc_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' + metatask_check_post_output_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs_ndas.sh similarity index 98% rename from scripts/exregional_run_met_pb2nc_obs.sh rename to scripts/exregional_run_met_pb2nc_obs_ndas.sh index fbf3ec1689..01e0362cc3 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs_ndas.sh @@ -154,8 +154,7 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do fp="${OBS_INPUT_DIR}/${fn}" if [[ -f "${fp}" ]]; then print_info_msg " -Found ${OBTYPE} obs file corresponding to observation retrieval time -(yyyymmddhh): +Found ${OBTYPE} obs file corresponding to observation retrieval time (yyyymmddhh): yyyymmddhh = \"${yyyymmddhh}\" fp = \"${fp}\" " @@ -263,8 +262,8 @@ metplus_config_tmpl_fn="${MetplusToolName}_obs" # information, but we still include that info in the file name so that # the behavior in the two modes is as similar as possible. # -metplus_config_fn="${metplus_config_tmpl_fn}_${CDATE}" -metplus_log_fn="${metplus_config_fn}" +metplus_config_fn="${metplus_config_tmpl_fn}_NDAS_${CDATE}" +metplus_log_fn="${metplus_config_fn}_NDAS" # # Add prefixes and suffixes (extensions) to the base file names. # @@ -379,7 +378,7 @@ METplus configuration file used is: #----------------------------------------------------------------------- # mkdir -p ${WFLOW_FLAG_FILES_DIR} -touch "${WFLOW_FLAG_FILES_DIR}/run_met_pb2nc_obs_${PDY}_complete.txt" +touch "${WFLOW_FLAG_FILES_DIR}/run_met_pb2nc_obs_ndas_${PDY}_complete.txt" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 89d375b7c9..43da23ca2e 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -320,18 +320,13 @@ fi # First, set the base file names. # metplus_config_tmpl_fn="${MetplusToolName}" -metplus_config_fn="${metplus_config_tmpl_fn}_$(echo_lowercase ${FCST_OR_OBS})_${FIELDNAME_IN_MET_FILEDIR_NAMES}${ENSMEM_INDX:+_${ensmem_name}}" -metplus_log_fn="${metplus_config_fn}_$CDATE" -# -# If operating on observation files, append the cycle date to the name -# of the configuration file because in this case, the output files from -# METplus are not placed under cycle directories (so another method is -# necessary to associate the configuration file with the cycle for which -# it is used). 
-# -if [ "${FCST_OR_OBS}" = "OBS" ]; then - metplus_config_fn="${metplus_log_fn}" +if [ "${FCST_OR_OBS}" = "FCST" ]; then + suffix="${ENSMEM_INDX:+_${ensmem_name}}" +elif [ "${FCST_OR_OBS}" = "OBS" ]; then + suffix="_${OBTYPE}" fi +metplus_config_fn="${metplus_config_tmpl_fn}_$(echo_lowercase ${FCST_OR_OBS})_${FIELDNAME_IN_MET_FILEDIR_NAMES}${suffix}" +metplus_log_fn="${metplus_config_fn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # diff --git a/ush/setup.py b/ush/setup.py index 8aaec0ef90..81e82cc9e6 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -656,7 +656,7 @@ def remove_tag(tasks, tag): vx_fields_all["CCPA"] = ["APCP"] vx_metatasks_all["CCPA"] = ["task_get_obs_ccpa", - "metatask_PcpCombine_obs_CCPA_all_accums", + "metatask_PcpCombine_obs_APCP_all_accums_CCPA", "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", "metatask_GridStat_CCPA_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_CCPA", @@ -664,7 +664,7 @@ def remove_tag(tasks, tag): vx_fields_all["NOHRSC"] = ["ASNOW"] vx_metatasks_all["NOHRSC"] = ["task_get_obs_nohrsc", - "metatask_PcpCombine_obs_NOHRSC_all_accums", + "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", "metatask_GridStat_NOHRSC_all_accums_all_mems", "metatask_GenEnsProd_EnsembleStat_NOHRSC", From 057ba700fb72c75a45d3b921ecef5cb5c72179a8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 29 Sep 2024 17:57:34 -0600 Subject: [PATCH 122/260] Make adjustments to ASNOW settings to account for the fact that for ASNOW obs, it's the netcdf files that come out of PcpCombine_obs tasks that are used for verification. --- parm/metplus/EnsembleStat.conf | 9 +-------- parm/metplus/GridStat_ensmean.conf | 9 +-------- parm/metplus/GridStat_ensprob.conf | 9 +-------- scripts/exregional_run_met_genensprod_or_ensemblestat.sh | 4 ++-- ...xregional_run_met_gridstat_or_pointstat_vx_ensprob.sh | 4 ++-- 5 files changed, 7 insertions(+), 28 deletions(-) diff --git a/parm/metplus/EnsembleStat.conf b/parm/metplus/EnsembleStat.conf index 3759d5d8a1..ce38b2d209 100644 --- a/parm/metplus/EnsembleStat.conf +++ b/parm/metplus/EnsembleStat.conf @@ -516,15 +516,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/parm/metplus/GridStat_ensmean.conf b/parm/metplus/GridStat_ensmean.conf index 0cfaa707bf..21d23ac4eb 100644 --- a/parm/metplus/GridStat_ensmean.conf +++ b/parm/metplus/GridStat_ensmean.conf @@ -400,15 +400,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. 
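For illustration (hypothetical values, not part of the change itself): if the Jinja below is rendered with field_obs set to APCP, accum_hh set to 06, and var_count equal to 1, the first branch produces

    OBS_VAR1_NAME = APCP_06

which matches the accumulation-suffixed field name that the PcpCombine tool writes into its output files, while a field group outside the list (e.g. REFC) falls through to the else branch and keeps the plain name OBS_VAR1_NAME = REFC.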
- -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/parm/metplus/GridStat_ensprob.conf b/parm/metplus/GridStat_ensprob.conf index 6c34eb6ba0..abde89ef4b 100644 --- a/parm/metplus/GridStat_ensprob.conf +++ b/parm/metplus/GridStat_ensprob.conf @@ -354,15 +354,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 73d98754b4..9e1d0bd390 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -140,8 +140,8 @@ if [ "${grid_or_point}" = "grid" ]; then FCST_INPUT_DIR="${vx_output_basedir}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}" ;; "REFC") diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 9a8c35d1cb..6a8da3166d 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -127,8 +127,8 @@ if [ "${grid_or_point}" = "grid" ]; then OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" From abf2014db39ae292d29264c9e6d592708893e8e7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 1 Oct 2024 16:45:26 -0600 Subject: [PATCH 123/260] Clarify informational message. 
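With hypothetical argument values, the clarified print statement in this patch produces output such as

    Pulling MRMS product MergedReflectivityQCComposite for valid time: 2024050112

so the log now identifies both the product being pulled and the valid time, rather than the valid time alone.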
--- ush/mrms_pull_topofhour.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/mrms_pull_topofhour.py b/ush/mrms_pull_topofhour.py index 310c5d97f9..32f511c393 100644 --- a/ush/mrms_pull_topofhour.py +++ b/ush/mrms_pull_topofhour.py @@ -43,7 +43,7 @@ def main(): valid = datetime.datetime(YYYY, MM, DD, HH, 0, 0) valid_str = valid.strftime("%Y%m%d") - print(f"Pulling {args.valid_time} MRMS data") + print(f"Pulling MRMS product {args.product} for valid time: {args.valid_time}") # Set up working directory From 8937a8c472987d4f2f010d1ca39d5c436d29ab2a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 1 Oct 2024 16:46:00 -0600 Subject: [PATCH 124/260] Comment out debugging lines to reduce clutter. --- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh index ae4a1c7ebf..a4421958ee 100644 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ b/ush/bash_utils/eval_METplus_timestr_tmpl.sh @@ -62,7 +62,7 @@ function eval_METplus_timestr_tmpl() { # #----------------------------------------------------------------------- # - print_input_args "valid_args" +# print_input_args "valid_args" # #----------------------------------------------------------------------- # @@ -217,7 +217,7 @@ function eval_single_METplus_timefmt() { # #----------------------------------------------------------------------- # - print_input_args "valid_args" +# print_input_args "valid_args" # #----------------------------------------------------------------------- # From 841e141853d7cd9d19c7b79d5f306ae28ceb99f4 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 2 Oct 2024 16:31:43 -0600 Subject: [PATCH 125/260] Ensure that the observation file name templates specified in the SRW App configuration file are used when running the Pb2nc task (as opposed to hard-coding file names). --- scripts/exregional_run_met_pb2nc_obs_ndas.sh | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/scripts/exregional_run_met_pb2nc_obs_ndas.sh b/scripts/exregional_run_met_pb2nc_obs_ndas.sh index 01e0362cc3..77cdb1221d 100755 --- a/scripts/exregional_run_met_pb2nc_obs_ndas.sh +++ b/scripts/exregional_run_met_pb2nc_obs_ndas.sh @@ -79,6 +79,11 @@ to convert NDAS prep buffer observation files to NetCDF format. #----------------------------------------------------------------------- # yyyymmdd_task=${PDY} + +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -150,8 +155,17 @@ num_missing_files=0 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - fn="prepbufr.ndas.${yyyymmddhh}" - fp="${OBS_INPUT_DIR}/${fn}" + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. 
+ sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp" + if [[ -f "${fp}" ]]; then print_info_msg " Found ${OBTYPE} obs file corresponding to observation retrieval time (yyyymmddhh): From 2e6299ff3d2d8258b0318cd38059eba40358a8f1 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 2 Oct 2024 16:44:03 -0600 Subject: [PATCH 126/260] Add argument to mrms_pull_topofhour.py that specifies whether a subdirectory having the name of the valid day (of the form "YYYYMMDD") should be assumed to exist under the specified source directory, and whether such a subdirectory should be created under the specified output directory. Previously, such a subdirectory was always assumed to exist/created; now, it is an option. --- ush/mrms_pull_topofhour.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ush/mrms_pull_topofhour.py b/ush/mrms_pull_topofhour.py index 32f511c393..3e5b5ddb6e 100644 --- a/ush/mrms_pull_topofhour.py +++ b/ush/mrms_pull_topofhour.py @@ -20,6 +20,8 @@ def main(): help='Name of MRMS product') parser.add_argument('-l', '--level', type=str, help='MRMS product level', choices=['_00.50_','_18_00.50_']) + parser.add_argument('--add_vdate_subdir', default=True, required=False, action=argparse.BooleanOptionalAction, + help='Flag to add valid-date subdirectory to source and destination directories') parser.add_argument('-d', '--debug', action='store_true', help='Add additional debug output') args = parser.parse_args() @@ -47,14 +49,18 @@ def main(): # Set up working directory - dest_dir = os.path.join(args.outdir, valid_str) + valid_str_or_empty = '' + if args.add_vdate_subdir: + valid_str_or_empty = valid_str + + dest_dir = os.path.join(args.outdir, valid_str_or_empty) if not os.path.exists(dest_dir): os.makedirs(dest_dir) # Sort list of files for each MRMS product if args.debug: print(f"Valid date: {valid_str}") - search_path = f"{args.source}/{valid_str}/{args.product}*.gz" + search_path = os.path.join(args.source, valid_str_or_empty, args.product + "*.gz") file_list = [f for f in glob.glob(search_path)] if args.debug: print(f"Files found: \n{file_list}") @@ -78,7 +84,7 @@ def main(): if difference.total_seconds() <= 900: filename1 = f"{args.product}{args.level}{closest_timestamp.strftime('%Y%m%d-%H%M%S')}.grib2.gz" filename2 = f"{args.product}{args.level}{valid.strftime('%Y%m%d-%H')}0000.grib2" - origfile = os.path.join(args.source, valid_str, filename1) + origfile = os.path.join(args.source, valid_str_or_empty, filename1) target = os.path.join(dest_dir, filename2) if args.debug: From 8eed4a267f4c619146166d9564ca71d6273ef9d6 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 2 Oct 2024 16:51:53 -0600 Subject: [PATCH 127/260] When retrieving files from different data stores (e.g. NOAA's HPSS), make sure that the get_obs tasks place the files (and name them) according to the file name templates specified for each obs type in the SRW App configuration file. Also, remove the variable basedir_proc since it is redundant. 
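The pattern these changes introduce into each of the get_obs scripts is sketched below (a simplified, self-contained illustration; the task day, obs hour, and template value are hypothetical). The obs time is converted to a lead hour relative to 00Z of the task's day via epoch seconds, and that lead hour is then passed to eval_METplus_timestr_tmpl to evaluate the configured file name template into the final processed path:

    # Sketch only; DATE_UTIL is assumed to be GNU date, and the template
    # value below is a made-up example of an OBS_*_FN_TEMPLATE setting.
    yyyymmdd_task="20240101"
    sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s)
    sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd_task} 06 hours" +%s)
    lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 ))  # lead hour; here 6
    eval_METplus_timestr_tmpl \
      init_time="${yyyymmdd_task}00" \
      fhr="${lhr}" \
      METplus_timestr_tmpl="${OBS_DIR}/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" \
      outvarname_evaluated_timestr="fp_proc"
    # With this (hypothetical) template, fp_proc would evaluate to
    # ${OBS_DIR}/20240101/ccpa.t06z.01h.hrap.conus.gb2

This keeps the on-disk layout of the processed obs entirely under the control of the configured file name templates rather than paths hard-coded in each script.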
--- ush/get_obs_ccpa.sh | 114 +++++++++++++-------- ush/get_obs_mrms.sh | 196 ++++++++++++++++++++++++------------ ush/get_obs_ndas.sh | 227 ++++++++++++++++++++++++------------------ ush/get_obs_nohrsc.sh | 104 ++++++++++++------- 4 files changed, 406 insertions(+), 235 deletions(-) diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh index aabb55e5a4..d3c486c607 100755 --- a/ush/get_obs_ccpa.sh +++ b/ush/get_obs_ccpa.sh @@ -167,7 +167,7 @@ if [ ${remainder} -ne 0 ]; then The obs availability interval CCPA_OBS_AVAIL_INTVL_HRS must divide evenly into 24 but doesn't: CCPA_OBS_AVAIL_INTVL_HRS = ${CCPA_OBS_AVAIL_INTVL_HRS} - mod(24, CCPA_OBS_AVAIL_INTVL_HRS) = ${remainder}" + 24 % CCPA_OBS_AVAIL_INTVL_HRS = ${remainder}" fi # Accumulation period to use when getting obs files. This is simply (a @@ -178,11 +178,10 @@ accum_obs_fmt=$( printf "%02d" "${CCPA_OBS_AVAIL_INTVL_HRS}" ) # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the grib2 -# obs files will appear after this script is done. We refer to this as -# the "processed" base directory because it contains the files after all -# processing by this script is complete. -basedir_proc=${OBS_DIR} +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -231,16 +230,26 @@ else arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) fi -# Check whether any obs files already exist on disk. If so, adjust the -# starting archive hour. In the process, keep a count of the number of -# obs files that already exist on disk. +# Check whether any obs files already exist on disk in their processed +# (i.e. final) locations. Here, by "processed" we mean after any renaming +# and rearrangement of files that this script may do to the "raw" files, +# i.e. the files as they are named and arranged within the archive (tar) +# files on HPSS. If so, adjust the starting archive hour. In the process, +# keep a count of the number of obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - day_dir_proc="${basedir_proc}/${yyyymmdd}" - fn_proc="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" - fp_proc="${day_dir_proc}/${fn_proc}" + + # Set the full path to the final processed obs file (fp_proc). + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) print_info_msg " @@ -304,7 +313,8 @@ fi #----------------------------------------------------------------------- # -# Whether to move or copy files from raw to processed directories. +# Whether to move the files or copy them from their raw to their processed +# locations. 
#mv_or_cp="mv" mv_or_cp="cp" # Whether to remove raw observations after processed directories have @@ -317,11 +327,11 @@ if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi -# Base directory that will contain the daily subdirectories in which the -# CCPA grib2 files retrieved from archive (tar) files will be placed. -# We refer to this as the "raw" base directory because it contains files +# Base directory that will contain the archive subdirectories in which +# the files extracted from each archive (tar) file will be placed. We +# refer to this as the "raw" base directory because it contains files # as they are found in the archives before any processing by this script. -basedir_raw="${basedir_proc}/${yyyymmdd_task}/raw" +basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" for arcv_hr in ${arcv_hrs[@]}; do @@ -333,10 +343,10 @@ arcv_hr = ${arcv_hr}" yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - # Directory that will contain the grib2 files retrieved from the current - # archive file. We refer to this as the "raw" archive directory because - # it will contain the files as they are in the archive before any processing - # by this script. + # Directory that will contain the files retrieved from the current archive + # file. We refer to this as the "raw" archive directory because it will + # contain the files as they are in the archive before any processing by + # this script. arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" # Check whether any of the obs retrieval times for the day associated with @@ -378,12 +388,14 @@ The times at which obs need to be retrieved are: # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). In order to - # avoid other get_obs_ccpa tasks (i.e. those associated with other days) - # from interfering with (clobbering) these files (because extracted files - # from different get_obs_ccpa tasks to have the same names or relative - # paths), we change location to the base raw directory so that files with - # same names are extracted into different directories. + # specified output location (via the --output_path option). Note that + # the relative paths of obs files within archives associted with different + # days may be the same. Thus, if files with the same archive-relative + # paths are being simultaneously extracted from multiple archive files + # (by multiple get_obs tasks), they will likely clobber each other if the + # extracton is being carried out into the same location on disk. To avoid + # this, we first change location to the raw base directory (whose name is + # obs-day dependent) and then call the retrieve_data.py script. cd ${basedir_raw} # Pull obs from HPSS. This will get all the obs files in the current @@ -402,36 +414,56 @@ The times at which obs need to be retrieved are: print_info_msg "CALLING: ${cmd}" $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - # Create the processed CCPA grib2 files. This usually consists of just - # moving or copying the raw files to their processed location, but for - # times between 20180718 and 20210504 and hours-of-day 19 through the - # end of the day (i.e. hour 0 of the next day), it involves using wgrib2 - # to correct an error in the metadata of the raw file and writing the - # corrected data to a new grib2 file in the processed location. 
- for hrs_ago in $(seq 5 -1 0); do + # Loop over the raw obs files extracted from the current archive and + # generate from them the processed obs files. + # + # For CCPA obs, for most dates this consists of simply copying or moving + # the files from the raw archive directory to the processed directory, + # possibly renaming them in the process. However, for dates between + # 20180718 and 20210504 and hours-of-day 19 through the end of the day + # (i.e. hour 0 of the next day), it involves using wgrib2 to correct an + # error in the metadata of the raw file and writing the corrected data + # to a new grib2 file in the processed location. + for hrs_ago in $(seq 5 -${CCPA_OBS_AVAIL_INTVL_HRS} 0); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed grib2 obs file from the raw one (by moving, copying, - # or otherwise) only if the time of the current file in the current archive + # Create the processed obs file from the raw one (by moving, copying, or + # otherwise) only if the time of the current file in the current archive # also exists in the list of obs retrieval times for the current day. if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + + # The raw file name needs to be the same as what the retrieve_data.py + # script called above ends up retrieving. The list of possibile templates + # for this name is given in parm/data_locations.yml, but which of those + # is actually used is not known until retrieve_data.py completes. Thus, + # that information needs to be passed back by the script and used here. + # For now, we hard-code the file name here. fn_raw="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" fp_raw="${arcv_dir_raw}/${fn_raw}" - day_dir_proc="${basedir_proc}/${yyyymmdd}" - mkdir -p ${day_dir_proc} - fn_proc="${fn_raw}" - fp_proc="${day_dir_proc}/${fn_proc}" - hh_noZero=$((10#${hh})) + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + mkdir -p $( dirname "${fp_proc}" ) + # CCPA files for 1-hour accumulation have incorrect metadata in the files # under the "00" directory from 20180718 to 20210504. After the data is # pulled, reorganize into correct yyyymmdd structure. + hh_noZero=$((10#${hh})) if [[ ${yyyymmdd} -ge 20180718 && ${yyyymmdd} -le 20210504 ]] && \ [[ (${hh_noZero} -ge 19 && ${hh_noZero} -le 23) || (${hh_noZero} -eq 0) ]]; then wgrib2 ${fp_raw} -set_date -24hr -grib ${fp_proc} -s else ${mv_or_cp} ${fp_raw} ${fp_proc} fi + fi done diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh index a0d0590667..71eae52b9c 100755 --- a/ush/get_obs_mrms.sh +++ b/ush/get_obs_mrms.sh @@ -49,6 +49,18 @@ set -u #----------------------------------------------------------------------- # +# The time interval (in hours) at which the obs are available on HPSS +# must divide evenly into 24. Otherwise, different days would have obs +# available at different hours-of-day. Make sure this is the case. 
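As a quick numeric check of the divisibility test below (hypothetical intervals): an availability interval of 6 hours gives 24 % 6 = 0, so every day has obs at the same hours-of-day (00, 06, 12, 18), whereas an interval of 7 would give 24 % 7 = 3 and the script would abort:

    remainder=$(( 24 % 6 ))   # -> 0: acceptable interval
    remainder=$(( 24 % 7 ))   # -> 3: print_err_msg_exit would be triggered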
+remainder=$(( 24 % MRMS_OBS_AVAIL_INTVL_HRS )) +if [ ${remainder} -ne 0 ]; then + print_err_msg_exit "\ +The obs availability interval MRMS_OBS_AVAIL_INTVL_HRS must divide evenly +into 24 but doesn't: + MRMS_OBS_AVAIL_INTVL_HRS = ${MRMS_OBS_AVAIL_INTVL_HRS} + 24 % MRMS_OBS_AVAIL_INTVL_HRS = ${remainder}" +fi + # Create an array-valued counterpart of MRMS_FIELDS. MRMS_FIELDS is an # environment variable created in the ROCOTO XML. It is a scalar variable # because there doesn't seem to be a way to pass a bash array from the @@ -59,14 +71,17 @@ mrms_fields=($(printf "%s" "${MRMS_FIELDS}")) # corresponding to each. fields_in_filenames=() levels_in_filenames=() +obs_mrms_fp_templates=() for field in ${mrms_fields[@]}; do # Set field-dependent parameters needed in forming grib2 file names. if [ "${field}" = "REFC" ]; then fields_in_filenames+=("MergedReflectivityQCComposite") levels_in_filenames+=("00.50") + obs_mrms_fp_templates+=("${OBS_DIR}/${OBS_MRMS_REFC_FN_TEMPLATE}") elif [ "${field}" = "RETOP" ]; then fields_in_filenames+=("EchoTop") levels_in_filenames+=("18_00.50") + obs_mrms_fp_templates+=("${OBS_DIR}/${OBS_MRMS_RETOP_FN_TEMPLATE}") else print_err_msg_exit "\ Invalid field specified: @@ -79,11 +94,10 @@ done # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the MRMS -# grib2 files will appear after this script is done. We refer to this -# as the "processed" base directory because it contains the files after -# all processing by this script is complete. -basedir_proc=${OBS_DIR} +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -95,45 +109,44 @@ basedir_proc=${OBS_DIR} array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# If there are no observation retrieval times on the day of the current -# task, exit the script. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_obs_retrieve_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the observation retrieval times fall within the day associated -with the current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any obs files." - exit -fi - # Check whether any obs files already exist on disk. If so, adjust the # starting archive hour. In the process, keep a count of the number of # obs files that already exist on disk. 
num_existing_files=0 num_mrms_fields=${#mrms_fields[@]} -for (( i=0; i<${num_mrms_fields}; i++ )); do - for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do + for (( i=0; i<${num_mrms_fields}; i++ )); do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - day_dir_proc="${basedir_proc}/${yyyymmdd}" - fn_proc="${fields_in_filenames[$i]}_${levels_in_filenames[$i]}_${yyyymmdd}-${hh}0000.grib2" - fp_proc="${day_dir_proc}/${fn_proc}" + + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${obs_mrms_fp_templates[$i]}" \ + outvarname_evaluated_timestr="fp_proc" + if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) print_info_msg " File already exists on disk: fp_proc = \"${fp_proc}\"" else - break + print_info_msg " +File does not exist on disk: + fp_proc = \"${fp_proc}\" +Will attempt to retrieve all obs files." + break 2 fi done done # If the number of obs files that already exist on disk is equal to the -# number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_obs_retrieve_times_crnt_day*num_mrms_fields)) -if [[ ${num_existing_files} -eq $((num_needed_files)) ]]; then +# number of obs files needed, then there is no need to retrieve any files. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then + print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist on disk: @@ -146,8 +159,9 @@ else At least some obs files needed for the current day (yyyymmdd_task) do not exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed is: - num_needed_files = ${num_needed_files} +The number of obs files needed for the current day (which is equal to the +number of observation retrieval times for the current day) is: + num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} The number of obs files that already exist on disk is: num_existing_files = ${num_existing_files} Will retrieve remaining files. @@ -162,7 +176,8 @@ fi #----------------------------------------------------------------------- # -# Whether to move or copy files from raw to processed directories. +# Whether to move the files or copy them from their raw to their processed +# locations. #mv_or_cp="mv" mv_or_cp="cp" # Whether to remove raw observations after processed directories have @@ -175,42 +190,52 @@ if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi -# Base directory that will contain the daily subdirectories in which the -# MRMS grib2 files retrieved from archive (tar) files will be placed. -# We refer to this as the "raw" base directory because it contains files +# Base directory that will contain the archive subdirectories in which +# the files extracted from each archive (tar) file will be placed. We +# refer to this as the "raw" base directory because it contains files # as they are found in the archives before any processing by this script. -basedir_raw="${basedir_proc}/${yyyymmdd_task}/raw" +basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" # Time associated with the archive. MRMS data have daily archives that # have the hour-of-day set to "00".
yyyymmddhh_arcv="${yyyymmdd_task}00" -# Directory that will contain the MRMS grib2 files retrieved from the -# current 6-hourly archive file. We refer to this as the "raw" quarter- -# daily directory because it will contain the files as they are in the -# archive before any processing by this script. -day_dir_raw="${basedir_raw}/${yyyymmdd_task}" +# Directory that will contain the files retrieved from the current archive +# file. We refer to this as the "raw" archive directory because it will +# contain the files as they are in the archive before any processing by +# this script. +# +# Note: +# Normally, arcv_dir_raw should consist of basedir_raw and a subdirectory +# that depends on the archive date, e.g. +# +# arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" +# +# but since for MRMS data there is only one archive per day, that directory +# is redundant, so for simplicity we set arcv_dir_raw to just basedir_raw. +arcv_dir_raw="${basedir_raw}" -# Make sure the raw quarter-daily directory exists because it is used -# below as the output directory of the retrieve_data.py script (so if -# this directory doesn't already exist, that script will fail). Creating -# this directory also ensures that the raw base directory (basedir_raw) -# exists before we change location to it below. -mkdir -p ${day_dir_raw} +# Make sure the raw archive directory exists because it is used below as +# the output directory of the retrieve_data.py script (so if this directory +# doesn't already exist, that script will fail). Creating this directory +# also ensures that the raw base directory (basedir_raw) exists before we +# change location to it below. +mkdir -p ${arcv_dir_raw} # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the -# specified output location (via the --output_path option). In order to -# avoid other get_obs_ndas tasks (i.e. those associated with other days) -# from interfering with (clobbering) these files (because extracted files -# from different get_obs_ndas tasks to have the same names or relative -# paths), we change location to the base raw directory so that files with -# same names are extracted into different directories. +# specified output location (via the --output_path option). Note that +# the relative paths of obs files within archives associated with different +# days may be the same. Thus, if files with the same archive-relative +# paths are being simultaneously extracted from multiple archive files +# (by multiple get_obs tasks), they will likely clobber each other if the +# extraction is being carried out into the same location on disk. To avoid +# this, we first change location to the raw base directory (whose name is +# obs-day dependent) and then call the retrieve_data.py script. cd ${basedir_raw} -# Pull MRMS data from HPSS. This will get all 7 obs files in the current -# archive and place them in the raw quarter-daily directory, although we -# will make use of only 6 of these (we will not use the tm00 file). +# Pull obs from HPSS. This will get all the obs files in the current +# archive and place them in the raw archive directory.
cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ @@ -219,7 +244,7 @@ python3 -u ${USHdir}/retrieve_data.py \ --cycle_date ${yyyymmddhh_arcv} \ --data_stores hpss \ --data_type MRMS_obs \ - --output_path ${day_dir_raw} \ + --output_path ${arcv_dir_raw} \ --summary_file retrieve_data.log" print_info_msg "CALLING: ${cmd}" @@ -233,19 +258,68 @@ $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." #----------------------------------------------------------------------- # -# Loop through all hours of the day associated with the task. For each -# hour, find the gzipped grib2 file in the raw daily directory that is -# closest in time to this hour. Then gunzip the file and copy it (in the -# process renaming it) to the processed location. -for hr in $(seq 0 1 23); do +# Loop over the raw obs files extracted from the current archive and +# generate from them the processed obs files. +# +# For MRMS obs, the raw obs consist of gzipped grib2 files that are +# usually a few minutes apart in time. However, because forecast data +# is available at most every hour, the SRW App configuration parameter +# MRMS_OBS_AVAIL_INTVL_HRS is set to 1 hour instead of a few minutes. +# Below, we loop over the whole day using this 1-hourly interval. For +# each hour of the day, we call the script mrms_pull_topofhour.py to find +# the gzipped grib2 file in the raw archive directory that is closest in +# time to the hour and unzip it in a temporary directory. We then copy +# or move it to the processed directory, possibly renaming it in the +# process. +for hr in $(seq 0 ${MRMS_OBS_AVAIL_INTVL_HRS} 23); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + # Create the processed obs file from the raw one (by moving, copying, or + # otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then for (( i=0; i<${num_mrms_fields}; i++ )); do + + # First, select from the set of raw files for the current day those that + # are nearest in time to the current hour. Unzip these in a temporary + # subdirectory under the raw base directory. + # + # Note that the script we call to do this (mrms_pull_topofhour.py) assumes + # a certain file naming convention. That convention must match the names + # of the files that the retrieve_data.py script called above ends up + # retrieving. The list of possibile templates for these names is given + # in parm/data_locations.yml, but which of those is actually used is not + # known until retrieve_data.py completes. Thus, that information needs + # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py. + # For now, we hard-code the file name here. python ${USHdir}/mrms_pull_topofhour.py \ --valid_time ${yyyymmddhh} \ - --outdir ${basedir_proc} \ --source ${basedir_raw} \ - --product ${fields_in_filenames[$i]} + --outdir ${basedir_raw}/topofhour \ + --product ${fields_in_filenames[$i]} \ + --no-add_vdate_subdir + + # Set the name of and the full path to the raw obs file created by the + # mrms_pull_topofhour.py script. This name is currently hard-coded to + # the output of that script. In the future, it should be set in a more + # general way (e.g. obtain from a settings file). 
+ fn_raw="${fields_in_filenames[$i]}_${levels_in_filenames[$i]}_${yyyymmdd_task}-${hh}0000.grib2" + fp_raw="${basedir_raw}/topofhour/${fn_raw}" + + # Set the full path to the final processed obs file (fp_proc) we want to + # create. + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${obs_mrms_fp_templates[$i]}" \ + outvarname_evaluated_timestr="fp_proc" + mkdir -p $( dirname "${fp_proc}" ) + + mv ${fp_raw} ${fp_proc} + done fi done diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh index 7ab6fc652b..45338714a2 100755 --- a/ush/get_obs_ndas.sh +++ b/ush/get_obs_ndas.sh @@ -58,18 +58,17 @@ if [ ${remainder} -ne 0 ]; then The obs availability interval NDAS_OBS_AVAIL_INTVL_HRS must divide evenly into 24 but doesn't: NDAS_OBS_AVAIL_INTVL_HRS = ${NDAS_OBS_AVAIL_INTVL_HRS} - mod(24, NDAS_OBS_AVAIL_INTVL_HRS) = ${remainder}" + 24 % NDAS_OBS_AVAIL_INTVL_HRS = ${remainder}" fi # The day (in the form YYYMMDD) associated with the current task via the # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the NDAS -# prepbufr files will appear after this script is done. We refer to this -# as the "processed" base directory because it contains the files after -# all processing by this script is complete. -basedir_proc=${OBS_DIR} +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -80,23 +79,9 @@ basedir_proc=${OBS_DIR} # array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) - - - - - -# If there are no observation retrieval times on the day of the current -# task, exit the script. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_obs_retrieve_times_crnt_day} -eq 0 ]]; then - print_info_msg " -None of the observation retrieval times fall within the day associated -with the current task (yyyymmdd_task): - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any obs files." - exit -fi - +# +#----------------------------------------------------------------------- +# # Obs files will be obtained by extracting them from the relevant 6-hourly # archives. Thus, we need the sequence of archive hours over which to # loop. In the simplest case, this sequence will be "6 12 18 24". This @@ -107,32 +92,45 @@ fi # # To generate this sequence, we first set its starting and ending values # as well as the interval. +# +#----------------------------------------------------------------------- +# # Sequence interval must be 6 hours because the archives are 6-hourly. arcv_hr_incr=6 -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first observation retrieval time of the day. +# Initial guess for starting archive hour. This is set to the archive +# hour containing obs at the first obs retrieval time of the day. hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) hr_first=$((10#${hh_first})) arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) # Ending archive hour. 
This is set to the archive hour containing obs at -# the last observation retrieval time of the day. +# the last obs retrieval time of the day. hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) hr_last=$((10#${hh_last})) arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) -# Check whether any obs files already exist on disk. If so, adjust the -# starting archive hour. In the process, keep a count of the number of -# obs files that already exist on disk. +# Check whether any obs files already exist on disk in their processed +# (i.e. final) locations. Here, by "processed" we mean after any renaming +# and rearrangement of files that this script may do to the "raw" files, +# i.e. the files as they are named and arranged within the archive (tar) +# files on HPSS. If so, adjust the starting archive hour. In the process, +# keep a count of the number of obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - day_dir_proc="${basedir_proc}" - fn_proc="prepbufr.ndas.${yyyymmddhh}" - fp_proc="${day_dir_proc}/${fn_proc}" + + # Set the full path to the final processed obs file (fp_proc). + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + if [[ -f ${fp_proc} ]]; then num_existing_files=$((num_existing_files+1)) print_info_msg " @@ -151,32 +149,39 @@ Setting the hour (since 00) of the first archive to retrieve to: done # If the number of obs files that already exist on disk is equal to the -# number of files needed, then there is no need to retrieve any files. -num_needed_files=$((num_obs_retrieve_times_crnt_day)) -if [[ ${num_existing_files} -eq ${num_needed_files} ]]; then +# number of obs files needed, then there is no need to retrieve any files. +num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} +if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then + print_info_msg " All obs files needed for the current day (yyyymmdd_task) already exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" Thus, there is no need to retrieve any files." exit -# Otherwise, will need to retrieve files. In this case, set the sequence -# of hours corresponding to the archives from which files will be retrieved. + +# If the number of obs files that already exist on disk is not equal to +# the number of obs files needed, then we will need to retrieve files. +# In this case, set the sequence of hours corresponding to the archives +# from which files will be retrieved. 
else + arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" print_info_msg " At least some obs files needed for the current day (yyyymmdd_task) do not exist on disk: yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed is: - num_needed_files = ${num_needed_files} +The number of obs files needed for the current day (which is equal to the +number of observation retrieval times for the current day) is: + num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} The number of obs files that already exist on disk is: num_existing_files = ${num_existing_files} Will retrieve remaining files by looping over archives corresponding to the following hours (since 00 of this day): arcv_hrs = ${arcv_hrs_str} " + fi # #----------------------------------------------------------------------- @@ -188,7 +193,8 @@ fi # -# Whether to move or copy files from raw to processed directories. +# Whether to move the files or copy them from their raw to their processed +# locations. #mv_or_cp="mv" mv_or_cp="cp" # Whether to remove raw observations after processed directories have @@ -201,11 +207,11 @@ if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then mv_or_cp="mv" fi -# Base directory that will contain the daily subdirectories in which the -# NDAS prepbufr files retrieved from archive (tar) files will be placed. -# We refer to this as the "raw" base directory because it contains files +# Base directory that will contain the archive subdirectories in which +# the files extracted from each archive (tar) file will be placed. We +# refer to this as the "raw" base directory because it contains files # as they are found in the archives before any processing by this script. -basedir_raw="${basedir_proc}/raw_${yyyymmdd_task}" +basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" for arcv_hr in ${arcv_hrs[@]}; do @@ -217,51 +223,65 @@ arcv_hr = ${arcv_hr}" yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - # Directory that will contain the NDAS prepbufr files retrieved from the - # current 6-hourly archive file. We refer to this as the "raw" quarter- - # daily directory because it will contain the files as they are in the - # archive before any processing by this script. - qrtrday_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" + # Directory that will contain the files retrieved from the current archive + # file. We refer to this as the "raw" archive directory because it will + # contain the files as they are in the archive before any processing by + # this script. + arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - # Check whether any of the observation retrieval times for the day - # associated with this task fall in the time interval spanned by the - # current archive. If so, set the flag (do_retrieve) to retrieve the - # files in the current + # Check whether any of the obs retrieval times for the day associated with + # this task fall in the time interval spanned by the current archive. If + # so, set the flag (do_retrieve) to retrieve the files in the current # archive.
- yyyymmddhh_qrtrday_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) - yyyymmddhh_qrtrday_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) + arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) + arcv_contents_yyyymmddhh_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) do_retrieve="FALSE" for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do - retrieve_time=${obs_retrieve_times_crnt_day[i]} - if [[ "${retrieve_time}" -ge "${yyyymmddhh_qrtrday_start}" ]] && \ - [[ "${retrieve_time}" -le "${yyyymmddhh_qrtrday_end}" ]]; then + obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} + if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ + [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then do_retrieve="TRUE" break fi done - if [[ $(boolify "${do_retrieve}") == "TRUE" ]]; then + if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then + + print_info_msg " +None of the current day's observation retrieval times fall in the range +spanned by the current ${arcv_hr_incr}-hourly archive file. The bounds of the current +archive are: + arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" + arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" +The times at which obs need to be retrieved are: + obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" + + else - # Make sure the raw quarter-daily directory exists because it is used - # below as the output directory of the retrieve_data.py script (so if - # this directory doesn't already exist, that script will fail). Creating - # this directory also ensures that the raw base directory (basedir_raw) - # exists before we change location to it below. - mkdir -p ${qrtrday_dir_raw} + # Make sure the raw archive directory exists because it is used below as + # the output directory of the retrieve_data.py script (so if this directory + # doesn't already exist, that script will fail). Creating this directory + # also ensures that the raw base directory (basedir_raw) exists before we + # change location to it below. + mkdir -p ${arcv_dir_raw} # The retrieve_data.py script first extracts the contents of the archive # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). In order to - # avoid other get_obs_ndas tasks (i.e. those associated with other days) - # from interfering with (clobbering) these files (because extracted files - # from different get_obs_ndas tasks to have the same names or relative - # paths), we change location to the base raw directory so that files with - # same names are extracted into different directories. + # specified output location (via the --output_path option). Note that + # the relative paths of obs files within archives associated with different + # days may be the same. Thus, if files with the same archive-relative + # paths are being simultaneously extracted from multiple archive files + # (by multiple get_obs tasks), they will likely clobber each other if the + # extraction is being carried out into the same location on disk. To avoid + # this, we first change location to the raw base directory (whose name is + # obs-day dependent) and then call the retrieve_data.py script. cd ${basedir_raw} - # Pull NDAS data from HPSS.
This will get all 7 obs files in the current - # archive and place them in the raw quarter-daily directory, although we - # will make use of only 6 of these (we will not use the tm00 file). + # Pull obs from HPSS. This will get all the obs files in the current + # archive and place them in the raw archive directory. + # Note that for the specific case of NDAS obs, this will get all 7 obs + # files in the current archive, although we will make use of only 6 of + # these (we will not use the tm00 file). cmd=" python3 -u ${USHdir}/retrieve_data.py \ --debug \ --file_set obs \ --config ${PARMdir}/data_locations.yml \ --cycle_date ${yyyymmddhh_arcv} \ --data_stores hpss \ --data_type NDAS_obs \ - --output_path ${qrtrday_dir_raw} \ + --output_path ${arcv_dir_raw} \ --summary_file retrieve_data.log" print_info_msg "CALLING: ${cmd}" $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - # Create the processed NDAS prepbufr files. This consists of simply - # copying or moving (and in the process renaming) them from the raw - # quarter-daily directory to the processed directory. Note that the - # tm06 files contain more/better observations than tm00 for the - # equivalent time, so we use those. - for hrs_ago in $(seq --format="%02g" 6 -1 1); do + # Loop over the raw obs files extracted from the current archive and + # generate from them the processed obs files. + # + # For NDAS obs, this consists of simply copying or moving the files from + # the raw archive directory to the processed directory, possibly renaming + # them in the process. + # + # Note that the tm06 file in a given archive contains more/better observations + # than the tm00 file in the next archive (their valid times are equivalent), + # so we use the tm06 files. + for hrs_ago in $(seq --format="%02g" 6 -${NDAS_OBS_AVAIL_INTVL_HRS} 1); do yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) + # Create the processed obs file from the raw one (by moving, copying, or + # otherwise) only if the time of the current file in the current archive + # also exists in the list of obs retrieval times for the current day. if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then + + # The raw file name needs to be the same as what the retrieve_data.py + # script called above ends up retrieving. The list of possible templates + # for this name is given in parm/data_locations.yml, but which of those + # is actually used is not known until retrieve_data.py completes. Thus, + # that information needs to be passed back by the script and used here. + # For now, we hard-code the file name here. fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" - fp_raw="${qrtrday_dir_raw}/${fn_raw}" - day_dir_proc="${basedir_proc}" - mkdir -p ${day_dir_proc} - fn_proc="prepbufr.ndas.${yyyymmddhh}" - fp_proc="${day_dir_proc}/${fn_proc}" + fp_raw="${arcv_dir_raw}/${fn_raw}" + + # Set the full path to the final processed obs file (fp_proc) we want to + # create.
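As a concrete illustration of the tm-file convention in the loop above (hypothetical archive time): for a 06Z archive and a 1-hour availability interval, hrs_ago runs from 06 down to 01, so

    nam.t06z.prepbufr.tm06.nr  corresponds to valid time 00Z
    nam.t06z.prepbufr.tm01.nr  corresponds to valid time 05Z

and the archive's tm00 file (valid 06Z) is skipped because the next archive's tm06 file covers that same time with better observations.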
+ sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + eval_METplus_timestr_tmpl \ + init_time="${yyyymmdd_task}00" \ + fhr="${lhr}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ + outvarname_evaluated_timestr="fp_proc" + mkdir -p $( dirname "${fp_proc}" ) + ${mv_or_cp} ${fp_raw} ${fp_proc} + fi done - else - - print_info_msg " -None of the current day's observation retrieval times fall in the range -spanned by the current 6-hourly archive file. The bounds of the current -archive are: - yyyymmddhh_qrtrday_start = \"${yyyymmddhh_qrtrday_start}\" - yyyymmddhh_qrtrday_end = \"${yyyymmddhh_qrtrday_end}\" -The observation retrieval times are: - obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" - fi done diff --git a/ush/get_obs_nohrsc.sh b/ush/get_obs_nohrsc.sh index c71266ed07..5c56f8a8df 100755 --- a/ush/get_obs_nohrsc.sh +++ b/ush/get_obs_nohrsc.sh @@ -55,7 +55,7 @@ if [ ${remainder} -ne 0 ]; then The obs availability interval NOHRSC_OBS_AVAIL_INTVL_HRS must divide evenly into 24 but doesn't: NOHRSC_OBS_AVAIL_INTVL_HRS = ${NOHRSC_OBS_AVAIL_INTVL_HRS} - mod(24, NOHRSC_OBS_AVAIL_INTVL_HRS) = ${remainder}" + 24 % NOHRSC_OBS_AVAIL_INTVL_HRS = ${remainder}" fi # Accumulation period to use when getting obs files. This is simply (a @@ -66,11 +66,10 @@ accum_obs_fmt=$( printf "%d" "${NOHRSC_OBS_AVAIL_INTVL_HRS}" ) # task's cycledefs attribute in the ROCOTO xml. yyyymmdd_task=${PDY} -# Base directory in which the daily subdirectories containing the grib2 -# obs files will appear after this script is done. We refer to this as -# the "processed" base directory because it contains the files after all -# processing by this script is complete. -basedir_proc=${OBS_DIR} +# Seconds since some reference time that the DATE_UTIL utility uses of +# the day of the current task. This will be used below to find hours +# since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) # #----------------------------------------------------------------------- # @@ -119,16 +118,26 @@ else arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) fi -# Check whether any obs files already exist on disk. If so, adjust the -# starting archive hour. In the process, keep a count of the number of -# obs files that already exist on disk. +# Check whether any obs files already exist on disk in their processed +# (i.e. final) locations. Here, by "processed" we mean after any renaming +# and rearrangement of files that this script may do to the "raw" files, +# i.e. the files as they are named and arranged within the archive (tar) +# files on HPSS. If so, adjust the starting archive hour. In the process, +# keep a count of the number of obs files that already exist on disk. num_existing_files=0 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) hh=$(echo ${yyyymmddhh} | cut -c9-10) - day_dir_proc="${basedir_proc}" - fn_proc="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" - fp_proc="${day_dir_proc}/${fn_proc}" + + # Set the full path to the final processed obs file (fp_proc). 
+  sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s)
+  lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 ))
+  eval_METplus_timestr_tmpl \
+    init_time="${yyyymmdd_task}00" \
+    fhr="${lhr}" \
+    METplus_timestr_tmpl="${OBS_DIR}/${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" \
+    outvarname_evaluated_timestr="fp_proc"
+
   if [[ -f ${fp_proc} ]]; then
     num_existing_files=$((num_existing_files+1))
     print_info_msg "
@@ -205,7 +214,8 @@ fi

 #-----------------------------------------------------------------------
 #
-# Whether to move or copy files from raw to processed directories.
+# Whether to move the files or copy them from their raw to their processed
+# locations.
 #mv_or_cp="mv"
 mv_or_cp="cp"
 # Whether to remove raw observations after processed directories have
@@ -218,11 +228,11 @@ if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then
   mv_or_cp="mv"
 fi

-# Base directory that will contain the daily subdirectories in which the
-# NOHRSC grib2 files retrieved from archive (tar) files will be placed.
-# We refer to this as the "raw" base directory because it contains files
+# Base directory that will contain the archive subdirectories in which
+# the files extracted from each archive (tar) file will be placed.  We
+# refer to this as the "raw" base directory because it contains files
 # as they are found in the archives before any processing by this script.
-basedir_raw="${basedir_proc}/raw_${yyyymmdd_task}"
+basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}"

 for arcv_hr in ${arcv_hrs[@]}; do

@@ -234,10 +244,10 @@ arcv_hr = ${arcv_hr}"
   yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8)
   hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10)

-  # Directory that will contain the grib2 files retrieved from the current
-  # archive file.  We refer to this as the "raw" archive directory because
-  # it will contain the files as they are in the archive before any processing
-  # by this script.
+  # Directory that will contain the files retrieved from the current archive
+  # file.  We refer to this as the "raw" archive directory because it will
+  # contain the files as they are in the archive before any processing by
+  # this script.
   arcv_dir_raw="${basedir_raw}/${yyyymmdd_arcv}"

   # Check whether any of the obs retrieval times for the day associated with
@@ -279,12 +289,14 @@ The times at which obs need to be retrieved are:

   # The retrieve_data.py script first extracts the contents of the archive
   # file into the directory it was called from and then moves them to the
-  # specified output location (via the --output_path option).  In order to
-  # avoid other get_obs_ccpa tasks (i.e. those associated with other days)
-  # from interfering with (clobbering) these files (because extracted files
-  # from different get_obs_ccpa tasks to have the same names or relative
-  # paths), we change location to the base raw directory so that files with
-  # same names are extracted into different directories.
+  # specified output location (via the --output_path option).  Note that
+  # the relative paths of obs files within archives associated with different
+  # days may be the same.  Thus, if files with the same archive-relative
+  # paths are being simultaneously extracted from multiple archive files
+  # (by multiple get_obs tasks), they will likely clobber each other if the
+  # extraction is being carried out into the same location on disk.  To avoid
+  # this, we first change location to the raw base directory (whose name is
+  # obs-day dependent) and then call the retrieve_data.py script.
   cd ${basedir_raw}

   # Pull obs from HPSS.
This will get all the obs files in the current
@@ -303,23 +315,43 @@ The times at which obs need to be retrieved are:
   print_info_msg "CALLING: ${cmd}"
   $cmd || print_err_msg_exit "Could not retrieve obs from HPSS."

-  # Create the processed NOHRSC grib2 files.  This consists of simply copying
-  # or moving them from the raw daily directory to the processed directory.
-  for hrs in $(seq 0 6 18); do
+  # Loop over the raw obs files extracted from the current archive and
+  # generate from them the processed obs files.
+  #
+  # For NOHRSC obs, this consists of simply copying or moving the files from
+  # the raw archive directory to the processed directory, possibly renaming
+  # them in the process.
+  for hrs in $(seq 0 ${NOHRSC_OBS_AVAIL_INTVL_HRS} 23); do
     yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H)
     yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8)
     hh=$(echo ${yyyymmddhh} | cut -c9-10)
-    # Create the processed grib2 obs file from the raw one (by moving, copying,
-    # or otherwise) only if the time of the current file in the current archive
+    # Create the processed obs file from the raw one (by moving, copying, or
+    # otherwise) only if the time of the current file in the current archive
     # also exists in the list of obs retrieval times for the current day.
     if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then
+
+      # The raw file name needs to be the same as what the retrieve_data.py
+      # script called above ends up retrieving.  The list of possible templates
+      # for this name is given in parm/data_locations.yml, but which of those
+      # is actually used is not known until retrieve_data.py completes.  Thus,
+      # that information needs to be passed back by the script and used here.
+      # For now, we hard-code the file name here.
       fn_raw="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2"
       fp_raw="${arcv_dir_raw}/${fn_raw}"
-      day_dir_proc="${basedir_proc}"
-      mkdir -p ${day_dir_proc}
-      fn_proc="${fn_raw}"
-      fp_proc="${day_dir_proc}/${fn_proc}"
+
+      # Set the full path to the final processed obs file (fp_proc) we want to
+      # create.
+      sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s)
+      lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 ))
+      eval_METplus_timestr_tmpl \
+        init_time="${yyyymmdd_task}00" \
+        fhr="${lhr}" \
+        METplus_timestr_tmpl="${OBS_DIR}/${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" \
+        outvarname_evaluated_timestr="fp_proc"
+      mkdir -p $( dirname "${fp_proc}" )
+
+      ${mv_or_cp} ${fp_raw} ${fp_proc}
+
+    fi
   done

From 2357cd3d041ea005ad88f4451c9b963e11d4e243 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Thu, 3 Oct 2024 03:00:52 -0600
Subject: [PATCH 128/260] Fix bug introduced in previous set of changes.

---
 ush/get_obs_mrms.sh | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh
index 71eae52b9c..d13e374620 100755
--- a/ush/get_obs_mrms.sh
+++ b/ush/get_obs_mrms.sh
@@ -116,6 +116,7 @@ num_existing_files=0
 num_mrms_fields=${#mrms_fields[@]}
 for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do
   for (( i=0; i<${num_mrms_fields}; i++ )); do
+
    yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8)
    hh=$(echo ${yyyymmddhh} | cut -c9-10)

@@ -139,13 +140,16 @@ File does not exist on disk:
 Will attempt to retrieve all obs files."
       break 2
     fi
+
   done
 done

 # If the number of obs files that already exist on disk is equal to the
-# number of obs files needed.
+# number of obs files needed (which is num_mrms_fields times the number
+# of obs retrieval times in the current day), then there is no need to
+# retrieve any files.
 num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]}
-if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then
+if [[ ${num_existing_files} -eq $((num_mrms_fields*num_obs_retrieve_times_crnt_day)) ]]; then

   print_info_msg "
 All obs files needed for the current day (yyyymmdd_task) already exist

From f36a86628ba8f2949fb3c83b6e5b4acd445eeca9 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Thu, 3 Oct 2024 03:02:15 -0600
Subject: [PATCH 129/260] Modify existing vx WE2E test so it is able to find
 the staged NOHRSC obs file.

---
 .../config.MET_ensemble_verification_winter_wx.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml
index 85a515f293..1845255f54 100644
--- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml
+++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml
@@ -32,3 +32,6 @@ global:
   NUM_ENS_MEMBERS: 10
 verification:
   VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ]
+  OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % NOHRSC_OBS_AVAIL_INTVL_HRS %}
+                                 {{- "{valid?fmt=%Y%m%d}/sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}'
+

From 6ba25960f3239b8b51706366dc0a64561f2474b8 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Sun, 6 Oct 2024 12:27:46 -0600
Subject: [PATCH 130/260] Bug fix.

---
 scripts/exregional_run_met_pb2nc_obs_ndas.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/exregional_run_met_pb2nc_obs_ndas.sh b/scripts/exregional_run_met_pb2nc_obs_ndas.sh
index 77cdb1221d..55244b7fdf 100755
--- a/scripts/exregional_run_met_pb2nc_obs_ndas.sh
+++ b/scripts/exregional_run_met_pb2nc_obs_ndas.sh
@@ -192,9 +192,9 @@ done
 if [ "${num_missing_files}" -gt "${NUM_MISSING_OBS_FILES_MAX}" ]; then
   print_err_msg_exit "\
 The number of missing ${OBTYPE} obs files (num_missing_files) is greater
-than the maximum allowed number (num_missing_files_max):
+than the maximum allowed number (NUM_MISSING_OBS_FILES_MAX):
   num_missing_files = ${num_missing_files}
-  num_missing_files_max = ${num_missing_files_max}"
+  NUM_MISSING_OBS_FILES_MAX = ${NUM_MISSING_OBS_FILES_MAX}"
 fi

 # Remove leading comma from LEADHR_LIST.

From e3a05bf92a7a4fc05e85662b7f5585b2eec74945 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Sun, 6 Oct 2024 12:31:24 -0600
Subject: [PATCH 131/260] Change config variable name for clarity.
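
The variable OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE (and its _PB2NC_OUTPUT
counterpart) is renamed to OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE because the
NDAS prepbufr files contain both surface (ADPSFC) and upper-air (ADPUPA)
observations, not one or the other.  As a sketch (hypothetical user
configuration), an experiment that overrides this template needs only the
corresponding one-line rename:

  verification:
    # old name:
    #OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}'
    # new name:
    OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}'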
--- scripts/exregional_run_met_genensprod_or_ensemblestat.sh | 2 +- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- ...exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh | 2 +- ...exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh | 2 +- scripts/exregional_run_met_pb2nc_obs_ndas.sh | 6 +++--- ush/config_defaults.yaml | 8 ++++---- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 9e1d0bd390..67ae70c8b9 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -159,7 +159,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" fi diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index aca2795018..e16b06cb46 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -200,7 +200,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 5ad0560f28..adecb68bcd 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -144,7 +144,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/metprd/GenEnsProd" fi diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 6a8da3166d..2c27a9a597 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -143,7 +143,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" fi OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_INPUT_FN_TEMPLATE} ) diff --git a/scripts/exregional_run_met_pb2nc_obs_ndas.sh b/scripts/exregional_run_met_pb2nc_obs_ndas.sh index 55244b7fdf..e93387ed0a 100755 --- a/scripts/exregional_run_met_pb2nc_obs_ndas.sh +++ b/scripts/exregional_run_met_pb2nc_obs_ndas.sh @@ -136,11 +136,11 @@ set_vx_params \ vx_output_basedir=$( eval echo "${VX_OUTPUT_BASEDIR}" ) 
OBS_INPUT_DIR="${OBS_DIR}" -OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE} ) +OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE} ) OUTPUT_BASE="${vx_output_basedir}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" -OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT} ) +OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs" # #----------------------------------------------------------------------- @@ -163,7 +163,7 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do eval_METplus_timestr_tmpl \ init_time="${yyyymmdd_task}00" \ fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}" \ outvarname_evaluated_timestr="fp" if [[ -f "${fp}" ]]; then diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 56bd15b814..8a02964cc2 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2448,7 +2448,7 @@ verification: # OBS_MRMS_RETOP_FN_TEMPLATE: # File name template for MRMS echo top observations. # - # OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: + # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: # File name template for NDAS surface and upper air observations. # This template is used by the workflow tasks that call the METplus Pb2nc # tool on NDAS obs to find the input observation files containing ADP @@ -2461,7 +2461,7 @@ verification: {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' - OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' + OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' # # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: # Template used to specify the names of the output NetCDF observation @@ -2470,7 +2470,7 @@ verification: # APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF # format.) # - # OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: + # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: # Template used to specify the names of the output NetCDF observation # files generated by the worfklow verification tasks that call the # METplus Pb2nc tool on NDAS observations. (These files will contain @@ -2480,7 +2480,7 @@ verification: {{- "ccpa.t{valid?fmt=%H}z." ~ data_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}' OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}' - OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc' + OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}.nc' # # VX_FCST_MODEL_NAME: # String that specifies a descriptive name for the model being verified. From 84c54ba7ae450e0c9504ff11eeb944e24847cf1f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 6 Oct 2024 16:37:40 -0600 Subject: [PATCH 132/260] Use new python script to get any of the allowed kinds of obs instead of the four separate shell scripts. 
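
The obs-type-specific scripts get_obs_ccpa.sh, get_obs_mrms.sh, get_obs_ndas.sh,
and get_obs_nohrsc.sh are replaced by a single script, ush/get_obs.py, which
the ex-script now calls once per obs day.  As a sketch (hypothetical paths and
date), a standalone invocation of the new script looks like:

  python3 -u ush/get_obs.py \
    --var_defns_path /path/to/expt_dir/var_defns.yaml \
    --obtype CCPA \
    --obs_day 20230217 \
    --log_level debug

The optional --log_level and --log_fp arguments control logging; the remaining
behavior is driven by the experiment's variable definitions file.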
--- scripts/exregional_get_verif_obs.sh | 102 +-- ush/get_obs.py | 885 +++++++++++++++++++++++++++ ush/get_obs_ccpa.sh | 484 --------------- ush/get_obs_mrms.sh | 341 ----------- ush/get_obs_ndas.sh | 357 ----------- ush/get_obs_nohrsc.sh | 372 ----------- ush/run_eval_METplus_timestr_tmpl.sh | 28 + 7 files changed, 926 insertions(+), 1643 deletions(-) create mode 100644 ush/get_obs.py delete mode 100755 ush/get_obs_ccpa.sh delete mode 100755 ush/get_obs_mrms.sh delete mode 100755 ush/get_obs_ndas.sh delete mode 100755 ush/get_obs_nohrsc.sh create mode 100755 ush/run_eval_METplus_timestr_tmpl.sh diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 4e981b3958..158218889e 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -46,92 +46,8 @@ done # #----------------------------------------------------------------------- # -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# -# CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 -# -# If data is retrieved from HPSS, it will be automatically staged by this -# script. -# -# Notes about the data and how it's used for verification: -# -# 1. Accumulation is currently hardcoded to 01h. The verification will -# use MET/pcp-combine to sum 01h files into desired accumulations. -# -# 2. There is a problem with the valid time in the metadata for files -# valid from 19 - 00 UTC (or files under the '00' directory). This is -# accounted for in this script for data retrieved from HPSS, but if you -# have manually staged data on disk you should be sure this is accounted -# for. See in-line comments below for details. -# -# -# MRMS (Multi-Radar Multi-Sensor) radar observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, -# -# Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity -# data and EchoTop_18_00.50_ for echo top data. If data is not available -# at the top of the hour, you should rename the file closest in time to -# your hour(s) of interest to the above naming format. A script -# "ush/mrms_pull_topofhour.py" is provided for this purpose. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. 
-#
-#
-# NDAS (NAM Data Assimilation System) conventional observations
-# ----------
-# If data is available on disk, it must be in the following
-# directory structure and file name conventions expected by verification
-# tasks:
-#
-# {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH}
-#
-# Note that data retrieved from HPSS and other sources may be in a
-# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is
-# either 00, 06, 12, or 18, and prevhour is the number of hours prior to
-# hh (00 through 05).  If using custom staged data, you will have to
-# rename the files accordingly.
-#
-# If data is retrieved from HPSS, it will be automatically staged by this
-# this script.
-#
-#
-# NOHRSC snow accumulation observations
-# ----------
-# If data is available on disk, it must be in the following
-# directory structure and file name conventions expected by verification
-# tasks:
-#
-# {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2
-#
-# where AA is the 2-digit accumulation duration in hours: 06 or 24
-#
-# METplus is configured to verify snowfall using 06- and 24-h accumulated
-# snowfall from 6- and 12-hourly NOHRSC files, respectively.
-#
-# If data is retrieved from HPSS, it will automatically staged by this
-# this script.
+# Make sure the obs type is valid.  Then call the python script get_obs.py
+# to get the obs files.
 #
 #-----------------------------------------------------------------------
 #
@@ -144,8 +60,15 @@ Valid observation types are:
   $(printf "\"%s\" " ${valid_obtypes[@]})
 "
 fi
-script_bn="get_obs_$(echo_lowercase ${OBTYPE})"
-$USHdir/${script_bn}.sh
+
+script_bn="get_obs"
+cmd="\
+python3 -u ${USHdir}/${script_bn}.py \
+--var_defns_path "${GLOBAL_VAR_DEFNS_FP}" \
+--obtype ${OBTYPE} \
+--obs_day ${PDY}"
+print_info_msg "CALLING: ${cmd}"
+${cmd} || print_err_msg_exit "Error calling ${script_bn}.py."
 #
 #-----------------------------------------------------------------------
 #
@@ -155,7 +78,8 @@ $USHdir/${script_bn}.sh
 #-----------------------------------------------------------------------
 #
 mkdir -p ${WFLOW_FLAG_FILES_DIR}
-touch "${WFLOW_FLAG_FILES_DIR}/${script_bn}_${PDY}_complete.txt"
+file_bn="get_obs_$(echo_lowercase ${OBTYPE})"
+touch "${WFLOW_FLAG_FILES_DIR}/${file_bn}_${PDY}_complete.txt"
 #
 #-----------------------------------------------------------------------
 #
diff --git a/ush/get_obs.py b/ush/get_obs.py
new file mode 100644
index 0000000000..f6e2fed265
--- /dev/null
+++ b/ush/get_obs.py
@@ -0,0 +1,885 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import shutil
+import argparse
+import logging
+from pathlib import Path
+import datetime as dt
+from textwrap import dedent
+from pprint import pprint
+from math import ceil, floor
+import subprocess
+from python_utils import (
+    load_yaml_config,
+)
+
+
+def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod):
+    """
+    For the given observation type, obs archive interval, and hour of day,
+    return the hour (counting from the start of the day) corresponding to
+    the archive file in which the obs file for that hour of day is included.
+
+    Note that for cumulative fields (like CCPA and NOHRSC, as opposed to
+    instantaneous ones like MRMS and NDAS), the archive files corresponding
+    to hour 0 of the day represent accumulations over the previous day.  Thus,
+    here, we never return an archive hour of 0 for cumulative fields.  Instead,
+    if the specified hour-of-day is 0, we consider that to represent the 0th
+    hour of the NEXT day (i.e.
the 24th hour of the current day) and set the + archive hour to 24. + + Args: + obtype: + The observation type. A string. + + arcv_intvl_hrs: + Time interval (in hours) between archive files. An integer. For example, + if the obs files are bundled into 6-hourly archives, then this will be + set to 6. This must be between 1 and 24 and must divide evenly into 24 + (this is checked for elsewhere). + + hod: + The hour of the day. An integer. This must be between 0 and 23. For + cumulative fields (CCPA and NOHRSC), hour 0 is treated as that of the + next day, i.e. as the 24th hour of the current day. + + Returns: + arcv_hr: + The hour since the start of day corresponding to the archive file containing + the obs file for the given hour of day. An integer. + """ + + valid_obtypes = ['CCPA', 'ccpa', 'NOHRSC', 'nohrsc', 'MRMS', 'mrms', 'NDAS', 'ndas'] + if obtype not in valid_obtypes: + msg = dedent(f""" + The specified observation type is not supported: + obtype = {obtype} + Valid observation types are: + {valid_obtypes} + """) + logging.error(msg) + raise Exception(msg) + + if (hod < 0) or (hod > 23): + msg = dedent(f""" + The specified hour-of-day must be between 0 and 23, inclusive but isn't: + hod = {hod} + """) + logging.error(msg) + raise Exception(msg) + + obtype_upper = obtype.upper() + if obtype_upper in ['CCPA']: + if hod == 0: + arcv_hr = 24 + else: + arcv_hr = ceil(hod/arcv_intvl_hrs)*arcv_intvl_hrs + elif obtype_upper in ['NOHRSC']: + if hod == 0: + arcv_hr = 24 + else: + arcv_hr = floor(hod/arcv_intvl_hrs)*arcv_intvl_hrs + elif obtype_upper in ['MRMS']: + arcv_hr = (floor(hod/arcv_intvl_hrs))*arcv_intvl_hrs + elif obtype_upper in ['NDAS']: + arcv_hr = (floor(hod/arcv_intvl_hrs) + 1)*arcv_intvl_hrs + + return arcv_hr + + +def get_obs(config, obtype, yyyymmdd_task): + """ +This script performs several important tasks for preparing data for +verification tasks. Depending on the value of the environment variable +OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data +set. + +If data is not available on disk (in the location specified by +CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), +the script attempts to retrieve the data from HPSS using the retrieve_data.py +script. Depending on the data set, there are a few strange quirks and/or +bugs in the way data is organized; see in-line comments for details. + + +CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs +---------- +If data is available on disk, it must be in the following +directory structure and file name conventions expected by verification +tasks: + +{CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 + +If data is retrieved from HPSS, it will be automatically staged by this +script. + +Notes about the data and how it's used for verification: + +1. Accumulation is currently hardcoded to 01h. The verification will +use MET/pcp-combine to sum 01h files into desired accumulations. + +2. There is a problem with the valid time in the metadata for files +valid from 19 - 00 UTC (or files under the '00' directory). This is +accounted for in this script for data retrieved from HPSS, but if you +have manually staged data on disk you should be sure this is accounted +for. See in-line comments below for details. 
+
+
+MRMS (Multi-Radar Multi-Sensor) radar observations
+----------
+If data is available on disk, it must be in the following
+directory structure and file name conventions expected by verification
+tasks:
+
+{MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2,
+
+Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity
+data and EchoTop_18_00.50_ for echo top data.  If data is not available
+at the top of the hour, you should rename the file closest in time to
+your hour(s) of interest to the above naming format.  A script
+"ush/mrms_pull_topofhour.py" is provided for this purpose.
+
+If data is retrieved from HPSS, it will be automatically staged by this
+script.
+
+
+NDAS (NAM Data Assimilation System) conventional observations
+----------
+If data is available on disk, it must be in the following
+directory structure and file name conventions expected by verification
+tasks:
+
+{NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH}
+
+Note that data retrieved from HPSS and other sources may be in a
+different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is
+either 00, 06, 12, or 18, and prevhour is the number of hours prior to
+hh (00 through 05).  If using custom staged data, you will have to
+rename the files accordingly.
+
+If data is retrieved from HPSS, it will be automatically staged by this
+script.
+
+
+NOHRSC snow accumulation observations
+----------
+If data is available on disk, it must be in the following
+directory structure and file name conventions expected by verification
+tasks:
+
+{NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2
+
+where AA is the 2-digit accumulation duration in hours: 06 or 24
+
+METplus is configured to verify snowfall using 06- and 24-h accumulated
+snowfall from 6- and 12-hourly NOHRSC files, respectively.
+
+If data is retrieved from HPSS, it will be automatically staged by this
+script.
+    """
+
+    # Convert obtype to upper case to simplify code below.
+    obtype = obtype.upper()
+
+    # For convenience, get the verification portion of the configuration
+    # dictionary.
+    vx_config = config['verification']
+
+    # Get the time interval (in hours) at which the obs are available.
+    key = obtype + '_OBS_AVAIL_INTVL_HRS'
+    obs_avail_intvl_hrs = vx_config[key]
+
+    # The obs availability interval must divide evenly into 24 hours.  Otherwise,
+    # different days would have obs available at different hours-of-day.  Make
+    # sure this is the case.
+    remainder = 24 % obs_avail_intvl_hrs
+    if remainder != 0:
+        msg = dedent(f"""
+            The obs availability interval for obs of type {obtype} must divide evenly
+            into 24 but doesn't:
+              obs_avail_intvl_hrs = {obs_avail_intvl_hrs}
+              24 % obs_avail_intvl_hrs = {remainder}
+            """)
+        raise Exception(msg)
+
+    # For convenience, get obs availability interval as a datetime object.
+    obs_avail_intvl = dt.timedelta(hours=obs_avail_intvl_hrs)
+
+    # Get the base directory for the observations.
+    key = obtype + '_OBS_DIR'
+    obs_dir = config['platform'][key]
+
+    # Set the group of fields for each observation type.  We assume there is
+    # a separate obs file type for each such field group in the observations.
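+    # For example, MRMS obs are assumed to consist of two field groups (REFC
+    # and RETOP), each stored in its own grib2 file, whereas NDAS obs consist
+    # of a single prepbufr file per time containing both the ADPSFC and
+    # ADPUPA field groups.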
+    if obtype == 'CCPA':
+        field_groups_in_obs = ['APCP']
+    elif obtype == 'NOHRSC':
+        field_groups_in_obs = ['ASNOW']
+    elif obtype == 'MRMS':
+        field_groups_in_obs = ['REFC', 'RETOP']
+    elif obtype == 'NDAS':
+        field_groups_in_obs = ['ADPSFCandADPUPA']
+    num_field_groups = len(field_groups_in_obs)
+
+    # For each field group in the observations, get the METplus file name
+    # template for the observation files.  Then combine these with the base
+    # directory to get the METplus template for the full path to the processed
+    # obs files.
+    fp_proc_templates = []
+    for fg in field_groups_in_obs:
+        key = 'OBS_' + obtype + '_' + fg + '_FN_TEMPLATE'
+        fn_proc_template = vx_config[key]
+        fp_proc_templates.append(os.path.join(obs_dir, fn_proc_template))
+    #
+    #-----------------------------------------------------------------------
+    #
+    # Set variables that are only needed for some obs types.
+    #
+    #-----------------------------------------------------------------------
+    #
+
+    # For cumulative obs, set the accumulation period to use when getting obs
+    # files.  This is simply (a properly formatted version of) the obs
+    # availability interval.
+    accum_obs_formatted = None
+    if obtype == 'CCPA':
+        accum_obs_formatted = f'{obs_avail_intvl_hrs:02d}'
+    elif obtype == 'NOHRSC':
+        accum_obs_formatted = f'{obs_avail_intvl_hrs:d}'
+
+    # For MRMS obs, set field-dependent parameters needed in forming grib2
+    # file names.
+    fields_in_filenames = []
+    levels_in_filenames = []
+    if obtype == 'MRMS':
+        for fg in field_groups_in_obs:
+            if fg == 'REFC':
+                fields_in_filenames.append('MergedReflectivityQCComposite')
+                levels_in_filenames.append('00.50')
+            elif fg == 'RETOP':
+                fields_in_filenames.append('EchoTop')
+                levels_in_filenames.append('18_00.50')
+            else:
+                msg = dedent(f"""
+                    Invalid field specified for obs type:
+                      obtype = {obtype}
+                      fg = {fg}
+                    """)
+                logging.error(msg)
+                raise Exception(msg)
+
+    # CCPA files for 1-hour accumulation have incorrect metadata in the files
+    # under the "00" directory from 20180718 to 20210504.  Set these starting
+    # and ending dates as datetime objects for later use.
+    yyyymmdd_bad_metadata_start_str = None
+    yyyymmdd_bad_metadata_end_str = None
+    yyyymmdd_bad_metadata_start = None
+    yyyymmdd_bad_metadata_end = None
+    if obtype == 'CCPA':
+        yyyymmdd_bad_metadata_start_str = '20180718'
+        yyyymmdd_bad_metadata_end_str = '20210504'
+        yyyymmdd_bad_metadata_start = dt.datetime.strptime(yyyymmdd_bad_metadata_start_str, '%Y%m%d')
+        yyyymmdd_bad_metadata_end = dt.datetime.strptime(yyyymmdd_bad_metadata_end_str, '%Y%m%d')
+    #
+    #-----------------------------------------------------------------------
+    #
+    # Get the list of all the times in the current day at which to retrieve
+    # obs.  This is an array with elements having format "YYYYMMDDHH".
+    #
+    #-----------------------------------------------------------------------
+    #
+    yyyymmdd_task_str = dt.datetime.strftime(yyyymmdd_task, '%Y%m%d')
+    key = 'OBS_RETRIEVE_TIMES_' + obtype + '_' + yyyymmdd_task_str
+    obs_retrieve_times_crnt_day_str = vx_config[key]
+    obs_retrieve_times_crnt_day \
+    = [dt.datetime.strptime(yyyymmddhh_str, '%Y%m%d%H') for yyyymmddhh_str in obs_retrieve_times_crnt_day_str]
+    #
+    #-----------------------------------------------------------------------
+    #
+    # Obs files will be obtained by extracting them from the relevant n-hourly
+    # archives, where n is the archive interval in hours (denoted below by the
+    # variable arcv_intvl_hrs).
Thus, we must first obtain the sequence of
+    # hours (since hour 0 of the task day) corresponding to the archive files
+    # from which we must extract obs files.  We refer to this as the sequence
+    # of archive hours.
+    #
+    # To generate this sequence, we first set the archive interval and then
+    # set the starting and ending archive hour values.
+    #
+    # For CCPA, the archive interval is 6 hours, i.e. the obs files are
+    # bundled into 6-hourly archives.  This implies 4 archives per day.  The
+    # archives are organized such that each one contains 6 files, so that the
+    # obs availability interval is
+    #
+    #   obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)]
+    #                       = 1 hr/file
+    #
+    # i.e. there is one obs file for each hour of the day containing the
+    # accumulation over that one hour.  The archive corresponding to hour 0
+    # of the current day contains 6 files representing accumulations during
+    # the 6 hours of the previous day.  The archive corresponding to hour 6
+    # of the current day corresponds to accumulations during the first 6
+    # hours of the current day, and the archives corresponding to hours 12
+    # and 18 of the current day correspond to accumulations during the 2nd
+    # and 3rd 6-hourly intervals of the current day.  Thus, to obtain all the
+    # one-hour accumulations for the current day, we must extract all the obs
+    # files from the archives corresponding to hours 6, 12, and 18 of the
+    # current day and hour 0 of the next day.  This corresponds to an archive
+    # hour sequence set below of [6, 12, 18, 24].  Thus, in the simplest case
+    # in which the observation retrieval times include all hours of the
+    # current task's day at which obs files are available and none of the obs
+    # files for this day already exist on disk, this sequence will be [6, 12,
+    # 18, 24].  In other cases, the sequence we loop over will be a subset of
+    # [6, 12, 18, 24].
+    #
+    # For NOHRSC, the archive interval is 24 hours, i.e. the obs files are
+    # bundled into 24-hourly archives.  This implies just 1 archive per day.
+    # The archives are organized such that each one contains 4 files, so that
+    # the obs availability interval is
+    #
+    #   obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)]
+    #                       = 6 hr/file
+    #
+    # i.e. there is one obs file for each 6-hour interval of the day containing
+    # the accumulation over those 6 hours.  The 4 obs files within each archive
+    # correspond to hours 0, 6, 12, and 18 of the current day.  The obs file
+    # for hour 0 contains accumulations during the last 6 hours of the previous
+    # day, while those for hours 6, 12, and 18 contain accumulations for the
+    # first, second, and third 6-hour chunks of the current day.  Thus, to
+    # obtain all the 6-hour accumulations for the current day, we must extract
+    # from the archive for the current day the obs files for hours 6, 12, and
+    # 18 and from the archive for the next day the obs file for hour 0.  This
+    # corresponds to an archive hour sequence set below of [0, 24].  Thus, in
+    # the simplest case in which the observation retrieval times include all
+    # hours of the current task's day at which obs files are available and
+    # none of the obs files for this day already exist on disk, this sequence
+    # will be [0, 24].  In other cases, the sequence we loop over will be a
+    # subset of [0, 24].
+    #
+    # For NDAS, the archive interval is 6 hours, i.e. the obs files are
+    # bundled into 6-hourly archives.  This implies 4 archives per day.  The
+    # archives are organized such that each one contains 7 files (not 6, as
+    # one might expect).
+    # The archive associated with time yyyymmddhh_arcv contains the hourly
+    # files at
+    #
+    #   yyyymmddhh_arcv - 6 hours
+    #   yyyymmddhh_arcv - 5 hours
+    #   ...
+    #   yyyymmddhh_arcv - 2 hours
+    #   yyyymmddhh_arcv - 1 hour
+    #   yyyymmddhh_arcv - 0 hours
+    #
+    # These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files,
+    # respectively.  Thus, the tm06 file from the current archive, say the
+    # one associated with time yyyymmddhh_arcv, has the same valid time as
+    # the tm00 file from the previous archive, i.e. the one associated with
+    # time (yyyymmddhh_arcv - 6 hours).  It turns out the tm06 file from the
+    # current archive contains more/better observations than the tm00 file
+    # from the previous archive.  Thus, for a given archive time yyyymmddhh_arcv,
+    # we use 6 of the 7 files at tm06, ..., tm01 but not the one at tm00,
+    # effectively resulting in 6 files per archive for NDAS obs.  The obs
+    # availability interval is then
+    #
+    #   obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)]
+    #                       = 1 hr/file
+    #
+    # i.e. there is one obs file for each hour of the day containing values
+    # at that hour.  The archive corresponding to hour 0 of the current day
+    # contains 6 files valid at hours 18 through 23 of the previous day.  The
+    # archive corresponding to hour 6 of the current day contains 6 files
+    # valid at hours 0 through 5 of the current day, and the archives
+    # corresponding to hours 12 and 18 of the current day each contain 6
+    # files valid at hours 6 through 11 and 12 through 17 of the current day.
+    # Thus, to obtain all the hourly values for the current day (from hour
+    # 0 to hour 23), we must extract the 6 obs files (excluding the tm00
+    # ones) from the archives corresponding to hours 6, 12, and 18 of the
+    # current day and the archive corresponding to hour 0 of the next day.
+    # This corresponds to an archive hour sequence set below of [6, 12, 18,
+    # 24].  Thus, in the simplest case in which the observation retrieval
+    # times include all hours of the current task's day at which obs files
+    # are available and none of the obs files for this day already exist on
+    # disk, this sequence will be [6, 12, 18, 24].  In other cases, the
+    # sequence we loop over will be a subset of [6, 12, 18, 24].
+    #
+    #-----------------------------------------------------------------------
+    #
+    if obtype == 'CCPA':
+        arcv_intvl_hrs = 6
+    elif obtype == 'NOHRSC':
+        arcv_intvl_hrs = 24
+    elif obtype == 'MRMS':
+        arcv_intvl_hrs = 24
+    elif obtype == 'NDAS':
+        arcv_intvl_hrs = 6
+    arcv_intvl = dt.timedelta(hours=arcv_intvl_hrs)
+
+    # Number of obs files within each archive.
+    num_obs_files_per_arcv = int(arcv_intvl/obs_avail_intvl)
+
+    # Initial guess for starting archive hour.  This is set to the archive
+    # hour containing obs at the first obs retrieval time of the day.
+    hod_first = obs_retrieve_times_crnt_day[0].hour
+    arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod_first)
+
+    # Ending archive hour.  This is set to the archive hour containing obs at
+    # the last obs retrieval time of the day.
+    hod_last = obs_retrieve_times_crnt_day[-1].hour
+    arcv_hr_end = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod_last)
+
+    # Set other variables needed below when evaluating the METplus template for
+    # the full path to the processed observation files.
+    one_hour = dt.timedelta(hours=1)
+    ushdir = config['user']['USHdir']
+
+    # Check whether any obs files already exist on disk in their processed
Here, by "processed" we mean after any renaming + # and rearrangement of files that this script may do to the "raw" files, + # i.e. the files as they are named and arranged within the archive (tar) + # files on HPSS. If so, adjust the starting archive hour. In the process, + # keep a count of the number of obs files that already exist on disk. + num_existing_files = 0 + do_break = False + for yyyymmddhh in obs_retrieve_times_crnt_day: + + for fp_proc_templ in fp_proc_templates: + # Set the full path to the final processed obs file (fp_proc). + lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour) + cmd = '; '.join(['export USHdir=' + ushdir, + 'export yyyymmdd_task=' + yyyymmdd_task_str, + 'export lhr=' + str(lhr), + 'export METplus_timestr_tmpl=' + fp_proc_templ, + os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')]) + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + fp_proc = result.stdout.strip() + + # Check whether file already exists. + if os.path.isfile(fp_proc): + num_existing_files += 1 + msg = dedent(f""" + File already exists on disk: + fp_proc = {fp_proc} + """) + logging.info(msg) + else: + hod = yyyymmddhh.hour + arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod) + msg = dedent(f""" + File does not exist on disk: + fp_proc = {fp_proc} + Setting the hour (since hour 0 of the current task day) of the first + archive to retrieve to: + arcv_hr_start = {arcv_hr_start} + """) + logging.info(msg) + do_break = True + break + + if do_break: break + + # If the number of obs files that already exist on disk is equal to the + # number of obs files needed, then there is no need to retrieve any files. + num_obs_retrieve_times_crnt_day = len(obs_retrieve_times_crnt_day) + num_files_needed = num_obs_retrieve_times_crnt_day*num_field_groups + if num_existing_files == num_files_needed: + + msg = dedent(f""" + All obs files needed for the current day (yyyymmdd_task) already exist + on disk: + yyyymmdd_task = {yyyymmdd_task} + Thus, there is no need to retrieve any files. + """) + logging.info(msg) + return True + + # If the number of obs files that already exist on disk is not equal to + # the number of obs files needed, then we will need to retrieve files. + # In this case, set the sequence of hours corresponding to the archives + # from which files will be retrieved. + else: + + arcv_hrs = [hr for hr in range(arcv_hr_start, arcv_hr_end+arcv_intvl_hrs, arcv_intvl_hrs)] + msg = dedent(f""" + At least some obs files needed needed for the current day (yyyymmdd_task) + do not exist on disk: + yyyymmdd_task = {yyyymmdd_task} + The number of obs files needed for the current day is: + num_files_needed = {num_files_needed} + The number of obs files that already exist on disk is: + num_existing_files = {num_existing_files} + Will retrieve remaining files by looping over archives corresponding to + the following hours (since hour 0 of the current day): + arcv_hrs = {arcv_hrs} + """) + logging.info(msg) + # + #----------------------------------------------------------------------- + # + # At this point, at least some obs files for the current day need to be + # retrieved. Thus, loop over the relevant archives that contain obs for + # the day given by yyyymmdd_task and retrieve files as needed. + # + # Note that the NOHRSC data on HPSS are archived by day, with the archive + # for a given day containing 6-hour as well as 24-hour grib2 files. 
As + # described above, the four 6-hour files are for accumulated snowfall at + # hour 0 of the current day (which represents accumulation over the last + # 6 hours of the previous day) as well as hours 6, 12, and 18, while the + # two 24-hour files are at hour 0 (which represents accumulation over all + # 24 hours of the previous day) and 12 (which represents accumulation over + # the last 12 hours of the previous day plus the first 12 hours of the + # current day). Here, we will only obtain the 6-hour files. In other + # workflow tasks, the values in these 6-hour files will be added as + # necessary to obtain accumulations over longer periods (e.g. 24 hours). + # Since the four 6-hour files are in one archive and are relatively small + # (on the order of kilobytes), we get them all with a single call to the + # retrieve_data.py script. + # + #----------------------------------------------------------------------- + # + + # Whether to move the files or copy them from their raw to their processed + # locations. + mv_or_cp = 'cp' + # Whether to remove raw observations after processed directories have + # been created from them. + key = 'REMOVE_RAW_OBS_' + obtype + remove_raw_obs = config['platform'][key] + # If the raw directories and files are to be removed at the end of this + # script, no need to copy the files since the raw directories are going + # to be removed anyway. + if remove_raw_obs: + mv_or_cp = 'mv' + + # Base directory that will contain the archive subdirectories in which + # the files extracted from each archive (tar) file will be placed. We + # refer to this as the "raw" base directory because it contains files + # as they are found in the archives before any processing by this script. + basedir_raw = os.path.join(obs_dir, 'raw_' + yyyymmdd_task_str) + + for arcv_hr in arcv_hrs: + + msg = dedent(f""" + Processing archive hour {arcv_hr} ... + """) + logging.info(msg) + + # Calculate the time information for the current archive. + yyyymmddhh_arcv = yyyymmdd_task + dt.timedelta(hours=arcv_hr) + yyyymmddhh_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d%H') + yyyymmdd_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d') + + # Directory that will contain the files retrieved from the current archive + # file. We refer to this as the "raw" archive directory because it will + # contain the files as they are in the archive before any processing by + # this script. + if obtype == 'CCPA': + arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str) + # For NOHRSC, the hour-of-day for the archive is irrelevant since there + # is only one archive per day, so don't include it in the raw archive + # directory's name. + elif obtype == 'NOHRSC': + arcv_dir_raw = os.path.join(basedir_raw, yyyymmdd_arcv_str) + # Since for MRMS data there is only one archive per day, that directory + # is redundant, so for simplicity we set arcv_dir_raw to just basedir_raw. + elif obtype == 'MRMS': + arcv_dir_raw = basedir_raw + elif obtype == 'NDAS': + arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str) + + # Check whether any of the obs retrieval times for the day associated with + # this task fall in the time interval spanned by the current archive. If + # so, set the flag (do_retrieve) to retrieve the files in the current + # archive. 
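+        # As an illustration (hypothetical archive time): for NDAS, the
+        # archive at yyyymmddhh_arcv = 2024010106 holds the tm06 through tm01
+        # files, so its usable contents span 2024010100 through 2024010105,
+        # whereas for CCPA the archive at 2024010106 holds the accumulations
+        # valid at 2024010101 through 2024010106.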
+        if obtype == 'CCPA':
+            arcv_contents_start = yyyymmddhh_arcv - (num_obs_files_per_arcv - 1)*obs_avail_intvl
+            arcv_contents_end = yyyymmddhh_arcv
+        elif obtype == 'NOHRSC':
+            arcv_contents_start = yyyymmddhh_arcv
+            arcv_contents_end = yyyymmddhh_arcv + (num_obs_files_per_arcv - 1)*obs_avail_intvl
+        elif obtype == 'MRMS':
+            arcv_contents_start = yyyymmddhh_arcv
+            arcv_contents_end = yyyymmddhh_arcv + (num_obs_files_per_arcv - 1)*obs_avail_intvl
+        elif obtype == 'NDAS':
+            arcv_contents_start = yyyymmddhh_arcv - num_obs_files_per_arcv*obs_avail_intvl
+            arcv_contents_end = yyyymmddhh_arcv - obs_avail_intvl
+
+        do_retrieve = False
+        for obs_retrieve_time in obs_retrieve_times_crnt_day:
+            if (obs_retrieve_time >= arcv_contents_start) and \
+               (obs_retrieve_time <= arcv_contents_end):
+                do_retrieve = True
+                break
+
+        if not do_retrieve:
+            msg = dedent(f"""
+                None of the current day's observation retrieval times (possibly including
+                hour 0 of the next day if considering a cumulative obs type) fall in the
+                range spanned by the current {arcv_intvl_hrs}-hourly archive file.  The
+                bounds of the data in the current archive are:
+                  arcv_contents_start = {arcv_contents_start}
+                  arcv_contents_end = {arcv_contents_end}
+                The times at which obs need to be retrieved are:
+                  obs_retrieve_times_crnt_day = {obs_retrieve_times_crnt_day}
+                """)
+            logging.info(msg)
+
+        else:
+
+            # Make sure the raw archive directory exists because it is used below as
+            # the output directory of the retrieve_data.py script (so if this directory
+            # doesn't already exist, that script will fail).  Creating this directory
+            # also ensures that the raw base directory (basedir_raw) exists before we
+            # change location to it below.
+            Path(arcv_dir_raw).mkdir(parents=True, exist_ok=True)
+
+            # The retrieve_data.py script first extracts the contents of the archive
+            # file into the directory it was called from and then moves them to the
+            # specified output location (via the --output_path option).  Note that
+            # the relative paths of obs files within archives associated with different
+            # days may be the same.  Thus, if files with the same archive-relative
+            # paths are being simultaneously extracted from multiple archive files
+            # (by multiple get_obs tasks), they will likely clobber each other if the
+            # extraction is being carried out into the same location on disk.  To avoid
+            # this, we first change location to the raw base directory (whose name is
+            # obs-day dependent) and then call the retrieve_data.py script.
+            os.chdir(basedir_raw)
+
+            # Pull obs from HPSS.  This will get all the obs files in the current
+            # archive and place them in the raw archive directory.
+            #
+            # Note that for the specific case of NDAS obs, this will get all 7 obs
+            # files in the current archive, although we will make use of only 6 of
+            # these (we will not use the tm00 file).
+            parmdir = config['user']['PARMdir']
+            cmd = ' '.join(['python3', \
+                            '-u', os.path.join(ushdir, 'retrieve_data.py'), \
+                            '--debug', \
+                            '--file_set', 'obs', \
+                            '--config', os.path.join(parmdir, 'data_locations.yml'), \
+                            '--cycle_date', yyyymmddhh_arcv_str, \
+                            '--data_stores', 'hpss', \
+                            '--data_type', obtype + '_obs', \
+                            '--output_path', arcv_dir_raw, \
+                            '--summary_file', 'retrieve_data.log'])
+            result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+            rc = result.returncode
+
+            # Loop over the raw obs files extracted from the current archive and
+            # generate from them the processed obs files.
+            #
+            # For CCPA obs, for most dates this consists of simply copying or moving
+            # the files from the raw archive directory to the processed directory,
+            # possibly renaming them in the process.  However, for dates between
+            # 20180718 and 20210504 and hours-of-day 19 through the end of the day
+            # (i.e. hour 0 of the next day), it involves using wgrib2 to correct an
+            # error in the metadata of the raw file and writing the corrected data
+            # to a new grib2 file in the processed location.
+            #
+            # For NOHRSC obs, this consists of simply copying or moving the files from
+            # the raw archive directory to the processed directory, possibly renaming
+            # them in the process.
+            #
+            # For NDAS obs, this consists of simply copying or moving the files from
+            # the raw archive directory to the processed directory, possibly renaming
+            # them in the process.  Note that the tm06 file in a given archive contains
+            # more/better observations than the tm00 file in the next archive (their
+            # valid times are equivalent), so we use the tm06 files.
+            if obtype == 'CCPA':
+                in_arcv_times = [yyyymmddhh_arcv - i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
+            elif obtype == 'NOHRSC':
+                in_arcv_times = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
+            elif obtype == 'MRMS':
+                in_arcv_times = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
+            elif obtype == 'NDAS':
+                in_arcv_times = [yyyymmddhh_arcv - (i+1)*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
+            in_arcv_times.sort()
+
+            for yyyymmddhh in in_arcv_times:
+
+                # Create the processed obs file from the raw one (by moving, copying, or
+                # otherwise) only if the time of the current file in the current archive
+                # also exists in the list of obs retrieval times for the current day.
+                if yyyymmddhh in obs_retrieve_times_crnt_day:
+
+                    for i, fp_proc_templ in enumerate(fp_proc_templates):
+
+                        # For MRMS obs, first select from the set of raw files for the current day
+                        # those that are nearest in time to the current hour.  Unzip these in a
+                        # temporary subdirectory under the raw base directory.
+                        #
+                        # Note that the script we call to do this (mrms_pull_topofhour.py) assumes
+                        # a certain file naming convention.  That convention must match the names
+                        # of the files that the retrieve_data.py script called above ends up
+                        # retrieving.  The list of possible templates for these names is given
+                        # in parm/data_locations.yml, but which of those is actually used is not
+                        # known until retrieve_data.py completes.  Thus, that information needs
+                        # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py.
+                        # For now, we hard-code the file name here.
+                        if obtype == 'MRMS':
+                            yyyymmddhh_str = dt.datetime.strftime(yyyymmddhh, '%Y%m%d%H')
+                            cmd = ' '.join(['python3', \
+                                            '-u', os.path.join(ushdir, 'mrms_pull_topofhour.py'), \
+                                            '--valid_time', yyyymmddhh_str, \
+                                            '--source', basedir_raw, \
+                                            '--outdir', os.path.join(basedir_raw, 'topofhour'), \
+                                            '--product', fields_in_filenames[i], \
+                                            '--no-add_vdate_subdir'])
+                            result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+                            rc = result.returncode
+
+                        # The raw file name needs to be the same as what the retrieve_data.py
+                        # script called above ends up retrieving.  The list of possible templates
+                        # for this name is given in parm/data_locations.yml, but which of those
+                        # is actually used is not known until retrieve_data.py completes.  Thus,
+                        # that information needs to be passed back by the script and used here.
+                        # For now, we hard-code the file name here.
+                        if obtype == 'CCPA':
+                            hr = yyyymmddhh.hour
+                            fn_raw = 'ccpa.t' + f'{hr:02d}' + 'z.' + accum_obs_formatted + 'h.hrap.conus.gb2'
+                        elif obtype == 'NOHRSC':
+                            yyyymmddhh_str = dt.datetime.strftime(yyyymmddhh, '%Y%m%d%H')
+                            fn_raw = 'sfav2_CONUS_' + accum_obs_formatted + 'h_' + yyyymmddhh_str + '_grid184.grb2'
+                        elif obtype == 'MRMS':
+                            hr = yyyymmddhh.hour
+                            fn_raw = fields_in_filenames[i] + '_' + levels_in_filenames[i] \
+                                   + '_' + yyyymmdd_task_str + '-' + f'{hr:02d}' + '0000.grib2'
+                            fn_raw = os.path.join('topofhour', fn_raw)
+                        elif obtype == 'NDAS':
+                            time_ago = yyyymmddhh_arcv - yyyymmddhh
+                            hrs_ago = int(time_ago.seconds/3600)
+                            hh_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%H')
+                            fn_raw = 'nam.t' + hh_arcv_str + 'z.prepbufr.tm' + f'{hrs_ago:02d}' + '.nr'
+                        fp_raw = os.path.join(arcv_dir_raw, fn_raw)
+
+                        # Set the full path to the final processed obs file (fp_proc) we want to
+                        # create.
+                        lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour)
+                        cmd = '; '.join(['export USHdir=' + ushdir,
+                                         'export yyyymmdd_task=' + yyyymmdd_task_str,
+                                         'export lhr=' + str(lhr),
+                                         'export METplus_timestr_tmpl=' + fp_proc_templ,
+                                         os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')])
+                        result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+                        fp_proc = result.stdout.strip()
+
+                        # Make sure the directory in which the processed file will be created exists.
+                        dir_proc = os.path.dirname(fp_proc)
+                        Path(dir_proc).mkdir(parents=True, exist_ok=True)
+
+                        msg = dedent(f"""
+                            Creating the processed obs file
+                              {fp_proc}
+                            from the raw file
+                              {fp_raw}
+                            ...
+                            """)
+                        logging.info(msg)
+
+                        yyyymmdd = yyyymmddhh.replace(hour=0, minute=0, second=0)
+                        # CCPA files for 1-hour accumulation have incorrect metadata in the files
+                        # under the "00" directory from 20180718 to 20210504.  After the data is
+                        # pulled, use wgrib2 to correct the valid-time metadata in these files.
+                        if (obtype == 'CCPA') and \
+                           ((yyyymmdd >= yyyymmdd_bad_metadata_start) and (yyyymmdd <= yyyymmdd_bad_metadata_end)) and \
+                           (((hr >= 19) and (hr <= 23)) or (hr == 0)):
+                            cmd = ' '.join(['wgrib2', fp_raw, '-set_date -24hr -grib', fp_proc, '-s'])
+                            result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+                        elif mv_or_cp == 'mv':
+                            shutil.move(fp_raw, fp_proc)
+                        elif mv_or_cp == 'cp':
+                            shutil.copy(fp_raw, fp_proc)
+    #
+    #-----------------------------------------------------------------------
+    #
+    # Clean up raw obs directories.
+    #
+    #-----------------------------------------------------------------------
+    #
+    if remove_raw_obs:
+        msg = dedent(f"""
+            Removing raw obs directories ...
+            """)
+        logging.info(msg)
+        shutil.rmtree(basedir_raw)
+
+    return True
+
+
+
+def parse_args(argv):
+    """Parse command line arguments"""
+    parser = argparse.ArgumentParser(
+        description="Get observations."
+ ) + + parser.add_argument( + "--obtype", + dest="obtype", + type=str, + required=True, + choices=['CCPA', 'NOHRSC', 'MRMS', 'NDAS'], + help="Cumulative observation type.", + ) + + parser.add_argument( + "--obs_day", + dest="obs_day", + type=lambda d: dt.datetime.strptime(d, '%Y%m%d'), + required=True, + help="Date of observation day, in the form 'YYYMMDD'.", + ) + + parser.add_argument( + "--var_defns_path", + dest="var_defns_path", + type=str, + required=True, + help="Path to variable definitions file.", + ) + + choices_log_level = [pair for lvl in list(logging._nameToLevel.keys()) + for pair in (str.lower(lvl), str.upper(lvl))] + parser.add_argument( + "--log_level", + dest="log_level", + type=str, + required=False, + default='info', + choices=choices_log_level, + help=dedent(f""" + Logging level to use with the 'logging' module. + """)) + + parser.add_argument( + "--log_fp", + dest="log_fp", + type=str, + required=False, + default='', + help=dedent(f""" + Name of or path (absolute or relative) to log file. If not specified, + the output goes to screen. + """)) + + return parser.parse_args(argv) + + +if __name__ == "__main__": + args = parse_args(sys.argv[1:]) + + # Set up logging. + # If the name/path of a log file has been specified in the command line + # arguments, place the logging output in it (existing log files of the + # same name are overwritten). Otherwise, direct the output to the screen. + log_level = str.upper(args.log_level) + msg_format = "[%(levelname)s:%(name)s: %(filename)s, line %(lineno)s: %(funcName)s()] %(message)s" + if args.log_fp: + logging.basicConfig(level=log_level, format=msg_format, filename=args.log_fp, filemode='w') + else: + logging.basicConfig(level=log_level, format=msg_format) + + cfg = load_yaml_config(args.var_defns_path) + get_obs(cfg, args.obtype, args.obs_day) + + diff --git a/ush/get_obs_ccpa.sh b/ush/get_obs_ccpa.sh deleted file mode 100755 index d3c486c607..0000000000 --- a/ush/get_obs_ccpa.sh +++ /dev/null @@ -1,484 +0,0 @@ -#!/usr/bin/env bash - -# -#----------------------------------------------------------------------- -# -# Source the variable definitions file and the bash utility functions. -# -#----------------------------------------------------------------------- -# -. $USHdir/source_util_funcs.sh -for sect in user platform verification ; do - source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} -done - -set -u -#set -x -# -#----------------------------------------------------------------------- -# -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# -# CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 -# -# If data is retrieved from HPSS, it will be automatically staged by this -# script. 
-# -# Notes about the data and how it's used for verification: -# -# 1. Accumulation is currently hardcoded to 01h. The verification will -# use MET/pcp-combine to sum 01h files into desired accumulations. -# -# 2. There is a problem with the valid time in the metadata for files -# valid from 19 - 00 UTC (or files under the '00' directory). This is -# accounted for in this script for data retrieved from HPSS, but if you -# have manually staged data on disk you should be sure this is accounted -# for. See in-line comments below for details. -# -#----------------------------------------------------------------------- -# - -# -#----------------------------------------------------------------------- -# -# Below, we will use the retrieve_data.py script to retrieve the CCPA -# grib2 file from a data store (e.g. HPSS). Before doing so, note the -# following: -# -# * The daily archive (tar) file containing CCPA obs has a name of the -# form -# -# [PREFIX].YYYYMMDD.tar -# -# where YYYYMMDD is a given year, month, and day combination, and -# [PREFIX] is a string that is not relevant to the discussion here -# (the value it can take on depends on which of several time periods -# YYYYMMDD falls in, and the retrieve_data.py tries various values -# until it finds one for which a tar file exists). Unintuitively, this -# archive file contains accumulation data for valid times starting at -# hour 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current -# day (YYYYMMDD). In other words, the valid times of the contents of -# this archive file are shifted back by 6 hours relative to the time -# string appearing in the name of the file. See section "DETAILS..." -# for a detailed description of the directory structure in the CCPA -# archive files. -# -# * We call retrieve_data.py in a temporary cycle-specific subdirectory -# in order to prevent get_obs_ccpa tasks for different cycles from -# clobbering each other's output. We refer to this as the "raw" CCPA -# base directory because it contains files as they are found in the -# archives before any processing by this script. -# -# * In each (cycle-specific) raw base directory, the data is arranged in -# daily subdirectories with the same timing as in the archive (tar) -# files (which are described in the section "DETAILS..." below). In -# particular, each daily subdirectory has the form YYYYMDD, and it may -# contain CCPA grib2 files for accumulations valid at hour 19 of the -# previous day (YYYYMM[DD-1]) to hour 18 of the current day (YYYYMMDD). -# (Data valid at hours 19-23 of the current day (YYYYMMDD) go into the -# daily subdirectory for the next day, i.e. YYYYMM[DD+1].) We refer -# to these as raw daily (sub)directories to distinguish them from the -# processed daily subdirectories under the processed (final) CCPA base -# directory (basedir_proc). -# -# * For a given cycle, some of the valid times at which there is forecast -# output may not have a corresponding file under the raw base directory -# for that cycle. This is because another cycle that overlaps this cycle -# has already obtained the grib2 CCPA file for that valid time and placed -# it in its processed location; as a result, the retrieveal of that grib2 -# file for this cycle is skipped. 
-# -# * To obtain a more intuitive temporal arrangement of the data in the -# processed CCPA directory structure than the temporal arrangement used -# in the archives and raw directories, we process the raw files such -# that the data in the processed directory structure is shifted forward -# in time 6 hours relative to the data in the archives and raw directories. -# This results in a processed base directory that, like the raw base -# directory, also contains daily subdirectories of the form YYYYMMDD, -# but each such subdirectory may only contain CCPA data at valid hours -# within that day, i.e. at valid times YYYYMMDD[00, 01, ..., 23] (but -# may not contain data that is valid on the previous, next, or any other -# day). -# -# * For data between 20180718 and 20210504, the 01h accumulation data -# (which is the only accumulation we are retrieving) have incorrect -# metadata under the "00" directory in the archive files (meaning for -# hour 00 and hours 19-23, which are the ones in the "00" directory). -# Below, we use wgrib2 to make a correction for this when transferring -# (moving or copying) grib2 files from the raw daily directories to -# the processed daily directories. -# -# -# DETAILS OF DIRECTORY STRUCTURE IN CCPA ARCHIVE (TAR) FILES -# ---------------------------------------------------------- -# -# The daily archive file containing CCPA obs is named -# -# [PREFIX].YYYYMMDD.tar -# -# This file contains accumulation data for valid times starting at hour -# 19 of the PREVIOUS day (YYYYMM[DD-1]) to hour 18 of the current day -# (YYYYMMDD). In particular, when untarred, the daily archive file -# expands into four subdirectories: 00, 06, 12, and 18. The 06, 12, and -# 18 subdirectories contain grib2 files for accumulations valid at or -# below the hour-of-day given by the subdirectory name (and on YYYYMMDD). -# For example, the 06 directory contains data valid at: -# -# * YYYYMMDD[01, 02, 03, 04, 05, 06] for 01h accumulations; -# * YYYYMMDD[03, 06] for 03h accumulations; -# * YYYYMMDD[06] for 06h accumulations. -# -# The valid times for the data in the 12 and 18 subdirectories are -# analogous. However, the 00 subdirectory is different in that it -# contains accumulations at hour 00 on YYYYMMDD as well as ones BEFORE -# this time, i.e. the data for valid times other than YYYYMMDD00 are on -# the PREVIOUS day. Thus, the 00 subdirectory contains data valid at -# (note the DD-1, meaning one day prior): -# -# * YYYYMM[DD-1][19, 20, 21, 22, 23] and YYYYMMDD00 for 01h accumulations; -# * YYYYMM[DD-1][19] and YYYYMMDD00 for 03h accumulations; -# * YYYYMMDD00 for 06h accumulations. -# -#----------------------------------------------------------------------- -# - -# The time interval (in hours) at which the obs are available on HPSS -# must divide evenly into 24. Otherwise, different days would have obs -# available at different hours-of-day. Make sure this is the case. -remainder=$(( 24 % CCPA_OBS_AVAIL_INTVL_HRS )) -if [ ${remainder} -ne 0 ]; then - print_err_msg_exit "\ -The obs availability interval CCPA_OBS_AVAIL_INTVL_HRS must divide evenly -into 24 but doesn't: - CCPA_OBS_AVAIL_INTVL_HRS = ${CCPA_OBS_AVAIL_INTVL_HRS} - 24 % CCPA_OBS_AVAIL_INTVL_HRS = ${remainder}" -fi - -# Accumulation period to use when getting obs files. This is simply (a -# properly formatted version of) the obs availability interval. 
-accum_obs_fmt=$( printf "%02d" "${CCPA_OBS_AVAIL_INTVL_HRS}" ) - -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. -yyyymmdd_task=${PDY} - -# Seconds since some reference time that the DATE_UTIL utility uses of -# the day of the current task. This will be used below to find hours -# since the start of this day. -sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) -# -#----------------------------------------------------------------------- -# -# Get the list of all the times in the current day at which to retrieve -# obs. This is an array with elements having format "YYYYMMDDHH". -# -#----------------------------------------------------------------------- -# -array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# -#----------------------------------------------------------------------- -# -# Obs files will be obtained by extracting them from the relevant 6-hourly -# archives. Thus, we need the sequence of archive hours over which to -# loop. In the simplest case, this sequence will be "6 12 18 24". This -# will be the case if the observation retrieval times include all hours -# of the task's day and if none of the obs files for this day already -# exist on disk. In other cases, the sequence we loop over will be a -# subset of "6 12 18 24". -# -# To generate this sequence, we first set its starting and ending values -# as well as the interval. -# -#----------------------------------------------------------------------- -# - -# Sequence interval must be 6 hours because the archives are 6-hourly. -arcv_hr_incr=6 - -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first obs retrieval time of the day. -hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) -hr_first=$((10#${hh_first})) -arcv_hr_start=$(ceil ${hr_first} ${arcv_hr_incr}) -arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) - -# Ending archive hour. This is set to the archive hour containing obs at -# the last obs retrieval time of the day. -hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) -hr_last=$((10#${hh_last})) -if [[ ${hr_last} -eq 0 ]]; then - arcv_hr_end=24 -else - arcv_hr_end=$(ceil ${hr_last} ${arcv_hr_incr}) - arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) -fi - -# Check whether any obs files already exist on disk in their processed -# (i.e. final) locations. Here, by "processed" we mean after any renaming -# and rearrangement of files that this script may do to the "raw" files, -# i.e. the files as they are named and arranged within the archive (tar) -# files on HPSS. If so, adjust the starting archive hour. In the process, -# keep a count of the number of obs files that already exist on disk. -num_existing_files=0 -for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - - # Set the full path to the final processed obs file (fp_proc). 
- sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - - if [[ -f ${fp_proc} ]]; then - num_existing_files=$((num_existing_files+1)) - print_info_msg " -File already exists on disk: - fp_proc = \"${fp_proc}\"" - else - hr=$((10#${hh})) - arcv_hr_start=$(ceil ${hr} ${arcv_hr_incr}) - arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) - print_info_msg " -File does not exist on disk: - fp_proc = \"${fp_proc}\" -Setting the hour (since 00) of the first archive to retrieve to: - arcv_hr_start = \"${arcv_hr_start}\"" - break - fi -done - -# If the number of obs files that already exist on disk is equal to the -# number of obs files needed, then there is no need to retrieve any files. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then - - print_info_msg " -All obs files needed for the current day (yyyymmdd_task) already exist -on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any files." - exit - -# If the number of obs files that already exist on disk is not equal to -# the number of obs files needed, then we will need to retrieve files. -# In this case, set the sequence of hours corresponding to the archives -# from which files will be retrieved. -else - - arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) - arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" - print_info_msg " -At least some obs files needed needed for the current day (yyyymmdd_task) -do not exist on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed for the current day (which is equal to the -number of observation retrieval times for the current day) is: - num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} -The number of obs files that already exist on disk is: - num_existing_files = ${num_existing_files} -Will retrieve remaining files by looping over archives corresponding to -the following hours (since 00 of this day): - arcv_hrs = ${arcv_hrs_str} -" - -fi -# -#----------------------------------------------------------------------- -# -# At this point, at least some obs files for the current day need to be -# retrieved. Thus, loop over the relevant archives that contain obs for -# the day given by yyyymmdd_task and retrieve files as needed. -# -#----------------------------------------------------------------------- -# - -# Whether to move the files or copy them from their raw to their processed -# locations. -#mv_or_cp="mv" -mv_or_cp="cp" -# Whether to remove raw observations after processed directories have -# been created from them. -remove_raw_obs="${REMOVE_RAW_OBS_CCPA}" -# If the raw directories and files are to be removed at the end of this -# script, no need to copy the files since the raw directories are going -# to be removed anyway. -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - mv_or_cp="mv" -fi - -# Base directory that will contain the archive subdirectories in which -# the files extracted from each archive (tar) file will be placed. We -# refer to this as the "raw" base directory because it contains files -# as they are found in the archives before any processing by this script. 
-basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" - -for arcv_hr in ${arcv_hrs[@]}; do - - print_info_msg " -arcv_hr = ${arcv_hr}" - - # Calculate the time information for the current archive. - yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) - yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) - hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - - # Directory that will contain the files retrieved from the current archive - # file. We refer to this as the "raw" archive directory because it will - # contain the files as they are in the archive before any processing by - # this script. - arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - - # Check whether any of the obs retrieval times for the day associated with - # this task fall in the time interval spanned by the current archive. If - # so, set the flag (do_retrieve) to retrieve the files in the current - # archive. - hrs_ago=$((arcv_hr_incr - 1)) - arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) - arcv_contents_yyyymmddhh_end=${yyyymmddhh_arcv} - do_retrieve="FALSE" - for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do - obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} - if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ - [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then - do_retrieve="TRUE" - break - fi - done - - if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then - - print_info_msg " -None of the times in the current day (or hour 00 of the next day) at which -obs need to be retrieved fall in the range spanned by the current ${arcv_hr_incr}-hourly -archive file. The bounds of the data in the current archive file are: - arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" - arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" -The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" - - else - - # Make sure the raw archive directory exists because it is used below as - # the output directory of the retrieve_data.py script (so if this directory - # doesn't already exist, that script will fail). Creating this directory - # also ensures that the raw base directory (basedir_raw) exists before we - # change location to it below. - mkdir -p ${arcv_dir_raw} - - # The retrieve_data.py script first extracts the contents of the archive - # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). Note that - # the relative paths of obs files within archives associted with different - # days may be the same. Thus, if files with the same archive-relative - # paths are being simultaneously extracted from multiple archive files - # (by multiple get_obs tasks), they will likely clobber each other if the - # extracton is being carried out into the same location on disk. To avoid - # this, we first change location to the raw base directory (whose name is - # obs-day dependent) and then call the retrieve_data.py script. - cd ${basedir_raw} - - # Pull obs from HPSS. This will get all the obs files in the current - # archive and place them in the raw archive directory. 
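The bash command that performs this retrieval appears next. In the consolidated Python driver, the equivalent call might be assembled roughly as follows (a sketch mirroring the flags below; the function name is illustrative):

import os
import subprocess

def retrieve_ccpa_archive(ushdir, parmdir, yyyymmddhh_arcv, arcv_dir_raw):
    """Pull one 6-hourly CCPA archive from HPSS into the raw archive
    directory, mirroring the bash retrieve_data.py call below."""
    cmd = ' '.join(['python3', '-u', os.path.join(ushdir, 'retrieve_data.py'),
                    '--debug',
                    '--file_set', 'obs',
                    '--config', os.path.join(parmdir, 'data_locations.yml'),
                    '--cycle_date', yyyymmddhh_arcv,
                    '--data_stores', 'hpss',
                    '--data_type', 'CCPA_obs',
                    '--output_path', arcv_dir_raw,
                    '--summary_file', 'retrieve_data.log'])
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError('Could not retrieve obs from HPSS.')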
- cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${yyyymmddhh_arcv} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path ${arcv_dir_raw} \ - --summary_file retrieve_data.log" - - print_info_msg "CALLING: ${cmd}" - $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - - # Loop over the raw obs files extracted from the current archive and - # generate from them the processed obs files. - # - # For CCPA obs, for most dates this consists of simply copying or moving - # the files from the raw archive directory to the processed directory, - # possibly renaming them in the process. However, for dates between - # 20180718 and 20210504 and hours-of-day 19 through the end of the day - # (i.e. hour 0 of the next day), it involves using wgrib2 to correct an - # error in the metadata of the raw file and writing the corrected data - # to a new grib2 file in the processed location. - for hrs_ago in $(seq 5 -${CCPA_OBS_AVAIL_INTVL_HRS} 0); do - yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed obs file from the raw one (by moving, copying, or - # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. - if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - - # The raw file name needs to be the same as what the retrieve_data.py - # script called above ends up retrieving. The list of possibile templates - # for this name is given in parm/data_locations.yml, but which of those - # is actually used is not known until retrieve_data.py completes. Thus, - # that information needs to be passed back by the script and used here. - # For now, we hard-code the file name here. - fn_raw="ccpa.t${hh}z.${accum_obs_fmt}h.hrap.conus.gb2" - fp_raw="${arcv_dir_raw}/${fn_raw}" - - # Set the full path to the final processed obs file (fp_proc) we want to - # create. - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - mkdir -p $( dirname "${fp_proc}" ) - - # CCPA files for 1-hour accumulation have incorrect metadata in the files - # under the "00" directory from 20180718 to 20210504. After the data is - # pulled, reorganize into correct yyyymmdd structure. - hh_noZero=$((10#${hh})) - if [[ ${yyyymmdd} -ge 20180718 && ${yyyymmdd} -le 20210504 ]] && \ - [[ (${hh_noZero} -ge 19 && ${hh_noZero} -le 23) || (${hh_noZero} -eq 0) ]]; then - wgrib2 ${fp_raw} -set_date -24hr -grib ${fp_proc} -s - else - ${mv_or_cp} ${fp_raw} ${fp_proc} - fi - - fi - done - - fi - -done -# -#----------------------------------------------------------------------- -# -# Clean up raw obs directories. -# -#----------------------------------------------------------------------- -# -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - print_info_msg "Removing raw obs directories..." - rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw obs directories." 
-fi diff --git a/ush/get_obs_mrms.sh b/ush/get_obs_mrms.sh deleted file mode 100755 index d13e374620..0000000000 --- a/ush/get_obs_mrms.sh +++ /dev/null @@ -1,341 +0,0 @@ -#!/usr/bin/env bash - -# -#----------------------------------------------------------------------- -# -# Source the variable definitions file and the bash utility functions. -# -#----------------------------------------------------------------------- -# -. $USHdir/source_util_funcs.sh -for sect in user platform verification ; do - source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} -done - -set -u -#set -x -# -#----------------------------------------------------------------------- -# -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# -# MRMS (Multi-Radar Multi-Sensor) radar observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, -# -# Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity -# data and EchoTop_18_00.50_ for echo top data. If data is not available -# at the top of the hour, you should rename the file closest in time to -# your hour(s) of interest to the above naming format. A script -# "ush/mrms_pull_topofhour.py" is provided for this purpose. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. -# -#----------------------------------------------------------------------- -# - -# The time interval (in hours) at which the obs are available on HPSS -# must divide evenly into 24. Otherwise, different days would have obs -# available at different hours-of-day. Make sure this is the case. -remainder=$(( 24 % MRMS_OBS_AVAIL_INTVL_HRS )) -if [ ${remainder} -ne 0 ]; then - print_err_msg_exit "\ -The obs availability interval MRMS_OBS_AVAIL_INTVL_HRS must divide evenly -into 24 but doesn't: - MRMS_OBS_AVAIL_INTVL_HRS = ${MRMS_OBS_AVAIL_INTVL_HRS} - 24 % MRMS_OBS_AVAIL_INTVL_HRS = ${remainder}" -fi - -# Create an array-valued counterpart of MRMS_FIELDS. MRMS_FIELDS is an -# environment variable created in the ROCOTO XML. It is a scalar variable -# because there doesn't seem to be a way to pass a bash array from the -# XML to the task's script. -mrms_fields=($(printf "%s" "${MRMS_FIELDS}")) - -# Loop over the fields (REFC and RETOP) and set the file base name -# corresponding to each. -fields_in_filenames=() -levels_in_filenames=() -obs_mrms_fp_templates=() -for field in ${mrms_fields[@]}; do - # Set field-dependent parameters needed in forming grib2 file names. 
- if [ "${field}" = "REFC" ]; then - fields_in_filenames+=("MergedReflectivityQCComposite") - levels_in_filenames+=("00.50") - obs_mrms_fp_templates+=("${OBS_DIR}/${OBS_MRMS_REFC_FN_TEMPLATE}") - elif [ "${field}" = "RETOP" ]; then - fields_in_filenames+=("EchoTop") - levels_in_filenames+=("18_00.50") - obs_mrms_fp_templates+=("${OBS_DIR}/${OBS_MRMS_RETOP_FN_TEMPLATE}") - else - print_err_msg_exit "\ -Invalid field specified: - field = \"${field}\" -Valid options are 'REFC', 'RETOP'." - fi -done - -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. -yyyymmdd_task=${PDY} - -# Seconds since some reference time that the DATE_UTIL utility uses of -# the day of the current task. This will be used below to find hours -# since the start of this day. -sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) -# -#----------------------------------------------------------------------- -# -# Get the list of all the times in the current day at which to retrieve -# obs. This is an array with elements having format "YYYYMMDDHH". -# -#----------------------------------------------------------------------- -# -array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) - -# Check whether any obs files already exist on disk. If so, adjust the -# starting archive hour. In the process, keep a count of the number of -# obs files that already exist on disk. -num_existing_files=0 -num_mrms_fields=${#mrms_fields[@]} -for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do - for (( i=0; i<${num_mrms_fields}; i++ )); do - - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${obs_mrms_fp_templates[$i]}" \ - outvarname_evaluated_timestr="fp_proc" - - if [[ -f ${fp_proc} ]]; then - num_existing_files=$((num_existing_files+1)) - print_info_msg " -File already exists on disk: - fp_proc = \"${fp_proc}\"" - else - print_info_msg " -File does not exist on disk: - fp_proc = \"${fp_proc}\" -Will attempt to retrieve all obs files." - break 2 - fi - - done -done - -# If the number of obs files that already exist on disk is equal to the -# number of obs files needed (which is num_mrms_fields times the number -# of obs retrieval times in the current day), then there is no need to -# retrieve any files. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq $((num_mrms_fields*num_obs_retrieve_times_crnt_day)) ]]; then - - print_info_msg " -All obs files needed for the current day (yyyymmdd_task) already exist -on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any files." - exit -# Otherwise, will need to retrieve files. -else - print_info_msg " -At least some obs files needed needed for the current day (yyyymmdd_task) -do not exist on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed for the current day (which is equal to the -number of observation retrieval times for the current day) is: - num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} -The number of obs files that already exist on disk is: - num_existing_files = ${num_existing_files} -Will retrieve remaining files. 
-" -fi -# -#----------------------------------------------------------------------- -# -# At this point, at least some obs files for the current day need to be -# retrieved. -# -#----------------------------------------------------------------------- -# - -# Whether to move the files or copy them from their raw to their processed -# locations. -#mv_or_cp="mv" -mv_or_cp="cp" -# Whether to remove raw observations after processed directories have -# been created from them. -remove_raw_obs="${REMOVE_RAW_OBS_MRMS}" -# If the raw directories and files are to be removed at the end of this -# script, no need to copy the files since the raw directories are going -# to be removed anyway. -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - mv_or_cp="mv" -fi - -# Base directory that will contain the archive subdirectories in which -# the files extracted from each archive (tar) file will be placed. We -# refer to this as the "raw" base directory because it contains files -# as they are found in the archives before any processing by this script. -basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" - -# Time associated with the archive. MRMS data have daily archives that -# have the hour-of-day set to "00". -yyyymmddhh_arcv="${yyyymmdd_task}00" - -# Directory that will contain the files retrieved from the current archive -# file. We refer to this as the "raw" archive directory because it will -# contain the files as they are in the archive before any processing by -# this script. -# -# Note: -# Normally, arcv_dir_raw should consist of basedir_raw and a subdirectory -# that depends on the archive date, e.g. -# -# arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" -# -# but since for MRMS data there is only one archive per day, that directory -# is redundant, so simplicity we set arcv_dir_raw to just basedir_raw. -arcv_dir_raw="${basedir_raw}" - -# Make sure the raw archive directory exists because it is used below as -# the output directory of the retrieve_data.py script (so if this directory -# doesn't already exist, that script will fail). Creating this directory -# also ensures that the raw base directory (basedir_raw) exists before we -# change location to it below. -mkdir -p ${arcv_dir_raw} - -# The retrieve_data.py script first extracts the contents of the archive -# file into the directory it was called from and then moves them to the -# specified output location (via the --output_path option). Note that -# the relative paths of obs files within archives associted with different -# days may be the same. Thus, if files with the same archive-relative -# paths are being simultaneously extracted from multiple archive files -# (by multiple get_obs tasks), they will likely clobber each other if the -# extracton is being carried out into the same location on disk. To avoid -# this, we first change location to the raw base directory (whose name is -# obs-day dependent) and then call the retrieve_data.py script. -cd ${basedir_raw} - -# Pull obs from HPSS. This will get all the obs files in the current -# archive and place them in the raw archive directory. -cmd=" -python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${yyyymmddhh_arcv} \ - --data_stores hpss \ - --data_type MRMS_obs \ - --output_path ${arcv_dir_raw} \ - --summary_file retrieve_data.log" - -print_info_msg "CALLING: ${cmd}" -$cmd || print_err_msg_exit "Could not retrieve obs from HPSS." 
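Conceptually, what mrms_pull_topofhour.py does for each hour of the day (see the loop below) is pick the raw file whose timestamp is nearest the top of that hour and unzip it. A rough standalone sketch under that assumption, with hypothetical file names:

import re
from datetime import datetime

def nearest_to_top_of_hour(fns, valid_time):
    """Among raw MRMS names carrying a YYYYMMDD-HHMMSS stamp, return the
    one whose stamp is closest to valid_time."""
    stamp = re.compile(r'(\d{8})-(\d{6})')
    def dist(fn):
        m = stamp.search(fn)
        t = datetime.strptime(''.join(m.groups()), '%Y%m%d%H%M%S')
        return abs((t - valid_time).total_seconds())
    return min(fns, key=dist)

fns = ['MergedReflectivityQCComposite_00.50_20230615-174500.grib2.gz',
       'MergedReflectivityQCComposite_00.50_20230615-180230.grib2.gz']
print(nearest_to_top_of_hour(fns, datetime(2023, 6, 15, 18)))
# MergedReflectivityQCComposite_00.50_20230615-180230.grib2.gz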
-# -#----------------------------------------------------------------------- -# -# Loop over the 24 hour period starting with the zeroth hour of the day -# associated with this task and ending with the 23rd hour. -# -#----------------------------------------------------------------------- -# - -# Loop over the raw obs files extracted from the current archive and -# generate from them the processed obs files. -# -# For MRMS obs, the raw obs consist of gzipped grib2 files that are -# usually a few minutes apart in time. However, because forecast data -# is available at most every hour, the SRW App configuration parameter -# MRMS_OBS_AVAIL_INTVL_HRS is set to 1 hour instead of a few minutes. -# Below, we loop over the whole day using this 1-hourly interval. For -# each hour of the day, we call the script mrms_pull_topofhour.py to find -# the gzipped grib2 file in the raw archive directory that is closest in -# time to the hour and unzip it in a temporary directory. We then copy -# or move it to the processed directory, possibly renaming it in the -# process. -for hr in $(seq 0 ${MRMS_OBS_AVAIL_INTVL_HRS} 23); do - yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_task} ${hr} hours" +%Y%m%d%H) - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed obs file from the raw one (by moving, copying, or - # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. - if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - for (( i=0; i<${num_mrms_fields}; i++ )); do - - # First, select from the set of raw files for the current day those that - # are nearest in time to the current hour. Unzip these in a temporary - # subdirectory under the raw base directory. - # - # Note that the script we call to do this (mrms_pull_topofhour.py) assumes - # a certain file naming convention. That convention must match the names - # of the files that the retrieve_data.py script called above ends up - # retrieving. The list of possibile templates for these names is given - # in parm/data_locations.yml, but which of those is actually used is not - # known until retrieve_data.py completes. Thus, that information needs - # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py. - # For now, we hard-code the file name here. - python ${USHdir}/mrms_pull_topofhour.py \ - --valid_time ${yyyymmddhh} \ - --source ${basedir_raw} \ - --outdir ${basedir_raw}/topofhour \ - --product ${fields_in_filenames[$i]} \ - --no-add_vdate_subdir - - # Set the name of and the full path to the raw obs file created by the - # mrms_pull_topofhour.py script. This name is currently hard-coded to - # the output of that script. In the future, it should be set in a more - # general way (e.g. obtain from a settings file). - fn_raw="${fields_in_filenames[$i]}_${levels_in_filenames[$i]}_${yyyymmdd_task}-${hh}0000.grib2" - fp_raw="${basedir_raw}/topofhour/${fn_raw}" - - # Set the full path to the final processed obs file (fp_proc) we want to - # create. 
- sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${obs_mrms_fp_templates[$i]}" \ - outvarname_evaluated_timestr="fp_proc" - mkdir -p $( dirname "${fp_proc}" ) - - mv ${fp_raw} ${fp_proc} - - done - fi -done -# -#----------------------------------------------------------------------- -# -# Clean up raw obs directories. -# -#----------------------------------------------------------------------- -# -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - print_info_msg "Removing raw obs directories..." - rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw obs directories." -fi diff --git a/ush/get_obs_ndas.sh b/ush/get_obs_ndas.sh deleted file mode 100755 index 45338714a2..0000000000 --- a/ush/get_obs_ndas.sh +++ /dev/null @@ -1,357 +0,0 @@ -#!/usr/bin/env bash - -# -#----------------------------------------------------------------------- -# -# Source the variable definitions file and the bash utility functions. -# -#----------------------------------------------------------------------- -# -. $USHdir/source_util_funcs.sh -for sect in user platform verification ; do - source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} -done - -set -u -#set -x -# -#----------------------------------------------------------------------- -# -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# -# NDAS (NAM Data Assimilation System) conventional observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} -# -# Note that data retrieved from HPSS and other sources may be in a -# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is -# either 00, 06, 12, or 18, and prevhour is the number of hours prior to -# hh (00 through 05). If using custom staged data, you will have to -# rename the files accordingly. -# -# If data is retrieved from HPSS, it will be automatically staged by this -# this script. -# -#----------------------------------------------------------------------- -# - -# The time interval (in hours) at which the obs are available on HPSS -# must divide evenly into 24. Otherwise, different days would have obs -# available at different hours-of-day. Make sure this is the case. -remainder=$(( 24 % NDAS_OBS_AVAIL_INTVL_HRS )) -if [ ${remainder} -ne 0 ]; then - print_err_msg_exit "\ -The obs availability interval NDAS_OBS_AVAIL_INTVL_HRS must divide evenly -into 24 but doesn't: - NDAS_OBS_AVAIL_INTVL_HRS = ${NDAS_OBS_AVAIL_INTVL_HRS} - 24 % NDAS_OBS_AVAIL_INTVL_HRS = ${remainder}" -fi - -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. 
-yyyymmdd_task=${PDY} - -# Seconds since some reference time that the DATE_UTIL utility uses of -# the day of the current task. This will be used below to find hours -# since the start of this day. -sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) -# -#----------------------------------------------------------------------- -# -# Get the list of all the times in the current day at which to retrieve -# obs. This is an array with elements having format "YYYYMMDDHH". -# -#----------------------------------------------------------------------- -# -array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# -#----------------------------------------------------------------------- -# -# Obs files will be obtained by extracting them from the relevant 6-hourly -# archives. Thus, we need the sequence of archive hours over which to -# loop. In the simplest case, this sequence will be "6 12 18 24". This -# will be the case if the observation retrieval times include all hours -# of the task's day and if none of the obs files for this day already -# exist on disk. In other cases, the sequence we loop over will be a -# subset of "6 12 18 24". -# -# To generate this sequence, we first set its starting and ending values -# as well as the interval. -# -#----------------------------------------------------------------------- -# - -# Sequence interval must be 6 hours because the archives are 6-hourly. -arcv_hr_incr=6 - -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first obs retrieval time of the day. -hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) -hr_first=$((10#${hh_first})) -arcv_hr_start=$(( (hr_first/arcv_hr_incr + 1)*arcv_hr_incr )) - -# Ending archive hour. This is set to the archive hour containing obs at -# the last obs retrieval time of the day. -hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) -hr_last=$((10#${hh_last})) -arcv_hr_end=$(( (hr_last/arcv_hr_incr + 1)*arcv_hr_incr )) - -# Check whether any obs files already exist on disk in their processed -# (i.e. final) locations. Here, by "processed" we mean after any renaming -# and rearrangement of files that this script may do to the "raw" files, -# i.e. the files as they are named and arranged within the archive (tar) -# files on HPSS. If so, adjust the starting archive hour. In the process, -# keep a count of the number of obs files that already exist on disk. -num_existing_files=0 -for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - - # Set the full path to the final processed obs file (fp_proc). 
- sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - - if [[ -f ${fp_proc} ]]; then - num_existing_files=$((num_existing_files+1)) - print_info_msg " -File already exists on disk: - fp_proc = \"${fp_proc}\"" - else - hr=$((10#${hh})) - arcv_hr_start=$(( (hr/arcv_hr_incr + 1)*arcv_hr_incr )) - print_info_msg " -File does not exist on disk: - fp_proc = \"${fp_proc}\" -Setting the hour (since 00) of the first archive to retrieve to: - arcv_hr_start = \"${arcv_hr_start}\"" - break - fi -done - -# If the number of obs files that already exist on disk is equal to the -# number of obs files needed, then there is no need to retrieve any files. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then - - print_info_msg " -All obs files needed for the current day (yyyymmdd_task) already exist -on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any files." - exit - -# If the number of obs files that already exist on disk is not equal to -# the number of obs files needed, then we will need to retrieve files. -# In this case, set the sequence of hours corresponding to the archives -# from which files will be retrieved. -else - - arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) - arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" - print_info_msg " -At least some obs files needed needed for the current day (yyyymmdd_task) -do not exist on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed for the current day (which is equal to the -number of observation retrieval times for the current day) is: - num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} -The number of obs files that already exist on disk is: - num_existing_files = ${num_existing_files} -Will retrieve remaining files by looping over archives corresponding to -the following hours (since 00 of this day): - arcv_hrs = ${arcv_hrs_str} -" - -fi -# -#----------------------------------------------------------------------- -# -# At this point, at least some obs files for the current day need to be -# retrieved. Thus, loop over the relevant archives that contain obs for -# the day given by yyyymmdd_task and retrieve files as needed. -# -#----------------------------------------------------------------------- -# - -# Whether to move the files or copy them from their raw to their processed -# locations. -#mv_or_cp="mv" -mv_or_cp="cp" -# Whether to remove raw observations after processed directories have -# been created from them. -remove_raw_obs="${REMOVE_RAW_OBS_NDAS}" -# If the raw directories and files are to be removed at the end of this -# script, no need to copy the files since the raw directories are going -# to be removed anyway. -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - mv_or_cp="mv" -fi - -# Base directory that will contain the archive subdirectories in which -# the files extracted from each archive (tar) file will be placed. We -# refer to this as the "raw" base directory because it contains files -# as they are found in the archives before any processing by this script. 
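The epoch-seconds arithmetic in the existence check above reduces to a simple lead-hour computation from 00Z of the task day (a sketch; the script continues with the raw base directory below):

from datetime import datetime

def lead_hours(yyyymmdd_task, yyyymmddhh_valid):
    """Hours from 00Z of the task day to the valid time, as used to
    evaluate the METplus file-name template."""
    t0 = datetime.strptime(yyyymmdd_task, '%Y%m%d')
    t1 = datetime.strptime(yyyymmddhh_valid, '%Y%m%d%H')
    return int((t1 - t0).total_seconds() // 3600)

print(lead_hours('20230615', '2023061506'))  # 6
print(lead_hours('20230615', '2023061600'))  # 24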
-basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" - -for arcv_hr in ${arcv_hrs[@]}; do - - print_info_msg " -arcv_hr = ${arcv_hr}" - - # Calculate the time information for the current archive. - yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) - yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) - hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - - # Directory that will contain the files retrieved from the current archive - # file. We refer to this as the "raw" archive directory because it will - # contain the files as they are in the archive before any processing by - # this script. - arcv_dir_raw="${basedir_raw}/${yyyymmddhh_arcv}" - - # Check whether any of the obs retrieval times for the day associated with - # this task fall in the time interval spanned by the current archive. If - # so, set the flag (do_retrieve) to retrieve the files in the current - # archive. - arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 6 hours ago" +%Y%m%d%H) - arcv_contents_yyyymmddhh_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} 1 hours ago" +%Y%m%d%H) - do_retrieve="FALSE" - for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do - obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} - if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ - [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then - do_retrieve="TRUE" - break - fi - done - - if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then - - print_info_msg " -None of the current day's observation retrieval times fall in the range -spanned by the current ${arcv_hr_incr}-hourly archive file. The bounds of the current -archive are: - arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" - arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" -The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" - - else - - # Make sure the raw archive directory exists because it is used below as - # the output directory of the retrieve_data.py script (so if this directory - # doesn't already exist, that script will fail). Creating this directory - # also ensures that the raw base directory (basedir_raw) exists before we - # change location to it below. - mkdir -p ${arcv_dir_raw} - - # The retrieve_data.py script first extracts the contents of the archive - # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). Note that - # the relative paths of obs files within archives associted with different - # days may be the same. Thus, if files with the same archive-relative - # paths are being simultaneously extracted from multiple archive files - # (by multiple get_obs tasks), they will likely clobber each other if the - # extracton is being carried out into the same location on disk. To avoid - # this, we first change location to the raw base directory (whose name is - # obs-day dependent) and then call the retrieve_data.py script. - cd ${basedir_raw} - - # Pull obs from HPSS. This will get all the obs files in the current - # archive and place them in the raw archive directory. - # Note that for the specific case of NDAS obs, this will get all 7 obs - # files in the current archive, although we will make use of only 6 of - # these (we will not use the tm00 file). 
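That tm06/tm00 bookkeeping determines which archive cycle supplies each valid hour. A sketch of the mapping, assuming whole-hour valid times (the retrieval command itself follows):

from datetime import datetime, timedelta

def ndas_source_file(valid_time):
    """Archive cycle and tmNN suffix that supply obs valid at valid_time.
    Each 6-hourly archive provides tm06..tm01 (the six hours before the
    cycle); the equivalent tm00 of the cycle itself is skipped."""
    arcv_hr = (valid_time.hour // 6 + 1) * 6
    arcv = valid_time.replace(hour=0) + timedelta(hours=arcv_hr)
    hrs_ago = int((arcv - valid_time).total_seconds() // 3600)
    return arcv.strftime('%Y%m%d%H'), f'tm{hrs_ago:02d}'

print(ndas_source_file(datetime(2023, 6, 15, 0)))   # ('2023061506', 'tm06')
print(ndas_source_file(datetime(2023, 6, 15, 5)))   # ('2023061506', 'tm01')
print(ndas_source_file(datetime(2023, 6, 15, 23)))  # ('2023061600', 'tm01')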
- cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${yyyymmddhh_arcv} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path ${arcv_dir_raw} \ - --summary_file retrieve_data.log" - - print_info_msg "CALLING: ${cmd}" - $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - - # Loop over the raw obs files extracted from the current archive and - # generate from them the processed obs files. - # - # For NDAS obs, this consists of simply copying or moving the files from - # the raw archive directory to the processed directory, possibly renaming - # them in the process. - # - # Note that the tm06 file in a given archive contain more/better observations - # than the tm00 file in the next archive (their valid times are equivalent), - # so we use the tm06 files. - for hrs_ago in $(seq --format="%02g" 6 -${NDAS_OBS_AVAIL_INTVL_HRS} 1); do - yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs_ago} hours ago" +%Y%m%d%H) - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed obs file from the raw one (by moving, copying, or - # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. - if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - - # The raw file name needs to be the same as what the retrieve_data.py - # script called above ends up retrieving. The list of possibile templates - # for this name is given in parm/data_locations.yml, but which of those - # is actually used is not known until retrieve_data.py completes. Thus, - # that information needs to be passed back by the script and used here. - # For now, we hard-code the file name here. - fn_raw="nam.t${hh_arcv}z.prepbufr.tm${hrs_ago}.nr" - fp_raw="${arcv_dir_raw}/${fn_raw}" - - # Set the full path to the final processed obs file (fp_proc) we want to - # create. - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - mkdir -p $( dirname "${fp_proc}" ) - - ${mv_or_cp} ${fp_raw} ${fp_proc} - - fi - done - - fi - -done -# -#----------------------------------------------------------------------- -# -# Clean up raw obs directories. -# -#----------------------------------------------------------------------- -# -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - print_info_msg "Removing raw obs directories..." - rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw obs directories." -fi diff --git a/ush/get_obs_nohrsc.sh b/ush/get_obs_nohrsc.sh deleted file mode 100755 index 5c56f8a8df..0000000000 --- a/ush/get_obs_nohrsc.sh +++ /dev/null @@ -1,372 +0,0 @@ -#!/usr/bin/env bash - -# -#----------------------------------------------------------------------- -# -# Source the variable definitions file and the bash utility functions. -# -#----------------------------------------------------------------------- -# -. 
$USHdir/source_util_funcs.sh -for sect in user platform verification ; do - source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} -done - -set -u -#set -x -# -#----------------------------------------------------------------------- -# -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. -# -# NOHRSC snow accumulation observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 -# -# where AA is the 2-digit accumulation duration in hours: 06 or 24 -# -# METplus is configured to verify snowfall using 06- and 24-h accumulated -# snowfall from 6- and 12-hourly NOHRSC files, respectively. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. -#----------------------------------------------------------------------- -# - -# The time interval (in hours) at which the obs are available on HPSS -# must divide evenly into 24. Otherwise, different days would have obs -# available at different hours-of-day. Make sure this is the case. -remainder=$(( 24 % NOHRSC_OBS_AVAIL_INTVL_HRS )) -if [ ${remainder} -ne 0 ]; then - print_err_msg_exit "\ -The obs availability interval NOHRSC_OBS_AVAIL_INTVL_HRS must divide evenly -into 24 but doesn't: - NOHRSC_OBS_AVAIL_INTVL_HRS = ${NOHRSC_OBS_AVAIL_INTVL_HRS} - 24 % NOHRSC_OBS_AVAIL_INTVL_HRS = ${remainder}" -fi - -# Accumulation period to use when getting obs files. This is simply (a -# properly formatted version of) the obs availability interval. -accum_obs_fmt=$( printf "%d" "${NOHRSC_OBS_AVAIL_INTVL_HRS}" ) - -# The day (in the form YYYMMDD) associated with the current task via the -# task's cycledefs attribute in the ROCOTO xml. -yyyymmdd_task=${PDY} - -# Seconds since some reference time that the DATE_UTIL utility uses of -# the day of the current task. This will be used below to find hours -# since the start of this day. -sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) -# -#----------------------------------------------------------------------- -# -# Get the list of all the times in the current day at which to retrieve -# obs. This is an array with elements having format "YYYYMMDDHH". -# -#----------------------------------------------------------------------- -# -array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" -eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) -# -#----------------------------------------------------------------------- -# -# Obs files will be obtained by extracting them from the relevant 24-hourly -# archives. Thus, we need the sequence of archive hours over which to -# loop. In the simplest case, this sequence will be "0 24". This will -# be the case if the observation retrieval times include all hours of the -# task's day and if none of the obs files for this day already exist on -# disk. 
In other cases, the sequence we loop over will be a subset of -# "0 24", e.g. just "0" or just "24". -# -# To generate this sequence, we first set its starting and ending values -# as well as the interval. -# -#----------------------------------------------------------------------- -# - -# Sequence interval must be 24 hours because the archives are 24-hourly. -arcv_hr_incr=24 - -# Initial guess for starting archive hour. This is set to the archive -# hour containing obs at the first obs retrieval time of the day. -hh_first=$(echo ${obs_retrieve_times_crnt_day[0]} | cut -c9-10) -hr_first=$((10#${hh_first})) -arcv_hr_start=$(( hr_first/arcv_hr_incr )) -arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) - -# Ending archive hour. This is set to the archive hour containing obs at -# the last obs retrieval time of the day. -hh_last=$(echo ${obs_retrieve_times_crnt_day[-1]} | cut -c9-10) -hr_last=$((10#${hh_last})) -if [[ ${hr_last} -eq 0 ]]; then - arcv_hr_end=24 -else - arcv_hr_end=$(( hr_last/arcv_hr_incr )) - arcv_hr_end=$(( arcv_hr_end*arcv_hr_incr )) -fi - -# Check whether any obs files already exist on disk in their processed -# (i.e. final) locations. Here, by "processed" we mean after any renaming -# and rearrangement of files that this script may do to the "raw" files, -# i.e. the files as they are named and arranged within the archive (tar) -# files on HPSS. If so, adjust the starting archive hour. In the process, -# keep a count of the number of obs files that already exist on disk. -num_existing_files=0 -for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - - # Set the full path to the final processed obs file (fp_proc). - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - - if [[ -f ${fp_proc} ]]; then - num_existing_files=$((num_existing_files+1)) - print_info_msg " -File already exists on disk: - fp_proc = \"${fp_proc}\"" - else - hr=$((10#${hh})) - arcv_hr_start=$(( hr/arcv_hr_incr )) - arcv_hr_start=$(( arcv_hr_start*arcv_hr_incr )) - print_info_msg " -File does not exist on disk: - fp_proc = \"${fp_proc}\" -Setting the hour (since 00) of the first archive to retrieve to: - arcv_hr_start = \"${arcv_hr_start}\"" - break - fi -done - -# If the number of obs files that already exist on disk is equal to the -# number of obs files needed, then there is no need to retrieve any files. -num_obs_retrieve_times_crnt_day=${#obs_retrieve_times_crnt_day[@]} -if [[ ${num_existing_files} -eq ${num_obs_retrieve_times_crnt_day} ]]; then - - print_info_msg " -All obs files needed for the current day (yyyymmdd_task) already exist -on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -Thus, there is no need to retrieve any files." - exit - -# If the number of obs files that already exist on disk is not equal to -# the number of obs files needed, then we will need to retrieve files. -# In this case, set the sequence of hours corresponding to the archives -# from which files will be retrieved. 
-else - - arcv_hrs=($(seq ${arcv_hr_start} ${arcv_hr_incr} ${arcv_hr_end})) - arcv_hrs_str="( "$( printf "%s " "${arcv_hrs[@]}" )")" - print_info_msg " -At least some obs files needed needed for the current day (yyyymmdd_task) -do not exist on disk: - yyyymmdd_task = \"${yyyymmdd_task}\" -The number of obs files needed for the current day (which is equal to the -number of observation retrieval times for the current day) is: - num_obs_retrieve_times_crnt_day = ${num_obs_retrieve_times_crnt_day} -The number of obs files that already exist on disk is: - num_existing_files = ${num_existing_files} -Will retrieve remaining files by looping over archives corresponding to -the following hours (since 00 of this day): - arcv_hrs = ${arcv_hrs_str} -" - -fi -# -#----------------------------------------------------------------------- -# -# At this point, at least some obs files for the current day need to be -# retrieved. The NOHRSC data on HPSS are archived by day, with the -# archive for a given day containing 6-hour as well as 24-hour grib2 -# files. The four 6-hour files are for accumulated snowfall at 00z -# (which represents accumulation over the last 6 hours of the previous -# day), 06z, 12z, and 18z, while the two 24-hour files are at 00z (which -# represents accumulation over all 24 hours of the previous day) and 12z -# (which represents accumulation over the last 12 hours of the previous -# day plus the first 12 hours of the current day). -# -# Here, we will only obtain the 6-hour files. In other workflow tasks, -# the values in these 6-hour files will be added as necessary to obtain -# accumulations over longer periods (e.g. 24 hours). Since the four -# 6-hour files are in one archive and are relatively small (on the order -# of kilobytes), we get them all with a single call to the retrieve_data.py -# script. -# -#----------------------------------------------------------------------- -# - -# Whether to move the files or copy them from their raw to their processed -# locations. -#mv_or_cp="mv" -mv_or_cp="cp" -# Whether to remove raw observations after processed directories have -# been created from them. -remove_raw_obs="${REMOVE_RAW_OBS_NOHRSC}" -# If the raw directories and files are to be removed at the end of this -# script, no need to copy the files since the raw directories are going -# to be removed anyway. -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - mv_or_cp="mv" -fi - -# Base directory that will contain the archive subdirectories in which -# the files extracted from each archive (tar) file will be placed. We -# refer to this as the "raw" base directory because it contains files -# as they are found in the archives before any processing by this script. -basedir_raw="${OBS_DIR}/raw_${yyyymmdd_task}" - -for arcv_hr in ${arcv_hrs[@]}; do - - print_info_msg " -arcv_hr = ${arcv_hr}" - - # Calculate the time information for the current archive. - yyyymmddhh_arcv=$(${DATE_UTIL} --date "${yyyymmdd_task} ${arcv_hr} hours" +%Y%m%d%H) - yyyymmdd_arcv=$(echo ${yyyymmddhh_arcv} | cut -c1-8) - hh_arcv=$(echo ${yyyymmddhh_arcv} | cut -c9-10) - - # Directory that will contain the files retrieved from the current archive - # file. We refer to this as the "raw" archive directory because it will - # contain the files as they are in the archive before any processing by - # this script. - arcv_dir_raw="${basedir_raw}/${yyyymmdd_arcv}" - - # Check whether any of the obs retrieval times for the day associated with - # this task fall in the time interval spanned by the current archive. 
If - # so, set the flag (do_retrieve) to retrieve the files in the current - # archive. - arcv_contents_yyyymmddhh_start=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv}" +%Y%m%d%H) - hrs=$((arcv_hr_incr - 1)) - arcv_contents_yyyymmddhh_end=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H) - do_retrieve="FALSE" - for (( i=0; i<${num_obs_retrieve_times_crnt_day}; i++ )); do - obs_retrieve_time=${obs_retrieve_times_crnt_day[i]} - if [[ "${obs_retrieve_time}" -ge "${arcv_contents_yyyymmddhh_start}" ]] && \ - [[ "${obs_retrieve_time}" -le "${arcv_contents_yyyymmddhh_end}" ]]; then - do_retrieve="TRUE" - break - fi - done - - if [[ $(boolify "${do_retrieve}") != "TRUE" ]]; then - - print_info_msg " -None of the times in the current day (or hour 00 of the next day) at which -obs need to be retrieved fall in the range spanned by the current ${arcv_hr_incr}-hourly -archive file. The bounds of the data in the current archive file are: - arcv_contents_yyyymmddhh_start = \"${arcv_contents_yyyymmddhh_start}\" - arcv_contents_yyyymmddhh_end = \"${arcv_contents_yyyymmddhh_end}\" -The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = ($(printf "\"%s\" " ${obs_retrieve_times_crnt_day[@]}))" - - else - - # Make sure the raw archive directory exists because it is used below as - # the output directory of the retrieve_data.py script (so if this directory - # doesn't already exist, that script will fail). Creating this directory - # also ensures that the raw base directory (basedir_raw) exists before we - # change location to it below. - mkdir -p ${arcv_dir_raw} - - # The retrieve_data.py script first extracts the contents of the archive - # file into the directory it was called from and then moves them to the - # specified output location (via the --output_path option). Note that - # the relative paths of obs files within archives associted with different - # days may be the same. Thus, if files with the same archive-relative - # paths are being simultaneously extracted from multiple archive files - # (by multiple get_obs tasks), they will likely clobber each other if the - # extracton is being carried out into the same location on disk. To avoid - # this, we first change location to the raw base directory (whose name is - # obs-day dependent) and then call the retrieve_data.py script. - cd ${basedir_raw} - - # Pull obs from HPSS. This will get all the obs files in the current - # archive and place them in the raw archive directory. - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${yyyymmddhh_arcv} \ - --data_stores hpss \ - --data_type NOHRSC_obs \ - --output_path ${arcv_dir_raw} \ - --summary_file retrieve_data.log" - - print_info_msg "CALLING: ${cmd}" - $cmd || print_err_msg_exit "Could not retrieve obs from HPSS." - - # Loop over the raw obs files extracted from the current archive and - # generate from them the processed obs files. - # - # For NOHRSC obs, this consists of simply copying or moving the files from - # the raw archive directory to the processed directory, possibly renaming - # them in the process. 
- for hrs in $(seq 0 ${NOHRSC_OBS_AVAIL_INTVL_HRS} 23); do - yyyymmddhh=$(${DATE_UTIL} --date "${yyyymmdd_arcv} ${hh_arcv} ${hrs} hours" +%Y%m%d%H) - yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) - hh=$(echo ${yyyymmddhh} | cut -c9-10) - # Create the processed obs file from the raw one (by moving, copying, or - # otherwise) only if the time of the current file in the current archive - # also exists in the list of obs retrieval times for the current day. - if [[ ${obs_retrieve_times_crnt_day[@]} =~ ${yyyymmddhh} ]]; then - - # The raw file name needs to be the same as what the retrieve_data.py - # script called above ends up retrieving. The list of possibile templates - # for this name is given in parm/data_locations.yml, but which of those - # is actually used is not known until retrieve_data.py completes. Thus, - # that information needs to be passed back by the script and used here. - # For now, we hard-code the file name here. - fn_raw="sfav2_CONUS_${accum_obs_fmt}h_${yyyymmddhh}_grid184.grb2" - fp_raw="${arcv_dir_raw}/${fn_raw}" - - # Set the full path to the final processed obs file (fp_proc) we want to - # create. - sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) - lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp_proc" - mkdir -p $( dirname "${fp_proc}" ) - - ${mv_or_cp} ${fp_raw} ${fp_proc} - - fi - done - - fi - -done -# -#----------------------------------------------------------------------- -# -# Clean up raw obs directories. -# -#----------------------------------------------------------------------- -# -if [[ $(boolify "${remove_raw_obs}") == "TRUE" ]]; then - print_info_msg "Removing raw obs directories..." - rm -rf ${basedir_raw} || print_err_msg_exit "\ -Failed to remove raw obs directories." -fi diff --git a/ush/run_eval_METplus_timestr_tmpl.sh b/ush/run_eval_METplus_timestr_tmpl.sh new file mode 100755 index 0000000000..b2df03c56c --- /dev/null +++ b/ush/run_eval_METplus_timestr_tmpl.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +# +#----------------------------------------------------------------------- +# +# Source the variable definitions file and the bash utility functions. +# +#----------------------------------------------------------------------- +# +#OBS_DIR="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa" +#OBS_CCPA_APCP_FN_TEMPLATE="{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" + +#USHdir="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/ufs-srweather-app/ush" +#yyyymmdd_task="20230217" +#lhr="22" +#METplus_timestr_tmpl="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" + +#USHdir="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/ufs-srweather-app/ush"; yyyymmdd_task="20230217"; lhr="22"; METplus_timestr_tmpl="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" +set -u +. 
$USHdir/source_util_funcs.sh
+eval_METplus_timestr_tmpl \
+  init_time="${yyyymmdd_task}00" \
+  fhr="${lhr}" \
+  METplus_timestr_tmpl="${METplus_timestr_tmpl}" \
+  outvarname_evaluated_timestr="fp_proc"
+echo "${fp_proc}"
+
+# METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \

From 7d684057f7e73b75804549735fdd1fbf3830b5e7 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Mon, 7 Oct 2024 12:51:33 -0600
Subject: [PATCH 133/260] Clean up and clarify comments; calculate list of
 processed obs file paths only once and save for later use; other minor code
 improvements.

---
 ush/get_obs.py | 202 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 132 insertions(+), 70 deletions(-)

diff --git a/ush/get_obs.py b/ush/get_obs.py
index f6e2fed265..84e49e6f40 100644
--- a/ush/get_obs.py
+++ b/ush/get_obs.py
@@ -19,8 +19,8 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod):
     """
     This file defines a function that, for the given observation type, obs
-    archive interval, and hour of day, returns the hour (counting from the
-    start of the day) corresponding to the archive file in which the obs file
+    archive interval, and hour of day, returns the hour (counting from hour
+    zero of the day) corresponding to the archive file in which the obs file
     for the given hour of day is included.
 
     Note that for cumulative fields (like CCPA and NOHRSC, as opposed to
@@ -38,8 +38,7 @@
     arcv_intvl_hrs:
     Time interval (in hours) between archive files.  An integer.  For example,
     if the obs files are bundled into 6-hourly archives, then this will be
-    set to 6.  This must be between 1 and 24 and must divide evenly into 24
-    (this is checked for elsewhere).
+    set to 6.  This must be between 1 and 24 and must divide evenly into 24.
 
     hod:
     The hour of the day.  An integer.  This must be between 0 and 23.  For
@@ -52,26 +51,45 @@
     the obs file for the given hour of day.  An integer.
     """
 
-    valid_obtypes = ['CCPA', 'ccpa', 'NOHRSC', 'nohrsc', 'MRMS', 'mrms', 'NDAS', 'ndas']
-    if obtype not in valid_obtypes:
+    valid_obtypes = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS']
+    obtype_upper = obtype.upper()
+    if obtype_upper not in valid_obtypes:
         msg = dedent(f"""
-            The specified observation type is not supported:
-                obtype = {obtype}
+            The specified observation type (after converting to upper case) is not
+            supported:
+                obtype_upper = {obtype_upper}
             Valid observation types are:
-                {valid_obtypes}
+                {valid_obtypes}
            """)
         logging.error(msg)
         raise Exception(msg)
 
+    # Ensure that the archive interval divides evenly into 24 hours.
+    remainder = 24 % arcv_intvl_hrs
+    if remainder != 0:
+        msg = dedent(f"""
+            The archive interval for obs of type {obtype} must divide evenly into 24
+            but doesn't:
+                arcv_intvl_hrs = {arcv_intvl_hrs}
+                24 % arcv_intvl_hrs = {remainder}
+            """)
+        logging.error(msg)
+        raise Exception(msg)
+
     if (hod < 0) or (hod > 23):
         msg = dedent(f"""
-            The specified hour-of-day must be between 0 and 23, inclusive but isn't:
-                hod = {hod}
+            The specified hour-of-day must be between 0 and 23, inclusive, but isn't:
+                hod = {hod}
            """)
         logging.error(msg)
         raise Exception(msg)
 
-    obtype_upper = obtype.upper()
+    # Set the archive hour.  This depends on the obs type because each obs
+    # type can organize its observation files into archives in a different
+    # way, e.g. 
a cumulative obs type may put the obs files for hours 1
+    # through 6 of the day in the archive labeled with hour 6 while an
+    # instantaneous obs type may put the obs files for hours 0 through 5 of
+    # the day in the archive labeled with hour 6.
     if obtype_upper in ['CCPA']:
         if hod == 0:
             arcv_hr = 24
@@ -199,20 +217,48 @@ def get_obs(config, obtype, yyyymmdd_task):
         msg = dedent(f"""
             The obs availability interval for obs of type {obtype} must divide evenly
             into 24 but doesn't:
-                obs_avail_intvl_hrs = {obs_avail_intvl_hrs}
-                24 % obs_avail_intvl_hrs = {remainder}
+              obs_avail_intvl_hrs = {obs_avail_intvl_hrs}
+              24 % obs_avail_intvl_hrs = {remainder}
            """)
+        logging.error(msg)
         raise Exception(msg)
 
-    # For convenience, get obs availability interval as a datetime object.
+    # For convenience, convert the obs availability interval to a datetime
+    # object.
     obs_avail_intvl = dt.timedelta(hours=obs_avail_intvl_hrs)
 
     # Get the base directory for the observations.
     key = obtype + '_OBS_DIR'
     obs_dir = config['platform'][key]
 
-    # Set the group of fields for each observation type.  We assume there is
-    # a separate obs file type for each such field group in the observations.
+    # For each observation type, set the group of fields contained in those
+    # observation files that we need for verification.  Each group of fields
+    # is one that is verified together in the workflow.  We assume there is
+    # a separate set of obs files for each such field group in the observations,
+    # and in the code below we loop over these sets of files as necessary.
+    # There are several scenarios to consider:
+    #
+    # * An obs type consists of only one set of files containing only one
+    #   field.
+    #   This is the case for CCPA and NOHRSC obs.  CCPA obs consist of only
+    #   one set of files that contain APCP data, and NOHRSC obs consist of
+    #   only one set of files that contain ASNOW data.
+    #
+    # * An obs type consists of more than one set of files, with each file
+    #   containing a different field.
+    #   This is the case for MRMS obs.  These consist of two sets of files.
+    #   The first set contains REFC data, and the second contains RETOP data.
+    #
+    # * An obs type consists of only one set of files, but each file contains
+    #   multiple groups of fields needed for verification.
+    #   This is the case for NDAS obs.  These consist of a single set of files,
+    #   but each file contains both the ADPSFC fields (like 2-m temperature)
+    #   and ADPUPA fields (like 500-mb temperature) that are verified separately
+    #   in the workflow tasks and thus are considered separate field groups.
+    #
+    # Other obs type and field group scenarios are also possible, but we do
+    # not describe them since they are not applicable to any of the obs types
+    # considered here.
     if obtype == 'CCPA':
         field_groups_in_obs = ['APCP']
     elif obtype == 'NOHRSC':
@@ -225,8 +271,9 @@
 
     # For each field group in the observations, get the METplus file name
     # template for the observation files.  Then combine these with the base
-    # directory to get the METplus template for the full path to the processed
-    # obs files.
+    # directory to get the METplus template for the full path on disk to
+    # the processed obs files.  If obs files do not already exist at these
+    # locations, they will be retrieved from HPSS and placed at these locations.
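+    # For example, for CCPA obs with the default configuration, the combined
+    # template looks like
+    #
+    #   {CCPA_OBS_DIR}/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2
+    #
+    # which, when evaluated at an obs retrieval time such as 22z on 20230217,
+    # yields a concrete path ending in 20230217/ccpa.t22z.01h.hrap.conus.gb2.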
fp_proc_templates = []
    for fg in field_groups_in_obs:
        key = 'OBS_' + obtype + '_' + fg + '_FN_TEMPLATE'
@@ -241,8 +288,8 @@
 
     # For cumulative obs, set the accumulation period to use when getting obs
-    # files.  This is simply (a properly formatted version of) the obs
-    # availability interval.
+    # files.  This is simply a properly formatted version of the obs availability
+    # interval.
     accum_obs_formatted = None
     if obtype == 'CCPA':
         accum_obs_formatted = f'{obs_avail_intvl_hrs:02d}'
@@ -264,8 +311,8 @@
     else:
         msg = dedent(f"""
             Invalid field specified for obs type:
-                obtype = {obtype}
-                field = {field}
+              obtype = {obtype}
+              field = {field}
            """)
         logging.error(msg)
         raise Exception(msg)
@@ -433,19 +480,23 @@
     one_hour = dt.timedelta(hours=1)
     ushdir = config['user']['USHdir']
 
-    # Check whether any obs files already exist on disk in their processed
-    # (i.e. final) locations.  Here, by "processed" we mean after any renaming
-    # and rearrangement of files that this script may do to the "raw" files,
-    # i.e. the files as they are named and arranged within the archive (tar)
-    # files on HPSS.  If so, adjust the starting archive hour.  In the process,
-    # keep a count of the number of obs files that already exist on disk.
-    num_existing_files = 0
-    do_break = False
-    for yyyymmddhh in obs_retrieve_times_crnt_day:
-
-        for fp_proc_templ in fp_proc_templates:
-            # Set the full path to the final processed obs file (fp_proc).
+    # Create dictionary containing the paths to all the processed obs files
+    # that should exist once this script successfully completes.  In this
+    # dictionary, the keys are the field groups, and the values are lists of
+    # paths.  Here, by "paths to processed files" we mean the paths after any
+    # renaming and rearrangement of files that this script may do to the "raw"
+    # files, i.e. the files as they are named and arranged within the archive
+    # (tar) files on HPSS.
+    all_fp_proc_dict = {}
+    for fg, fp_proc_templ in zip(field_groups_in_obs, fp_proc_templates):
+        all_fp_proc_dict[fg] = []
+        for yyyymmddhh in obs_retrieve_times_crnt_day:
+            # Set the lead hour, i.e. the number of hours from the beginning of the
+            # day at which the file is valid.
             lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour)
+            # Call a bash script to evaluate the METplus time-string template for
+            # the full path to the obs file valid at the current time.  This should
+            # be upgraded to a python script at some point.
             cmd = '; '.join(['export USHdir=' + ushdir,
                              'export yyyymmdd_task=' + yyyymmdd_task_str,
                              'export lhr=' + str(lhr),
                              'export METplus_timestr_tmpl=' + fp_proc_templ,
                              os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')])
             result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
             fp_proc = result.stdout.strip()
+            all_fp_proc_dict[fg].append(fp_proc)
 
-            # Check whether file already exists.
+    # Check whether any obs files already exist on disk in their processed
+    # (i.e. final) locations.  If so, adjust the starting archive hour.  In
+    # the process, keep a count of the number of obs files that already exist
+    # on disk.
+    num_existing_files = 0
+    do_break = False
+    for fg in field_groups_in_obs:
+        for yyyymmddhh, fp_proc in zip(obs_retrieve_times_crnt_day, all_fp_proc_dict[fg]):
+            # Check whether the processed file already exists.
            if os.path.isfile(fp_proc):
                 num_existing_files += 1
                 msg = dedent(f"""
                     File already exists on disk:
-                        fp_proc = {fp_proc}
+                      fp_proc = {fp_proc}
                    """)
                 logging.info(msg)
             else:
@@ -467,15 +527,14 @@
                 arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod)
                 msg = dedent(f"""
                     File does not exist on disk:
-                        fp_proc = {fp_proc}
+                      fp_proc = {fp_proc}
                     Setting the hour (since hour 0 of the current task day) of the first
                     archive to retrieve to:
-                        arcv_hr_start = {arcv_hr_start}
+                      arcv_hr_start = {arcv_hr_start}
                    """)
                 logging.info(msg)
                 do_break = True
                 break
-
         if do_break: break
 
     # If the number of obs files that already exist on disk is equal to the
@@ -487,7 +546,7 @@
         msg = dedent(f"""
             All obs files needed for the current day (yyyymmdd_task) already exist
             on disk:
-                yyyymmdd_task = {yyyymmdd_task}
+              yyyymmdd_task = {yyyymmdd_task}
             Thus, there is no need to retrieve any files.
            """)
         logging.info(msg)
@@ -503,14 +562,14 @@
         msg = dedent(f"""
             At least some obs files needed for the current day (yyyymmdd_task)
             do not exist on disk:
-                yyyymmdd_task = {yyyymmdd_task}
+              yyyymmdd_task = {yyyymmdd_task}
             The number of obs files needed for the current day is:
-                num_files_needed = {num_files_needed}
+              num_files_needed = {num_files_needed}
             The number of obs files that already exist on disk is:
-                num_existing_files = {num_existing_files}
+              num_existing_files = {num_existing_files}
             Will retrieve remaining files by looping over archives corresponding to
             the following hours (since hour 0 of the current day):
-                arcv_hrs = {arcv_hrs}
+              arcv_hrs = {arcv_hrs}
            """)
         logging.info(msg)
     #
@@ -617,10 +676,10 @@
             hour 0 of the next day if considering a cumulative obs type) fall in the
             range spanned by the current {arcv_intvl_hrs}-hourly archive file.  The
             bounds of the data in the current archive are:
-                arcv_contents_start = {arcv_contents_start}
-                arcv_contents_end = {arcv_contents_end}
+              arcv_contents_start = {arcv_contents_start}
+              arcv_contents_end = {arcv_contents_end}
             The times at which obs need to be retrieved are:
-                obs_retrieve_times_crnt_day = {obs_retrieve_times_crnt_day}
+              obs_retrieve_times_crnt_day = {obs_retrieve_times_crnt_day}
            """)
         logging.info(msg)
 
@@ -665,6 +724,18 @@
         result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
         rc = result.returncode
 
+        # Get the list of times corresponding to the obs files in the current
+        # archive.  This is a list of datetime objects.
+        if obtype == 'CCPA':
+            obs_times_in_arcv = [yyyymmddhh_arcv - i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
+        elif obtype == 'NOHRSC':
+            obs_times_in_arcv = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
+        elif obtype == 'MRMS':
+            obs_times_in_arcv = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
+        elif obtype == 'NDAS':
+            obs_times_in_arcv = [yyyymmddhh_arcv - (i+1)*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
+        obs_times_in_arcv.sort()
+
         # Loop over the raw obs files extracted from the current archive and
         # generate from them the processed obs files.
         #
@@ -685,24 +756,21 @@
         # them in the process.  Note that the tm06 file in a given archive contains
         # more/better observations than the tm00 file in the next archive (their
         # valid times are equivalent), so we use the tm06 files.
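        # For example, with hourly obs bundled into 6-hourly archives, the NDAS
        # archive labeled 00z provides the tm06 through tm01 files, which contain
        # obs valid at 18z through 23z of the previous day.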
-        if obtype == 'CCPA':
-            in_arcv_times = [yyyymmddhh_arcv - i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
-        elif obtype == 'NOHRSC':
-            in_arcv_times = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
-        elif obtype == 'MRMS':
-            in_arcv_times = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
-        elif obtype == 'NDAS':
-            in_arcv_times = [yyyymmddhh_arcv - (i+1)*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
-        in_arcv_times.sort()
-
-        for yyyymmddhh in in_arcv_times:
+        for yyyymmddhh in obs_times_in_arcv:
 
             # Create the processed obs file from the raw one (by moving, copying, or
             # otherwise) only if the time of the current file in the current archive
-            # also exists in the list of obs retrieval times for the current day.
+            # also exists in the list of obs retrieval times for the current day.  We
+            # need to check this because it is possible that some of the obs retrieval
+            # times come before the range of times spanned by the current archive while
+            # the others come after, but none fall within that range.  This can happen
+            # because the set of archive hours over which we are looping was constructed
+            # above without considering whether there are gaps in the obs retrieval
+            # times that make it unnecessary to retrieve some of the archives between
+            # the first and last ones that must be retrieved.
             if yyyymmddhh in obs_retrieve_times_crnt_day:
 
-                for i, fp_proc_templ in enumerate(fp_proc_templates):
+                for i, fg in enumerate(field_groups_in_obs):
 
                     # For MRMS obs, first select from the set of raw files for the current day
                     # those that are nearest in time to the current hour.  Unzip these in a
@@ -752,16 +820,10 @@
                     fn_raw = 'nam.t' + hh_arcv_str + 'z.prepbufr.tm' + f'{hrs_ago:02d}' + '.nr'
                     fp_raw = os.path.join(arcv_dir_raw, fn_raw)
 
-                    # Set the full path to the final processed obs file (fp_proc) we want to
+                    # Get the full path to the final processed obs file (fp_proc) we want to
                     # create.
-                    lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour)
-                    cmd = '; '.join(['export USHdir=' + ushdir,
-                                     'export yyyymmdd_task=' + yyyymmdd_task_str,
-                                     'export lhr=' + str(lhr),
-                                     'export METplus_timestr_tmpl=' + fp_proc_templ,
-                                     os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')])
-                    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
-                    fp_proc = result.stdout.strip()
+                    indx = obs_retrieve_times_crnt_day.index(yyyymmddhh)
+                    fp_proc = all_fp_proc_dict[fg][indx]
 
                     # Make sure the directory in which the processed file will be created exists.
                     dir_proc = os.path.dirname(fp_proc)

From 2b4c9569bcbb70656ed99edc0c16b4162ea61b6c Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Mon, 7 Oct 2024 16:22:52 -0600
Subject: [PATCH 134/260] Minor cleanup.

---
 scripts/exregional_get_verif_obs.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh
index 158218889e..d1ee4116e8 100755
--- a/scripts/exregional_get_verif_obs.sh
+++ b/scripts/exregional_get_verif_obs.sh
@@ -46,8 +46,8 @@ done
 #
 #-----------------------------------------------------------------------
 #
-# Make sure the obs type is valid.  Then call the python script get_obs.py
-# to get the obs files.
+# Make sure the obs type is valid.  Then call a python script to check
+# for the presence of obs files on disk and get them if needed.
# #----------------------------------------------------------------------- # @@ -67,7 +67,8 @@ python3 -u ${USHdir}/${script_bn}.py \ --var_defns_path "${GLOBAL_VAR_DEFNS_FP}" \ --obtype ${OBTYPE} \ --obs_day ${PDY}" -print_info_msg "CALLING: ${cmd}" +print_info_msg " +CALLING: ${cmd}" ${cmd} || print_err_msg_exit "Error calling ${script_bn}.py." # #----------------------------------------------------------------------- From a35f240400709838ebda05198891e2fde5e804cc Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 16:32:47 -0600 Subject: [PATCH 135/260] Remove unneeded environment variables. --- parm/wflow/verify_pre.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 220b029412..567f045188 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -29,7 +29,6 @@ task_get_obs_ccpa: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -41,7 +40,6 @@ task_get_obs_nohrsc: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' @@ -53,7 +51,6 @@ task_get_obs_mrms: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' MRMS_FIELDS: 'REFC RETOP' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' @@ -66,7 +63,6 @@ task_get_obs_ndas: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' From 9d7c0478b09e3637ad8e8967cb5ea4f0582030af Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 16:38:18 -0600 Subject: [PATCH 136/260] Move the two sets of variables [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR and REMOVE_RAW_OBS_[CCPA|NOHRSC|MRMS|NDAS] in the default app configuration file from the "platform" section to the "verification" section so that they are closer to the METplus file name template variables OBS_[...]_FN_TEMPLATE that they are closely coupled with. 
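For example, a user configuration that overrides these settings now places
them under the "verification" section rather than "platform"; a minimal
sketch (the directory values below are illustrative placeholders):

verification:
  CCPA_OBS_DIR: /path/to/staged/obs_data/ccpa
  NOHRSC_OBS_DIR: /path/to/staged/obs_data/nohrsc
  MRMS_OBS_DIR: /path/to/staged/obs_data/mrms
  NDAS_OBS_DIR: /path/to/staged/obs_data/ndas
  REMOVE_RAW_OBS_CCPA: false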
--- parm/wflow/default_workflow.yaml | 8 +- tests/WE2E/run_WE2E_tests.py | 6 +- ush/config_defaults.yaml | 186 +++++++++++++++---------------- ush/get_obs.py | 4 +- 4 files changed, 99 insertions(+), 105 deletions(-) diff --git a/parm/wflow/default_workflow.yaml b/parm/wflow/default_workflow.yaml index 39b66fc95c..4ffb6f288a 100644 --- a/parm/wflow/default_workflow.yaml +++ b/parm/wflow/default_workflow.yaml @@ -4,7 +4,7 @@ rocoto: entities: ACCOUNT: '{{ user.ACCOUNT }}' - CCPA_OBS_DIR: '{{ platform.CCPA_OBS_DIR }}' + CCPA_OBS_DIR: '{{ verification.CCPA_OBS_DIR }}' COLDSTART: '{{ workflow.COLDSTART }}' COMINgfs: '{{ platform.get("COMINgfs") }}' GLOBAL_VAR_DEFNS_FP: '{{ workflow.GLOBAL_VAR_DEFNS_FP }}' @@ -14,10 +14,10 @@ rocoto: LOAD_MODULES_RUN_TASK: '{{ workflow.LOAD_MODULES_RUN_TASK_FP }} {{ user.MACHINE }}' LOGEXT: ".log" NET: '{{ nco.NET_default }}' - MRMS_OBS_DIR: '{{ platform.MRMS_OBS_DIR }}' + MRMS_OBS_DIR: '{{ verification.MRMS_OBS_DIR }}' NCORES_PER_NODE: '{{ platform.NCORES_PER_NODE }}' - NDAS_OBS_DIR: '{{ platform.NDAS_OBS_DIR }}' - NOHRSC_OBS_DIR: '{{ platform.NOHRSC_OBS_DIR }}' + NDAS_OBS_DIR: '{{ verification.NDAS_OBS_DIR }}' + NOHRSC_OBS_DIR: '{{ verification.NOHRSC_OBS_DIR }}' PARTITION_DEFAULT: '{{ platform.get("PARTITION_DEFAULT") }}' PARTITION_FCST: '{{ platform.get("PARTITION_FCST") }}' PARTITION_HPSS: '{{ platform.get("PARTITION_HPSS") }}' diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index d3c2cb98ab..6a3e3bc7f4 100755 --- a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -207,13 +207,11 @@ def run_we2e_tests(homedir, args) -> None: # obs. If so, and if the config file does not explicitly set the observation locations, # fill these in with defaults from the machine files obs_vars = ['CCPA_OBS_DIR','MRMS_OBS_DIR','NDAS_OBS_DIR','NOHRSC_OBS_DIR'] - if 'platform' not in test_cfg: - test_cfg['platform'] = {} for obvar in obs_vars: mach_path = machine_defaults['platform'].get('TEST_'+obvar) - if not test_cfg['platform'].get(obvar) and mach_path: + if not test_cfg['verification'].get(obvar) and mach_path: logging.debug(f'Setting {obvar} = {mach_path} from machine file') - test_cfg['platform'][obvar] = mach_path + test_cfg['verification'][obvar] = mach_path if args.compiler == "gnu": # 2D decomposition doesn't work with GNU compilers. Deactivate 2D decomposition for GNU diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 8a02964cc2..9750724494 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -276,72 +276,6 @@ platform: # #----------------------------------------------------------------------- # - # Set METplus parameters. Definitions: - # - # CCPA_OBS_DIR: - # User-specified location of the directory where CCPA hourly - # precipitation files used by METplus are located (or, if - # retrieved by the workflow, where they will be placed). See comments - # in file scripts/exregional_get_verif_obs.sh for more details about - # files and directory structure, as well as important caveats about - # errors in the metadata and file names. - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - # NOHRSC_OBS_DIR: - # User-specified location of top-level directory where NOHRSC 6- and - # 24-hour snowfall accumulation files used by METplus are located (or, - # if retrieved by the workflow, where they will be placed). 
See comments - # in file scripts/exregional_get_verif_obs.sh for more details about - # files and directory structure - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - # MRMS_OBS_DIR: - # User-specified location of the directory where MRMS composite - # reflectivity and echo top files used by METplus are located (or, if - # retrieved by the workflow, where they will be placed). See comments - # in the scripts/exregional_get_verif_obs.sh for more details about - # files and directory structure. - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - # NDAS_OBS_DIR: - # User-specified location of top-level directory where NDAS prepbufr - # files used by METplus are located (or, if retrieved by the workflow, - # where they will be placed). See comments in file - # scripts/exregional_get_verif_obs.sh for more details about files - # and directory structure. - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - #----------------------------------------------------------------------- - # - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # - #----------------------------------------------------------------------- - # - # REMOVE_RAW_OBS_DIRS_[CCPA|MRMS|NDAS|NOHRSC]: - # Boolean flag specifying whether to remove the "raw" observation - # directories after pulling the specified type of obs (CCPA, MRMS, - # NDAS, or NOHRSC). The raw directories are the ones in which the - # observation files are placed immediately after pulling them from - # a data store (e.g. NOAA's HPSS) but before performing any processing - # on them (e.g. renaming the files or reorganizing their directory - # structure). - # - #----------------------------------------------------------------------- - # - REMOVE_RAW_OBS_CCPA: true - REMOVE_RAW_OBS_MRMS: true - REMOVE_RAW_OBS_NDAS: true - REMOVE_RAW_OBS_NOHRSC: true - # - #----------------------------------------------------------------------- - # # DOMAIN_PREGEN_BASEDIR: # The base directory containing pregenerated grid, orography, and surface # climatology files. This is an alternative for setting GRID_DIR, @@ -2423,37 +2357,66 @@ verification: # METPLUS_VERBOSITY_LEVEL: 2 # - # Time interval (in hours) at which various types of obs are available on - # NOAA's HPSS. - CCPA_OBS_AVAIL_INTVL_HRS: 1 - NOHRSC_OBS_AVAIL_INTVL_HRS: 6 - MRMS_OBS_AVAIL_INTVL_HRS: 1 - NDAS_OBS_AVAIL_INTVL_HRS: 1 + # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR: + # Base directory in which CCPA, NOHRSC, MRMS, or NDAS obs files needed by + # the verification tasks are located. If the files do not exist, they + # will be retrieved and placed under this directory. # - # Templates for CCPA, MRMS, and NDAS observation files. - # - # OBS_CCPA_APCP_FN_TEMPLATE: - # File name template for CCPA accumulated precipitation (APCP) observations. - # This template is used by the workflow tasks that call the METplus PcpCombine - # tool on CCPA obs to find the input observation files containing 1-hour - # APCP and then generate NetCDF files containing either 1-hour or greater - # than 1-hour APCP. - # - # OBS_NOHRSC_ASNOW_FN_TEMPLATE: - # File name template for NOHRSC snow observations. 
- # - # OBS_MRMS_REFC_FN_TEMPLATE: - # File name template for MRMS reflectivity observations. - # - # OBS_MRMS_RETOP_FN_TEMPLATE: - # File name template for MRMS echo top observations. + # Notes: # - # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: - # File name template for NDAS surface and upper air observations. - # This template is used by the workflow tasks that call the METplus Pb2nc - # tool on NDAS obs to find the input observation files containing ADP - # surface (ADPSFC) or ADP upper air (ADPUPA) fields and then generate - # NetCDF versions of these files. + # * If the obs files need to be retrieved (e.g. from NOAA's HPSS), then + # the user must have write permission to this directory. Otherwise, + # the get_obs tasks that attempt to create these files will fail. + # + # * Do not set two or more of these directories to the same location. + # Otherwise, unexpected results and data loss may occur. + # + # * The script ush/get_obs.py contains further details on the files and + # directory structure of each obs type. + # + # * CCPA obs contain errors in the metadata for a certain range of dates + # that need to be corrected during obs retrieval. This is described + # in more detail in ush/get_obs.py. + # + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # + # OBS_[CCPA_APCP|NOHRSC_ASNOW|MRMS_[REFC|RETOP]|NDAS_ADPSFCandADPUPA]_FN_TEMPLATE: + # File name templates for various obs type and vx field group combinations. + # + # Notes: + # + # * These are relative to the obs base directories + # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR + # defined above. Thus, the full template to the obs files is given, e.g. + # for CCPA obs, by {CCPA_OBS_DIR}/{OBS_CCPA_APCP_FN_TEMPLATE}. + # + # * These may represent file names only, or they may include relative paths + # before the file names. + # + # * These templates must contain full information about the year, month, + # day, and hour by including METplus time strings that serve as templates + # for this information. Some of this information may be in the relative + # directory portion and the rest in the file name, or there may be no + # relative directory portion and all of it may be in the file name, but + # all four pieces of timing information must be present somewhere in + # this template as METplus time strings. Otherwise, obs files created + # by the get_obs tasks for different days might overwrite each other. + # + # * If one or more of the obs files specified by this full path do not + # exist on disk, all the files will be created by first retrieving "raw" + # versions of them from a data store (e.g. NOAA's HPSS) and then placing + # these raw files in the locations specified by this full path template. + # + # * The raw obs files, i.e. the obs files as they are named and arranged + # in the data stores, may be different than the file path/name specified + # in these variables. The list of templates for raw files to search + # for in the data stores is given in the data retrieval configuration + # file at parm/data_locations.yml. Once retrieved, these raw files are + # renamed and relocated on disk to the locations specified by + # {..._OBS_DIR}/{..._FN_TEMPLATE}. # OBS_CCPA_APCP_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." 
~ data_intvl_hrs ~ "h.hrap.conus.gb2" }}'
   OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'
   OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'
   OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}'
+  #
+  # Time interval (in hours) at which various types of obs are available on
+  # NOAA's HPSS.
+  #
+  # Note that MRMS files are in fact available every few minutes, but here
+  # we set the obs availability interval to 1 hour because the forecast
+  # cannot (yet) support sub-hourly output.
+  #
+  CCPA_OBS_AVAIL_INTVL_HRS: 1
+  NOHRSC_OBS_AVAIL_INTVL_HRS: 6
+  MRMS_OBS_AVAIL_INTVL_HRS: 1
+  NDAS_OBS_AVAIL_INTVL_HRS: 1
+  #
+  # REMOVE_RAW_OBS_[CCPA|MRMS|NDAS|NOHRSC]:
+  # Boolean flag specifying whether to remove the "raw" observation
+  # directories after pulling the specified type of obs (CCPA, NOHRSC,
+  # MRMS, or NDAS).  The raw directories are the ones in which the
+  # observation files are placed immediately after pulling them from
+  # a data store (e.g. NOAA's HPSS) but before performing any processing
+  # on them (e.g. renaming the files or reorganizing their directory
+  # structure).
+  #
+  REMOVE_RAW_OBS_CCPA: true
+  REMOVE_RAW_OBS_MRMS: true
+  REMOVE_RAW_OBS_NDAS: true
+  REMOVE_RAW_OBS_NOHRSC: true
   #
   # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
   # Template used to specify the names of the output NetCDF observation
   # files generated by the workflow verification tasks that call the METplus
   # PcpCombine tool on CCPA observations.  (These files will contain obs
   # APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF
   # format.)
   #
+  # OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
+  # Template used to specify the names of the output NetCDF observation
+  # files generated by the workflow verification tasks that call the METplus
+  # PcpCombine tool on NOHRSC observations.  (These files will contain obs
+  # ASNOW, both for 6-hour and for > 6-hour accumulation periods, in NetCDF
+  # format.)
+  #
   # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT:
   # Template used to specify the names of the output NetCDF observation
   # files generated by the workflow verification tasks that call the
diff --git a/ush/get_obs.py b/ush/get_obs.py
index 84e49e6f40..b70d8c3ea9 100644
--- a/ush/get_obs.py
+++ b/ush/get_obs.py
@@ -229,7 +229,7 @@ def get_obs(config, obtype, yyyymmdd_task):
 
     # Get the base directory for the observations.
     key = obtype + '_OBS_DIR'
-    obs_dir = config['platform'][key]
+    obs_dir = vx_config[key]
 
@@ -603,7 +603,7 @@
     # Whether to remove raw observations after processed directories have
     # been created from them.
     key = 'REMOVE_RAW_OBS_' + obtype
-    remove_raw_obs = config['platform'][key]
+    remove_raw_obs = vx_config[key]
     # If the raw directories and files are to be removed at the end of this
     # script, no need to copy the files since the raw directories are going
     # to be removed anyway.

From 9ac85a30cf2cb20682398c9b488acb160e34ee08 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Mon, 7 Oct 2024 17:21:33 -0600
Subject: [PATCH 137/260] Fixes to WE2E test config files to reflect moving of
 variables from "platform" section to "verification" section in
 config_defaults.yaml.
--- ...g.custom_ESGgrid_Great_Lakes_snow_8km.yaml | 5 +-- ...nsemble_verification_only_vx_time_lag.yaml | 10 ++--- ...7hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 37 ++++++++++--------- ...1hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 37 ++++++++++--------- ...24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 37 ++++++++++--------- ...24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 37 ++++++++++--------- ...24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 37 ++++++++++--------- ...24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 37 ++++++++++--------- ...96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 37 ++++++++++--------- ...c.init_00z_fcstlen_36hr.winter_wx.SRW.yaml | 37 ++++++++++--------- 10 files changed, 159 insertions(+), 152 deletions(-) diff --git a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml index d773c632e2..0caffe5a46 100644 --- a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml +++ b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml @@ -55,10 +55,9 @@ task_run_fcst: task_run_post: POST_OUTPUT_DOMAIN_NAME: custom_ESGgrid_Michigan_Ontario verification: - VX_FCST_MODEL_NAME: Michigan_Ontario_snow_8km - VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] -platform: CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/CCPA_obs' MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/MRMS_obs' NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/NDAS_obs' NOHRSC_OBS_DIR: '{{ workflow.EXPTDIR }}/NOHRSC_obs' + VX_FCST_MODEL_NAME: Michigan_Ontario_snow_8km + VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml index d0edccca01..f26ae7db21 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml @@ -28,14 +28,14 @@ global: NUM_ENS_MEMBERS: 2 ENS_TIME_LAG_HRS: '[ 0, 12 ]' -# If the following is commented out, then the obs files staged on each -# platform will be (found and) used. -platform: + +verification: + # If the following is commented out, then the obs files staged on each + # platform will be (found and) used. CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/ccpa/proc' MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/mrms/proc' NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/ndas/proc' - -verification: + # VX_FCST_MODEL_NAME: FV3_GFS_v15p2_CONUS_25km VX_FCST_INPUT_BASEDIR: '{{ platform.get("TEST_VX_FCST_INPUT_BASEDIR") }}' VX_NDIGITS_ENSMEM_NAMES: 1 diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index 418e47e95e..3286066021 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -15,24 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. 
If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -54,6 +36,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 913d5093bb..3963a616b4 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -17,24 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. 
To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -56,6 +38,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index a859a03ac8..23035f3a92 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -15,24 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. 
- CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -54,6 +36,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml index 2e180e2714..10ceddd9a8 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -17,24 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -56,6 +38,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 37c3eceb24..c4f62a679d 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -16,24 +16,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -55,6 +37,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. 
+ # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index 563b8852a8..55cbf5b13f 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -15,24 +15,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -54,6 +36,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. 
+ CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 514dbed8d3..20cab966ef 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -17,24 +17,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -56,6 +38,25 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml index 6069ce8212..10ff318dd9 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml @@ -16,24 +16,6 @@ user: platform: EXTRN_MDL_DATA_STORES: hpss - # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. - # Note that when performing WE2E tests, the default behavior is not to - # get obs files from HPSS but to use staged obs files. This is done by - # setting these variables to the (platform-specific) locations of these - # staged files. To force the WE2E testing system to get the obs from - # HPSS, here we reset these variables to their default values in the SRW - # workflow configuration file config_defaults.yaml. - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - # Do not remove raw obs files to be able to verify that only the necessary - # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false workflow: PREEXISTING_DIR_METHOD: rename @@ -56,6 +38,25 @@ task_run_post: POST_OUTPUT_DOMAIN_NAME: 'custom_ESGgrid_Michigan_Ontario' verification: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the requied obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/SRW' From 09f8531580c6eee8f806aa6cb8bb99f110bdd7aa Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 7 Oct 2024 19:51:05 -0600 Subject: [PATCH 138/260] Fix bug found in latest develop branch. --- ush/get_crontab_contents.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/get_crontab_contents.py b/ush/get_crontab_contents.py index 5c651f3b0c..82bb350a0e 100644 --- a/ush/get_crontab_contents.py +++ b/ush/get_crontab_contents.py @@ -224,7 +224,7 @@ def _parse_args(argv): ) # Check that inputs are correct and consistent - args = parser._parse_args(argv) + args = parser.parse_args(argv) if args.remove: if args.line is None: From b43a9d223b5054e74dd7e0f6d2a3e89e78ed9574 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 10:15:22 -0600 Subject: [PATCH 139/260] Fix up documentation and comments. Minor code changes. --- ush/config_defaults.yaml | 3 +- ush/get_obs.py | 465 ++++++++++++++++++++++----------------- 2 files changed, 263 insertions(+), 205 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index e50e51406d..1e967ef9e4 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2419,7 +2419,8 @@ verification: # NOAA's HPSS. # # Note that MRMS files are in fact available every few minutes, but here - # we set the obs availability interval to 1 hour because the forecast + # we set the obs availability interval to 1 hour because currently that + # is the shortest output interval for the forecast, i.e. the forecast # cannot (yet) support sub-hourly output. # CCPA_OBS_AVAIL_INTVL_HRS: 1 diff --git a/ush/get_obs.py b/ush/get_obs.py index b70d8c3ea9..c831ad909e 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -110,92 +110,184 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): def get_obs(config, obtype, yyyymmdd_task): """ -This script performs several important tasks for preparing data for -verification tasks. Depending on the value of the environment variable -OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -set. + This script checks for the existence of obs files of the specified type + at the locations specified by variables in the SRW App's configuration + file. If one or more of these files do not exist, it retrieves them from + a data store and places them in the locations specified by the configuration + variables, renaming them if necessary. -If data is not available on disk (in the location specified by -CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -the script attempts to retrieve the data from HPSS using the retrieve_data.py -script. Depending on the data set, there are a few strange quirks and/or -bugs in the way data is organized; see in-line comments for details. - - -CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs ----------- -If data is available on disk, it must be in the following -directory structure and file name conventions expected by verification -tasks: - -{CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 - -If data is retrieved from HPSS, it will be automatically staged by this -script. - -Notes about the data and how it's used for verification: - -1. 
Accumulation is currently hardcoded to 01h. The verification will -use MET/pcp-combine to sum 01h files into desired accumulations. - -2. There is a problem with the valid time in the metadata for files -valid from 19 - 00 UTC (or files under the '00' directory). This is -accounted for in this script for data retrieved from HPSS, but if you -have manually staged data on disk you should be sure this is accounted -for. See in-line comments below for details. - - -MRMS (Multi-Radar Multi-Sensor) radar observations ----------- -If data is available on disk, it must be in the following -directory structure and file name conventions expected by verification -tasks: - -{MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, - -Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity -data and EchoTop_18_00.50_ for echo top data. If data is not available -at the top of the hour, you should rename the file closest in time to -your hour(s) of interest to the above naming format. A script -"ush/mrms_pull_topofhour.py" is provided for this purpose. - -If data is retrieved from HPSS, it will automatically staged by this -this script. - - -NDAS (NAM Data Assimilation System) conventional observations ----------- -If data is available on disk, it must be in the following -directory structure and file name conventions expected by verification -tasks: - -{NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} - -Note that data retrieved from HPSS and other sources may be in a -different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is -either 00, 06, 12, or 18, and prevhour is the number of hours prior to -hh (00 through 05). If using custom staged data, you will have to -rename the files accordingly. - -If data is retrieved from HPSS, it will be automatically staged by this -this script. - - -NOHRSC snow accumulation observations ----------- -If data is available on disk, it must be in the following -directory structure and file name conventions expected by verification -tasks: - -{NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 + Args: + config: + The final configuration dictionary (obtained from var_defns.yaml). -where AA is the 2-digit accumulation duration in hours: 06 or 24 + obtype: + The observation type. A string. -METplus is configured to verify snowfall using 06- and 24-h accumulated -snowfall from 6- and 12-hourly NOHRSC files, respectively. + yyyymmdd_task: + The date for which obs may be needed. A datetime object. -If data is retrieved from HPSS, it will automatically staged by this -this script. + Returns: + True if all goes well. + + + Detailed Description: + + In this script, the main (outer) loop to obtain obs files is over a + sequence of archive hours, where each archive hour in the sequence + represents one archive (tar) file in the data store, and archive hours + are with respect to hour 0 of the day. The number of archive hours in + this sequence depends on how the obs files are arranged into archives + for the given obs type. For example, if the obs files for a given day + are arranged into four archives, then the archive interval is 6 hours, + and in order to get all the obs files for that day, the loop must + iterate over a sequence of 4 hours, either [0, 6, 12, 18] or [6, 12, + 18, 24] (which of these it will be depends on how the obs files are + arranged into the archives). 
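
    [Editorial sketch, not part of the patch: the mapping from an hour of the day to
    the archive hour that covers it amounts to rounding up to the next multiple of the
    archive interval.  The rounding details below are an assumption illustrating what
    get_obs_arcv_hr() computes, not code copied from it; values use the 6-hourly
    CCPA-style archives described next.]

        import math
        arcv_intvl_hrs = 6
        for hod in [0, 1, 6, 7, 23]:
            hod_eff = 24 if hod == 0 else hod   # cumulative fields: hour 0 -> hour 24
            print(hod, '->', math.ceil(hod_eff / arcv_intvl_hrs) * arcv_intvl_hrs)
        # prints: 0 -> 24, 1 -> 6, 6 -> 6, 7 -> 12, 23 -> 24
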
+ + Below, we give a description of archive layout for each obs type and + give the archive hours to loop over for the case in which we need to + obtain all available obs for the current day. + + + CCPA (Climatology-Calibrated Precipitation Analysis) precipitation + accumulation obs: + ---------- + For CCPA, the archive interval is 6 hours, i.e. the obs files are bundled + into 6-hourly archives. The archives are organized such that each one + contains 6 files, so that the obs availability interval is + + obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] + = 1 hr/file + + i.e. there is one obs file for each hour of the day containing the + accumulation over that one hour. The archive corresponding to hour 0 + of the current day contains 6 files representing accumulations during + the 6 hours of the previous day. The archive corresponding to hour 6 + of the current day contains 6 files for the accumulations during the + first 6 hours of the current day, and the archives corresponding to + hours 12 and 18 of the current day each contain 6 files for accumulations + during hours 6-12 and 12-18, respectively, of the current day. Thus, + to obtain all the one-hour accumulations for the current day, we must + extract all the obs files from the three archives corresponding to hours + 6, 12, and 18 of the current day and from the archive corresponding to + hour 0 of the next day. This corresponds to an archive hour sequence + of [6, 12, 18, 24]. Thus, in the simplest case in which the observation + retrieval times include all hours of the current task's day at which + obs files are available and none of the obs files for this day already + exist on disk, this sequence will be [6, 12, 18, 24]. In other cases, + the sequence we loop over will be a subset of [6, 12, 18, 24]. + + Note that CCPA files for 1-hour accumulation have incorrect metadata in + the files under the "00" directory (i.e. for hours-of-day 19 to 00 of + the next day) from 20180718 to 20210504. This script corrects these + errors if getting CCPA obs at these times. + + + NOHRSC (National Operational Hydrologic Remote Sensing Center) snow + accumulation observations: + ---------- + For NOHRSC, the archive interval is 24 hours, i.e. the obs files are + bundled into 24-hourly archives. The archives are organized such that + each one contains 4 files, so that the obs availability interval is + + obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)] + = 6 hr/file + + i.e. there is one obs file for each 6-hour interval of the day containing + the accumulation over those 6 hours. The 4 obs files within each archive + correspond to hours 0, 6, 12, and 18 of the current day. The obs file + for hour 0 contains accumulations during the last 6 hours of the previous + day, while those for hours 6, 12, and 18 contain accumulations for the + first, second, and third 6-hour chunks of the current day. Thus, to + obtain all the 6-hour accumulations for the current day, we must extract + from the archive for the current day the obs files for hours 6, 12, and + 18 and from the archive for the next day the obs file for hour 0. This + corresponds to an archive hour sequence of [0, 24]. Thus, in the simplest + case in which the observation retrieval times include all hours of the + current task's day at which obs files are available and none of the obs + files for this day already exist on disk, this sequence will be [0, 24]. + In other cases, the sequence we loop over will be a subset of [0, 24]. 
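
    [Editorial sketch, not part of the patch: the availability-interval arithmetic used
    in these descriptions, with the per-type archive and file counts summarized from
    the surrounding text (NDAS is described further below):]

        # (archives per day, files per archive) -> obs availability interval
        layout = {'CCPA': (4, 6), 'NOHRSC': (1, 4), 'NDAS': (4, 6)}
        for obtype, (n_arcv, n_files) in layout.items():
            print(obtype, 24 // (n_arcv * n_files), 'hr/file')
        # prints: CCPA 1 hr/file, NOHRSC 6 hr/file, NDAS 1 hr/file
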
+
+
+    MRMS (Multi-Radar Multi-Sensor) radar observations:
+    ----------
+    For MRMS, the archive interval is 24 hours, i.e. the obs files are
+    bundled into 24-hourly archives.  The archives are organized such that
+    each contains gzipped grib2 files for that day that are usually only a
+    few minutes apart.  However, since the forecasts cannot (yet) perform
+    sub-hourly output, we filter this data in time by using only those obs
+    files that are closest to each hour of the day for which obs are needed.
+    This effectively sets the obs availability interval for MRMS to one
+    hour, i.e.
+
+      obs_avail_intvl_hrs = 1 hr/file
+
+    i.e. there is one obs file for each hour of the day containing values
+    at that hour (but only after filtering in time; also see notes for
+    MRMS_OBS_AVAIL_INTVL_HRS in config_defaults.yaml).  Thus, to obtain the
+    obs at all hours of the day, we only need to extract files from one
+    archive.  Thus, in the simplest case in which the observation retrieval
+    times include all hours of the current task's day at which obs files
+    are available and none of the obs files for this day already exist on
+    disk, the sequence of archive hours over which we loop will be just
+    [0].  Note that:
+
+    * For cases in which MRMS data are not needed for all hours of the day,
+      we still need to retrieve and extract from this single daily archive.
+      Thus, the archive hour sequence over which we loop will always
+      be just [0] for MRMS obs.
+
+    * Because MRMS obs are split into two sets of archives -- one for
+      composite reflectivity (REFC) and another for echo top (RETOP) --
+      on any given day (and with an archive hour of 0) we actually retrieve
+      and extract two different archive files (one per field).
+
+
+    NDAS (NAM Data Assimilation System) conventional observations:
+    ----------
+    For NDAS, the archive interval is 6 hours, i.e. the obs files are
+    bundled into 6-hourly archives.  The archives are organized such that
+    each one contains 7 files (not, say, 6).  The archive associated with
+    time yyyymmddhh_arcv contains the hourly files at
+
+      yyyymmddhh_arcv - 6 hours
+      yyyymmddhh_arcv - 5 hours
+      ...
+      yyyymmddhh_arcv - 2 hours
+      yyyymmddhh_arcv - 1 hours
+      yyyymmddhh_arcv - 0 hours
+
+    These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files,
+    respectively.  Thus, the tm06 file from the current archive, say the
+    one associated with time yyyymmddhh_arcv, has the same valid time as
+    the tm00 file from the previous archive, i.e. the one associated with
+    time (yyyymmddhh_arcv - 6 hours).  It turns out that the tm06 file from
+    the current archive contains more/better observations than the tm00
+    file from the previous archive.  Thus, for a given archive time
+    yyyymmddhh_arcv, we use 6 of the 7 files at tm06, ..., tm01 but not
+    the one at tm00, effectively resulting in 6 files per archive for NDAS
+    obs.  The obs availability interval is then
+
+      obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)]
+                          = 1 hr/file
+
+    i.e. there is one obs file for each hour of the day containing values
+    at that hour.  The archive corresponding to hour 0 of the current day
+    contains 6 files valid at hours 18 through 23 of the previous day.  The
+    archive corresponding to hour 6 of the current day contains 6 files
+    valid at hours 0 through 5 of the current day, and the archives
+    corresponding to hours 12 and 18 of the current day each contain 6
+    files valid at hours 6 through 11 and 12 through 17 of the current day.
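
    [Editorial sketch of the tm-file timing just described; the NDAS description
    continues after this aside.  The nam.t{hh}z.prepbufr.tm*.nr naming follows
    parm/data_locations.yml, while the archive time is an arbitrary example.]

        import datetime as dt
        arcv = dt.datetime(2024, 7, 9, 6)        # example 06z NDAS archive
        for tm in range(6, 0, -1):               # tm06..tm01 are used; tm00 is skipped
            valid = arcv - dt.timedelta(hours=tm)
            print(f"nam.t{arcv:%H}z.prepbufr.tm{tm:02d}.nr  valid at {valid:%Y%m%d%H}")
        # tm06 -> 2024070900, tm05 -> 2024070901, ..., tm01 -> 2024070905
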
+ Thus, to obtain all the hourly values for the current day (from hour + 0 to hour 23), we must extract the 6 obs files (excluding the tm00 + ones) from the three archives corresponding to hours 6, 12, and 18 of + the current day and the archive corresponding to hour 0 of the next + day. This corresponds to an archive hour sequence set below of [6, 12, + 18, 24]. Thus, in the simplest case in which the observation retrieval + times include all hours of the current task's day at which obs files + are available and none of the obs files for this day already exist on + disk, this sequence will be [6, 12, 18, 24]. In other cases, the + sequence we loop over will be a subset of [6, 12, 18, 24]. """ # Convert obtype to upper case to simplify code below. @@ -355,100 +447,6 @@ def get_obs(config, obtype, yyyymmdd_task): # To generate this sequence, we first set the archive interval and then # set the starting and ending archive hour values. # - # For CCPA, the archive interval is 6 hours, i.e. the obs files are - # bundled into 6-hourly archives. This implies 4 archives per day. The - # archives are organized such that each one contains 6 files, so that the - # obs availability interval is - # - # obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] - # = 1 hr/file - # - # i.e. there is one obs file for each hour of the day containing the - # accumulation over that one hour. The archive corresponding to hour 0 - # of the current day contains 6 files representing accumulations during - # the 6 hours of the previous day. The archive corresponding to hour 6 - # of the current day corresponds to accumulations during the first 6 - # hours of the current day, and the archives corresponding to hours 12 - # and 18 of the current day correspond to accumulations during the 2nd - # and 3rd 6-hourly intervals of the current day. Thus, to obtain all the - # one-hour accumulations for the current day, we must extract all the obs - # files from the archives corresponding to hours 6, 12, and 18 of the - # current day and hour 0 of the next day. This corresponds to an archive - # hour sequence set below of [6, 12, 18, 24]. Thus, in the simplest case - # in which the observation retrieval times include all hours of the - # current task's day at which obs files are available and none of the obs - # files for this day already exist on disk, this sequence will be [6, 12, - # 18, 24]. In other cases, the sequence we loop over will be a subset of - # [6, 12, 18, 24]. - # - # For NOHRSC, the archive interval is 24 hours, i.e. the obs files are - # bundled into 24-hourly archives. This implies just 1 archive per day. - # The archives are organized such that each one contains 4 files, so that - # the obs availability interval is - # - # obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)] - # = 6 hr/file - # - # i.e. there is one obs file for each 6-hour interval of the day containing - # the accumulation over those 6 hours. The 4 obs files within each archive - # correspond to hours 0, 6, 12, and 18 of the current day. The obs file - # for hour 0 contains accumulations during the last 6 hours of the previous - # day, while those for hours 6, 12, and 18 contain accumulations for the - # first, second, and third 6-hour chunks of the current day. Thus, to - # obtain all the 6-hour accumulations for the current day, we must extract - # from the archive for the current day the obs files for hours 6, 12, and - # 18 and from the archive for the next day the obs file for hour 0. 
This - # corresponds to an archive hour sequence set below of [0, 24]. Thus, in - # the simplest case in which the observation retrieval times include all - # hours of the current task's day at which obs files are available and - # none of the obs files for this day already exist on disk, this sequence - # will be [0, 24]. In other cases, the sequence we loop over will be a - # subset of [0, 24]. - # - # For NDAS, the archive interval is 6 hours, i.e. the obs files are - # bundled into 6-hourly archives. This implies 4 archives per day. The - # archives are organized such that each one contains 7 files (not say 6). - # The archive associated with time yyyymmddhh_arcv contains the hourly - # files at - # - # yyyymmddhh_arcv - 6 hours - # yyyymmddhh_arcv - 5 hours - # ... - # yyyymmddhh_arcv - 2 hours - # yyyymmddhh_arcv - 1 hours - # yyyymmddhh_arcv - 0 hours - # - # These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, - # respectively. Thus, the tm06 file from the current archive, say the - # one associated with time yyyymmddhh_arcv, has the same valid time as - # the tm00 file from the previous archive, i.e. the one associated with - # time (yyyymmddhh_arcv - 6 hours). It turns out the tm06 file from the - # current archive contains more/better observations than the tm00 file - # from the previous archive. Thus, for a given archive time yyyymmddhh_arcv, - # we use 6 of the 7 files at tm06, ..., tm01 but not the one at tm00, - # effectively resulting in an 6 files per archive for NDAS obs. The obs - # availability interval is then - # - # obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] - # = 1 hr/file - # - # i.e. there is one obs file for each hour of the day containing values - # at that hour. The archive corresponding to hour 0 of the current day - # contains 6 files valid at hours 18 through 23 of the previous day. The - # archive corresponding to hour 6 of the current day contains 6 files - # valid at hours 0 through 5 of the current day, and the archives - # corresponding to hours 12 and 18 of the current day each contain 6 - # files valid at hours 6 through 11 and 12 through 17 of the current day. - # Thus, to obtain all the hourly values for the current day (from hour - # 0 to hour 23), we must extract the 6 obs files (excluding the tm00 - # ones) from the archives corresponding to hours 6, 12, and 18 of the - # current day and the archive corresponding to hour 0 of the next day. - # This corresponds to an archive hour sequence set below of [6, 12, 18, - # 24]. Thus, in the simplest case in which the observation retrieval - # times include all hours of the current task's day at which obs files - # are available and none of the obs files for this day already exist on - # disk, this sequence will be [6, 12, 18, 24]. In other cases, the - # sequence we loop over will be a subset of [6, 12, 18, 24]. # #----------------------------------------------------------------------- # @@ -628,24 +626,62 @@ def get_obs(config, obtype, yyyymmdd_task): yyyymmddhh_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d%H') yyyymmdd_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d') - # Directory that will contain the files retrieved from the current archive - # file. We refer to this as the "raw" archive directory because it will - # contain the files as they are in the archive before any processing by - # this script. + # Set the subdirectory under the raw base directory that will contain the + # files retrieved from the current archive. 
We refer to this as the "raw"
+    # archive subdirectory because it will contain the files as they are in
+    # the archive before any processing by this script.  Later below, this
+    # will be combined with the raw base directory (whose name depends on the
+    # year, month, and day of the current obs day) to obtain the full path to
+    # the raw archive directory (arcv_dir_raw).
+    #
+    # Notes on each obs type:
+    #
+    # CCPA:
+    # The raw subdirectory name must include the year, month, day, and hour
+    # in order to avoid get_obs tasks for different days clobbering each
+    # others' obs files.
+    #
+    # NOHRSC:
+    # The hour-of-day of the archive is irrelevant because there is only one
+    # archive per day, so we don't include it in the raw archive subdirectory's
+    # name.  However, we still need a subdirectory that contains the year,
+    # month, and day information of the archive because in the simplest case
+    # of having to get the NOHRSC obs for all hours of the current obs day,
+    # we need to extract obs files from two archives -- one for the current
+    # day (which includes the files for accumulations over hours 0-6, 6-12,
+    # and 12-18 of the current day) and another for the next day (which
+    # includes the file for accumulations over hours 18-24 of the current
+    # day).  To distinguish between the raw obs files from these two archives,
+    # we create an archive-time dependent raw subdirectory for each possible
+    # archive.
+    #
+    # MRMS:
+    # There is only one archive per day, and it contains all the raw obs
+    # files needed to generate processed obs files for all hours of the
+    # current day.  Thus, we will only ever need this one archive, so there
+    # is no need to include the archive's hour information (there really
+    # isn't any) in the raw subdirectory name.  In addition, the archive's
+    # year, month, and day is the same as that of the obs day's, so it is
+    # already included in the name of the raw base directory.  Since this is
+    # the only info we need to avoid different get_obs tasks clobbering each
+    # other's output obs files, for simplicity we simply do not create a raw
+    # archive subdirectory.
+    #
+    # NDAS:
+    # Same as for CCPA.
     if obtype == 'CCPA':
-        arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str)
-    # For NOHRSC, the hour-of-day for the archive is irrelevant since there
-    # is only one archive per day, so don't include it in the raw archive
-    # directory's name.
+        arcv_subdir_raw = yyyymmddhh_arcv_str
     elif obtype == 'NOHRSC':
-        arcv_dir_raw = os.path.join(basedir_raw, yyyymmdd_arcv_str)
-    # Since for MRMS data there is only one archive per day, that directory
-    # is redundant, so for simplicity we set arcv_dir_raw to just basedir_raw.
+        arcv_subdir_raw = yyyymmdd_arcv_str
     elif obtype == 'MRMS':
-        arcv_dir_raw = basedir_raw
+        arcv_subdir_raw = ''
     elif obtype == 'NDAS':
         arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str)

+    # Combine the raw archive base directory with the raw archive subdirectory
+    # name to obtain the full path to the raw archive directory.
+    arcv_dir_raw = os.path.join(basedir_raw, arcv_subdir_raw)
+
     # Check whether any of the obs retrieval times for the day associated with
     # this task fall in the time interval spanned by the current archive.  If
     # so, set the flag (do_retrieve) to retrieve the files in the current
@@ -739,23 +775,44 @@ def get_obs(config, obtype, yyyymmdd_task):
     # Loop over the raw obs files extracted from the current archive and
     # generate from them the processed obs files.
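
    [Editorial sketch, before the processing notes below, of the directory scheme laid
    out above.  All path values here are hypothetical; only the join pattern mirrors
    the code.]

        import os
        basedir_raw = 'obs_data/ccpa/raw'        # hypothetical raw base directory
        arcv_subdir_raw = '2024070906'           # CCPA/NDAS style: yyyymmddhh of the archive
        print(os.path.join(basedir_raw, arcv_subdir_raw))  # obs_data/ccpa/raw/2024070906
        print(os.path.join(basedir_raw, ''))     # MRMS style: empty subdir keeps the base
                                                 # directory (plus a trailing slash)
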
    #
-    # For CCPA obs, for most dates this consists of simply copying or moving
-    # the files from the raw archive directory to the processed directory,
-    # possibly renaming them in the process.  However, for dates between
-    # 20180718 and 20210504 and hours-of-day 19 through the end of the day
-    # (i.e. hour 0 of the next day), it involves using wgrib2 to correct an
-    # error in the metadata of the raw file and writing the corrected data
-    # to a new grib2 file in the processed location.
-    #
-    # For NOHRSC obs, this consists of simply copying or moving the files from
-    # the raw archive directory to the processed directory, possibly renaming
-    # them in the process.
-    #
-    # For NDAS obs, this consists of simply copying or moving the files from
-    # the raw archive directory to the processed directory, possibly renaming
-    # them in the process.  Note that the tm06 file in a given archive contain
-    # more/better observations than the tm00 file in the next archive (their
-    # valid times are equivalent), so we use the tm06 files.
+    # Notes on each obs type:
+    #
+    # CCPA:
+    # For most dates, generating the processed obs files consists of simply
+    # copying or moving the files from the raw archive directory to the processed
+    # directory, possibly renaming them in the process.  However, for dates
+    # between 20180718 and 20210504 and hours-of-day 19 through the end of the
+    # day (i.e. hour 0 of the next day), it involves using wgrib2 to correct an
+    # error in the metadata of the raw file and writing the corrected data
+    # to a new grib2 file in the processed location.
+    #
+    # NOHRSC:
+    # Generating the processed obs files consists of simply copying or moving
+    # the files from the raw archive directory to the processed directory,
+    # possibly renaming them in the process.
+    #
+    # MRMS:
+    # The MRMS obs are in fact available every few minutes, but the smallest
+    # value we allow the obs availability interval to be set to is 1 hour
+    # because the forecasts cannot (yet) perform sub-hourly output (also see
+    # notes for MRMS_OBS_AVAIL_INTVL_HRS in config_defaults.yaml).  For this
+    # reason, MRMS obs require an extra processing step on the raw files (before
+    # creating the processed files).  In this step, at each obs retrieval time
+    # we first generate an intermediate grib2 file from the set of all raw (and
+    # gzipped) grib2 files for the current day (the latter usually being only a
+    # few minutes apart) the file that is nearest in time to the obs retrieval
+    # time.  After selecting this gzipped grib2 file, we unzip it and place it
+    # in a temporary subdirectory under the raw base directory.  Only after this
+    # step do we then generate the processed file by moving this intermediate
+    # file to the processed directory, possibly renaming it in the process.
+    #
+    # NDAS:
+    # Generating the processed obs files consists of simply copying or moving
+    # the files from the raw archive directory to the processed directory,
+    # possibly renaming them in the process.  Note that for a given NDAS archive,
+    # the tm06 file contains more/better observations than the tm00 file
+    # in the previous archive (their valid times being equivalent), so we always
+    # use the tm06 files.

     for yyyymmddhh in obs_times_in_arcv:

         # Create the processed obs file from the raw one (by moving, copying, or
@@ -870,7 +927,7 @@ def parse_args(argv):

 def parse_args(argv):
-    """Parse command line arguments"""
+    """Parse command line arguments."""

     parser = argparse.ArgumentParser(
         description="Get observations."
) From 50729f5dc02a20c982653a97025bbdfecd256d90 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 10:56:36 -0600 Subject: [PATCH 140/260] Bug fix. --- ush/get_obs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/get_obs.py b/ush/get_obs.py index c831ad909e..d7833a70f7 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -676,7 +676,7 @@ def get_obs(config, obtype, yyyymmdd_task): elif obtype == 'MRMS': arcv_subdir_raw = '' elif obtype == 'NDAS': - arcv_dir_raw = os.path.join(basedir_raw, yyyymmddhh_arcv_str) + arcv_subdir_raw = yyyymmddhh_arcv_str # Combine the raw archive base directory with the raw archive subdirectory # name to obtain the full path to the raw archive directory. From 601284359644fd5547d91be0dc6c828bfa26814a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 11:30:29 -0600 Subject: [PATCH 141/260] Remove commented-out lines; remove trailing whitespace. --- parm/metplus/PcpCombine.conf | 1 - ush/get_obs.py | 76 ++++++++++++++++++------------------ 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index de99871bed..04562dc14b 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -126,7 +126,6 @@ FCST_PCP_COMBINE_RUN = False # # Accumulation interval available in the input data. # -#{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = 01 {{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = {{input_accum_hh}} # # Accumulation interval to generate in the output file. diff --git a/ush/get_obs.py b/ush/get_obs.py index d7833a70f7..50b7c45ae3 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -42,7 +42,7 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): hod: The hour of the day. An integer. This must be between 0 and 23. For - cumulative fields (CCPA and NOHRSC), hour 0 is treated as that of the + cumulative fields (CCPA and NOHRSC), hour 0 is treated as that of the next day, i.e. as the 24th hour of the current day. Returns: @@ -132,7 +132,7 @@ def get_obs(config, obtype, yyyymmdd_task): Detailed Description: - In this script, the main (outer) loop to obtain obs files is over a + In this script, the main (outer) loop to obtain obs files is over a sequence of archive hours, where each archive hour in the sequence represents one archive (tar) file in the data store, and archive hours are with respect to hour 0 of the day. The number of archive hours in @@ -143,22 +143,22 @@ def get_obs(config, obtype, yyyymmdd_task): iterate over a sequence of 4 hours, either [0, 6, 12, 18] or [6, 12, 18, 24] (which of these it will be depends on how the obs files are arranged into the archives). - + Below, we give a description of archive layout for each obs type and give the archive hours to loop over for the case in which we need to obtain all available obs for the current day. - - + + CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs: ---------- For CCPA, the archive interval is 6 hours, i.e. the obs files are bundled into 6-hourly archives. The archives are organized such that each one contains 6 files, so that the obs availability interval is - + obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] = 1 hr/file - + i.e. there is one obs file for each hour of the day containing the accumulation over that one hour. 
The archive corresponding to hour 0 of the current day contains 6 files representing accumulations during @@ -176,23 +176,23 @@ def get_obs(config, obtype, yyyymmdd_task): obs files are available and none of the obs files for this day already exist on disk, this sequence will be [6, 12, 18, 24]. In other cases, the sequence we loop over will be a subset of [6, 12, 18, 24]. - + Note that CCPA files for 1-hour accumulation have incorrect metadata in the files under the "00" directory (i.e. for hours-of-day 19 to 00 of the next day) from 20180718 to 20210504. This script corrects these errors if getting CCPA obs at these times. - - + + NOHRSC (National Operational Hydrologic Remote Sensing Center) snow accumulation observations: ---------- For NOHRSC, the archive interval is 24 hours, i.e. the obs files are bundled into 24-hourly archives. The archives are organized such that - each one contains 4 files, so that the obs availability interval is - + each one contains 4 files, so that the obs availability interval is + obs_avail_intvl_hrs = (24 hrs)/[(1 archive)*(4 files/archive)] = 6 hr/file - + i.e. there is one obs file for each 6-hour interval of the day containing the accumulation over those 6 hours. The 4 obs files within each archive correspond to hours 0, 6, 12, and 18 of the current day. The obs file @@ -201,14 +201,14 @@ def get_obs(config, obtype, yyyymmdd_task): first, second, and third 6-hour chunks of the current day. Thus, to obtain all the 6-hour accumulations for the current day, we must extract from the archive for the current day the obs files for hours 6, 12, and - 18 and from the archive for the next day the obs file for hour 0. This + 18 and from the archive for the next day the obs file for hour 0. This corresponds to an archive hour sequence of [0, 24]. Thus, in the simplest case in which the observation retrieval times include all hours of the current task's day at which obs files are available and none of the obs files for this day already exist on disk, this sequence will be [0, 24]. In other cases, the sequence we loop over will be a subset of [0, 24]. - - + + MRMS (Multi-Radar Multi-Sensor) radar observations: ---------- For MRMS, the archive interval is 24 hours, i.e. the obs files are @@ -219,9 +219,9 @@ def get_obs(config, obtype, yyyymmdd_task): files that are closest to each hour of the day for which obs are needed. This effectively sets the obs availability interval for MRMS to one hour, i.e. - + obs_avail_intvl_hrs = 1 hr/file - + i.e. there is one obs file for each hour of the day containing values at that hour (but only after filtering in time; also see notes for MRMS_OBS_AVAIL_INTVL_HRS in config_defaults.yaml). Thus, to obtain the @@ -231,33 +231,33 @@ def get_obs(config, obtype, yyyymmdd_task): are available and none of the obs files for this day already exist on disk, the sequence of archive hours over which we loop will be just [0]. Note that: - + * For cases in which MRMS data are not needed for all hours of the day, we still need to retrieve and extract from this single daily archive. Thus, the archive hour sequence over which we loop over will always be just [0] for MRMS obs. - + * Because MRMS obs are split into two sets of archives -- one for composite reflectivity (REFC) and another for echo top (RETOP) -- on any given day (and with an archive hour of 0) we actually retrive and extract two different archive files (one per field). 
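
    [Editorial sketch of the nearest-to-top-of-hour selection described in the MRMS
    notes above.  Timestamps and file names are made up for illustration; in the
    workflow this filtering is done by ush/mrms_pull_topofhour.py.]

        import datetime as dt
        files = {dt.datetime(2024, 7, 9, 5, 56): 'MergedReflectivityQCComposite_00.50_20240709-055600.grib2.gz',
                 dt.datetime(2024, 7, 9, 6, 2):  'MergedReflectivityQCComposite_00.50_20240709-060200.grib2.gz'}
        target = dt.datetime(2024, 7, 9, 6)
        print(files[min(files, key=lambda t: abs(t - target))])  # the 06:02 file wins (2 min vs 4 min)
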
- - + + NDAS (NAM Data Assimilation System) conventional observations: ---------- For NDAS, the archive interval is 6 hours, i.e. the obs files are bundled into 6-hourly archives. The archives are organized such that each one contains 7 files (not say 6). The archive associated with - time yyyymmddhh_arcv contains the hourly files at - + time yyyymmddhh_arcv contains the hourly files at + yyyymmddhh_arcv - 6 hours yyyymmddhh_arcv - 5 hours ... yyyymmddhh_arcv - 2 hours yyyymmddhh_arcv - 1 hours yyyymmddhh_arcv - 0 hours - - These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, + + These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files, respectively. Thus, the tm06 file from the current archive, say the one associated with time yyyymmddhh_arcv, has the same valid time as the tm00 file from the previous archive, i.e. the one associated with @@ -267,10 +267,10 @@ def get_obs(config, obtype, yyyymmdd_task): yyyymmddhh_arcv, we use 6 of the 7 files at tm06, ..., tm01 but not the one at tm00, effectively resulting in 6 files per archive for NDAS obs. The obs availability interval is then - + obs_avail_intvl_hrs = (24 hrs)/[(4 archives)*(6 files/archive)] = 1 hr/file - + i.e. there is one obs file for each hour of the day containing values at that hour. The archive corresponding to hour 0 of the current day contains 6 files valid at hours 18 through 23 of the previous day. The @@ -327,7 +327,7 @@ def get_obs(config, obtype, yyyymmdd_task): # observation files that we need for verification. Each group of fields # is one that is verified together in the workflow. We assume there is # a separate set of obs files for each such field group in the observations, - # and in the code below we loop over these sets of files as necessary. + # and in the code below we loop over these sets of files as necessary. # There are several scenarios to consider: # # * An obs type consists of only one set of files containing only one @@ -336,7 +336,7 @@ def get_obs(config, obtype, yyyymmdd_task): # set of files that contain APCP data, and NOHRSC obs consist of only # one set of files that contain ASNOW data. # - # * An obs type consists of more than one set of files, with each file + # * An obs type consists of more than one set of files, with each file # containing a different field. # This is the case for MRMS obs. These consist of two sets of files. # The first set contains REFC data, and the second contains RETOP data. @@ -344,13 +344,13 @@ def get_obs(config, obtype, yyyymmdd_task): # * An obs type consists of only one set of files, but each file contains # multiple groups of fields needed for verification. # This is the case for NDAS obs. These consist of a single set of files, - # but each file contains both the ADPSFC fields (like 2-m temperature) + # but each file contains both the ADPSFC fields (like 2-m temperature) # and ADPUPA fields (like 500-mb temperature) that are verified separately # in the workflow tasks and thus are considered separate field groups. # # Other obs type and field group scenarios are also possible, but we do # not describe them since they are not applicable to any of the obs types - # considered here. + # considered here. if obtype == 'CCPA': field_groups_in_obs = ['APCP'] elif obtype == 'NOHRSC': @@ -659,7 +659,7 @@ def get_obs(config, obtype, yyyymmdd_task): # There is only one archive per day, and it contains all the raw obs # files needed to generate processed obs files for all hours of the # current day. 
Thus, we will only ever need this one archive, so there - # is no need to include the archive's hour information (there really + # is no need to include the archive's hour information (there really # isn't any) in the raw subdirectory name. In addition, the archive's # year, month, and day is the same as that of the obs day's, so it is # already included in the name of the raw base directory. Sine this is @@ -785,12 +785,12 @@ def get_obs(config, obtype, yyyymmdd_task): # day (i.e. hour 0 of the next day), it involves using wgrib2 to correct an # error in the metadata of the raw file and writing the corrected data # to a new grib2 file in the processed location. - # + # # NOHRSC: # Generating the processed obs files consists of simply copying or moving # the files from the raw archive directory to the processed directory, # possibly renaming them in the process. - # + # # MRMS: # The MRMS obs are in fact available every few minutes, but the smallest # value we allow the obs availability interval to be set to is 1 hour @@ -800,12 +800,12 @@ def get_obs(config, obtype, yyyymmdd_task): # creating the processed files). In this step, at each obs retrieval time # we first generate an intermediate grib2 file from the set of all raw (and # gzipped) grib2 files for the current day (the latter usually being only a - # few minutes apart) the file that is nearest in time to the obs retrieval + # few minutes apart) the file that is nearest in time to the obs retrieval # time. After selecting this gzipped grib2 file, we unzip it and place it # in a temporary subdirectory under the raw base directory. Only after this # step do we then generate the processed file by moving this intermediate # file to the processed directory, possibly renaming it in the process. - # + # # NDAS: # Generating the processed obs files consists of simply copying or moving # the files from the raw archive directory to the processed directory, @@ -832,7 +832,7 @@ def get_obs(config, obtype, yyyymmdd_task): # For MRMS obs, first select from the set of raw files for the current day # those that are nearest in time to the current hour. Unzip these in a # temporary subdirectory under the raw base directory. - # + # # Note that the script we call to do this (mrms_pull_topofhour.py) assumes # a certain file naming convention. That convention must match the names # of the files that the retrieve_data.py script called above ends up From 1c924a896664e6a815398c11299709bf4bf82465 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 14:13:08 -0600 Subject: [PATCH 142/260] Remove unnecessary bash utility function. --- ush/bash_utils/ceil.sh | 122 --------------------------------------- ush/source_util_funcs.sh | 9 --- 2 files changed, 131 deletions(-) delete mode 100644 ush/bash_utils/ceil.sh diff --git a/ush/bash_utils/ceil.sh b/ush/bash_utils/ceil.sh deleted file mode 100644 index dc8a21c90d..0000000000 --- a/ush/bash_utils/ceil.sh +++ /dev/null @@ -1,122 +0,0 @@ -# -#----------------------------------------------------------------------- -# -# This function returns the ceiling of the quotient of two numbers. The -# ceiling of a number is the number rounded up to the nearest integer. -# -#----------------------------------------------------------------------- -# -function ceil() { -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. 
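
[Editorial aside on the helper being deleted here: the integer-ceiling identity it
relies on (documented in the removed file just below) is easy to sanity-check in
Python, which is presumably part of why a dedicated bash function was deemed
unnecessary.]

    # ceil(numer/denom) == floor((numer + denom - 1)/denom) for positive integers
    for numer, denom in [(1, 6), (6, 6), (7, 6), (23, 6)]:
        assert (numer + denom - 1) // denom == -(-numer // denom)
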
-# -#----------------------------------------------------------------------- -# - { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). -# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. -# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Check number of arguments. -# -#----------------------------------------------------------------------- -# - if [ "$#" -ne 2 ]; then - - print_err_msg_exit " -Incorrect number of arguments specified: - - Function name: \"${func_name}\" - Number of arguments specified: $# - -Usage: - - ${func_name} numer denom - -where denom is a nonnegative integer and denom is a positive integer. -" - - fi -# -#----------------------------------------------------------------------- -# -# Make sure arguments are of the right form. -# -#----------------------------------------------------------------------- -# - local numer="$1" - local denom="$2" - - if ! [[ "${numer}" =~ ^[0-9]+$ ]]; then - print_err_msg_exit " -The first argument to the \"${func_name}\" function (numer) must be a nonnegative -integer but isn't: - numer = ${numer} -" - fi - - if [[ "${denom}" -eq 0 ]]; then - print_err_msg_exit " -The second argument to the \"${func_name}\" function (denom) cannot be zero: - denom = ${denom} -" - fi - - if ! [[ "${denom}" =~ ^[0-9]+$ ]]; then - print_err_msg_exit " -The second argument to the \"${func_name}\" function (denom) must be a positive -integer but isn't: - denom = ${denom} -" - fi -# -#----------------------------------------------------------------------- -# -# Let ceil(a,b) denote the ceiling of the quotient of a and b. It can be -# shown that for two positive integers a and b, we have: -# -# ceil(a,b) = floor((a+b-1)/b) -# -# where floor(a,b) is the integer obtained by rounding the quotient of -# a and b (i.e. a/b) down to the nearest integer. Since in bash a -# division returns only the integer part of the result, it is effectively -# the floor function. Thus the following. -# -#----------------------------------------------------------------------- -# - result=$(( (numer+denom-1)/denom )) - print_info_msg "${result}" -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/func- -# tion. -# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} - diff --git a/ush/source_util_funcs.sh b/ush/source_util_funcs.sh index ef7c669910..9feceaf68e 100644 --- a/ush/source_util_funcs.sh +++ b/ush/source_util_funcs.sh @@ -96,15 +96,6 @@ function source_util_funcs() { # #----------------------------------------------------------------------- # -# Source the file containing the function that returns the ceiling of -# the quotient of two positive integers. 
-# -#----------------------------------------------------------------------- -# - . ${bashutils_dir}/ceil.sh -# -#----------------------------------------------------------------------- -# # Source the file containing the functions that will echo given strings # as uppercase or lowercase # From 9435f7f29accbee047ac5a8c0c679996e03ac9d7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 14:14:10 -0600 Subject: [PATCH 143/260] Clean up comments, remove commented-out code. --- ush/run_eval_METplus_timestr_tmpl.sh | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/ush/run_eval_METplus_timestr_tmpl.sh b/ush/run_eval_METplus_timestr_tmpl.sh index b2df03c56c..f5438be2f4 100755 --- a/ush/run_eval_METplus_timestr_tmpl.sh +++ b/ush/run_eval_METplus_timestr_tmpl.sh @@ -1,21 +1,13 @@ #!/usr/bin/env bash - # #----------------------------------------------------------------------- # -# Source the variable definitions file and the bash utility functions. +# This script is simply a wrapper to the eval_METplus_timestr_tmpl bash +# function. It is needed in order to enable the function to be called +# from a python script. # #----------------------------------------------------------------------- # -#OBS_DIR="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa" -#OBS_CCPA_APCP_FN_TEMPLATE="{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" - -#USHdir="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/ufs-srweather-app/ush" -#yyyymmdd_task="20230217" -#lhr="22" -#METplus_timestr_tmpl="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" - -#USHdir="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/ufs-srweather-app/ush"; yyyymmdd_task="20230217"; lhr="22"; METplus_timestr_tmpl="/scratch2/BMC/fv3lam/Gerard.Ketefian/AGILE/expt_dirs/get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW/obs_data/ccpa/{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2" set -u . $USHdir/source_util_funcs.sh eval_METplus_timestr_tmpl \ @@ -24,5 +16,3 @@ eval_METplus_timestr_tmpl \ METplus_timestr_tmpl="${METplus_timestr_tmpl}" \ outvarname_evaluated_timestr="fp_proc" echo "${fp_proc}" - -# METplus_timestr_tmpl="${OBS_DIR}/${OBS_CCPA_APCP_FN_TEMPLATE}" \ From 2218ca4e1ceabc9949fe8c1901066f8dcb1b0899 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 8 Oct 2024 14:44:40 -0600 Subject: [PATCH 144/260] Remove unneeded variable from task. --- parm/wflow/verify_pre.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 567f045188..c239eae8d3 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -52,7 +52,6 @@ task_get_obs_mrms: envars: <<: *default_vars OBTYPE: 'MRMS' - MRMS_FIELDS: 'REFC RETOP' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" From ee5566b1aac7ee9ca27a2f98d50a3159c58031bd Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 9 Oct 2024 10:22:19 -0600 Subject: [PATCH 145/260] Fix typo. 
--- ush/set_cycle_and_obs_timeinfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 634e646745..cae3bc37ee 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -72,7 +72,7 @@ def check_temporal_consistency_cumul_fields( This function reads in a subset of the parameters in the verification configuration dictionary and ensures that certain temporal constraints on these parameters are satisfied. It then returns an updated version of - the verification configuration dictionary that satisfies these constranints. + the verification configuration dictionary that satisfies these constraints. The constraints are on the accumulation intervals associated with the cumulative forecast fields and corresponding observation type pairs that From befe769c7a8fe8c01b3119f18ee17744713fc7e3 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 9 Oct 2024 16:43:31 -0600 Subject: [PATCH 146/260] Fix typo. --- ...ulticyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml | 2 +- ...ulticyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 2 +- ...multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml | 2 +- ...multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml | 2 +- ...do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml index 3286066021..ced46215d0 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml @@ -37,7 +37,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml index 3963a616b4..97e1393864 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml @@ -39,7 +39,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. 
+ # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml index 23035f3a92..3ce4ff5f08 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml @@ -37,7 +37,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml index 10ceddd9a8..3264c93eca 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml @@ -39,7 +39,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index c4f62a679d..a7af3f27c9 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -38,7 +38,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. 
This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml index 55cbf5b13f..a0f10d8b05 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml @@ -37,7 +37,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml index 20cab966ef..429e8e0086 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml @@ -39,7 +39,7 @@ task_run_post: verification: METPLUS_VERBOSITY_LEVEL: 5 # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml index 10ff318dd9..aa4b731e3a 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml @@ -39,7 +39,7 @@ task_run_post: verification: # Base directories in which to look for obs. If these do not exist and/or - # do not contain the requied obs, create and populate them. + # do not contain the required obs, create and populate them. # Note that when performing WE2E tests, the default behavior is not to # get obs files from HPSS but to use staged obs files. This is done by # setting these variables to the (platform-specific) locations of these From 6dd8e20723f39d5e2cf628d18729614bea824b99 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 10 Oct 2024 09:01:14 -0600 Subject: [PATCH 147/260] Modify old test for set_cycle_dates to fit new version of this function. 
This includes adding a new test for the case in which the output should be a list of datetime objects (the default is for the output to be a list of strings). --- tests/test_python/test_set_cycle_dates.py | 39 +++++++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/tests/test_python/test_set_cycle_dates.py b/tests/test_python/test_set_cycle_dates.py index eb76f579c6..8baae643ac 100644 --- a/tests/test_python/test_set_cycle_dates.py +++ b/tests/test_python/test_set_cycle_dates.py @@ -1,20 +1,22 @@ """ Test set_cycle_dates.py """ -from datetime import datetime +from datetime import datetime, timedelta import unittest -from set_cycle_dates import set_cycle_dates +from set_cycle_and_obs_timeinfo import set_cycle_dates class Testing(unittest.TestCase): """ Define the tests""" - def test_set_cycle_dates(self): + + def test_set_cycle_dates_string(self): """ Test that the proper list of dates are produced given the - intput data""" + input data and return_type left to its default value (so the + output should be a list of strings)""" cdates = set_cycle_dates( - date_start=datetime(2022, 1, 1, 6), - date_end=datetime(2022, 1, 2, 12), - incr_cycl_freq=6, + start_time_first_cycl=datetime(2022, 1, 1, 6), + start_time_last_cycl=datetime(2022, 1, 2, 12), + cycl_intvl=timedelta(hours=6), ) self.assertEqual( cdates, @@ -27,3 +29,26 @@ def test_set_cycle_dates(self): "2022010212", ], ) + + def test_set_cycle_dates_datetime(self): + + """ Test that the proper list of dates are produced given the + input data and return_type left set to "datetime" (so the output + should be a list of datetime objects)""" + cdates = set_cycle_dates( + start_time_first_cycl=datetime(2022, 1, 1, 6), + start_time_last_cycl=datetime(2022, 1, 2, 12), + cycl_intvl=timedelta(hours=6), + return_type="datetime", + ) + self.assertEqual( + cdates, + [ + datetime(2022, 1, 1, 6), + datetime(2022, 1, 1, 12), + datetime(2022, 1, 1, 18), + datetime(2022, 1, 2, 0), + datetime(2022, 1, 2, 6), + datetime(2022, 1, 2, 12), + ], + ) From 03d2ab6f4b7ae2d9de74fe355019d9cf8611f6d4 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 10 Oct 2024 12:04:35 -0600 Subject: [PATCH 148/260] First attempt at modifying documentation to see if I can view it in the PR page. --- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 85 ++++++++++--------- 1 file changed, 43 insertions(+), 42 deletions(-) diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 4d88173028..14fccdd5e5 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -168,48 +168,6 @@ These settings define platform-specific run commands. Users should set run comma ``PRE_TASK_CMDS``: (Default: "") Pre-task commands such as ``ulimit`` needed by tasks. For example: ``'{ ulimit -s unlimited; ulimit -a; }'`` -METplus Parameters ----------------------- - -:ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. Many of the METplus parameters are described below, but additional documentation for the METplus components is available on the `METplus website `__. - -.. _METParamNote: - -.. 
note:: - Where a date field is required: - * ``YYYY`` refers to the 4-digit valid year - * ``MM`` refers to the 2-digit valid month - * ``DD`` refers to the 2-digit valid day of the month - * ``HH`` refers to the 2-digit valid hour of the day - * ``mm`` refers to the 2-digit valid minutes of the hour - * ``SS`` refers to the two-digit valid seconds of the hour - -``CCPA_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ccpa/proc"``) - User-specified location of the directory where :term:`CCPA` hourly precipitation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure, as well as important caveats about errors in the metadata and file names. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. - -``NOHRSC_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc"``) - User-specified location of top-level directory where NOHRSC 6- and 24-hour snowfall accumulation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file scripts/exregional_get_verif_obs.sh for more details about files and directory structure - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. - - .. note:: - Due to limited availability of NOHRSC observation data on NOAA :term:`HPSS` and the likelihood that snowfall accumulation verification will not be desired outside of winter cases, this verification option is currently not present in the workflow by default. In order to use it, the verification environment variable ``VX_FIELDS`` should be updated to include ``ASNOW``. This will allow the related workflow tasks to be run. - -``MRMS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/mrms/proc"``) - User-specified location of the directory where :term:`MRMS` composite reflectivity and echo top files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in the ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. - -``NDAS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ndas/proc"``) - User-specified location of top-level directory where :term:`NDAS` prepbufr files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. Other Platform-Specific Directories -------------------------------------- @@ -1635,6 +1593,49 @@ General Verification Parameters ``METPLUS_VERBOSITY_LEVEL``: (Default: ``2``) Logging verbosity level used by METplus verification tools. Valid values: 0 to 5, with 0 quiet and 5 loud. +METplus Parameters +---------------------- + +:ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. 
Many of the METplus parameters are described below, but additional documentation for the METplus components is available on the `METplus website `__. + +.. _METParamNote: + +.. note:: + Where a date field is required: + * ``YYYY`` refers to the 4-digit valid year + * ``MM`` refers to the 2-digit valid month + * ``DD`` refers to the 2-digit valid day of the month + * ``HH`` refers to the 2-digit valid hour of the day + * ``mm`` refers to the 2-digit valid minutes of the hour + * ``SS`` refers to the two-digit valid seconds of the hour + +``CCPA_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ccpa/proc"``) + User-specified location of the directory where :term:`CCPA` hourly precipitation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure, as well as important caveats about errors in the metadata and file names. + + .. attention:: + Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + +``NOHRSC_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc"``) + User-specified location of top-level directory where NOHRSC 6- and 24-hour snowfall accumulation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file scripts/exregional_get_verif_obs.sh for more details about files and directory structure + + .. attention:: + Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + + .. note:: + Due to limited availability of NOHRSC observation data on NOAA :term:`HPSS` and the likelihood that snowfall accumulation verification will not be desired outside of winter cases, this verification option is currently not present in the workflow by default. In order to use it, the verification environment variable ``VX_FIELDS`` should be updated to include ``ASNOW``. This will allow the related workflow tasks to be run. + +``MRMS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/mrms/proc"``) + User-specified location of the directory where :term:`MRMS` composite reflectivity and echo top files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in the ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. + + .. attention:: + Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + +``NDAS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ndas/proc"``) + User-specified location of top-level directory where :term:`NDAS` prepbufr files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. + + .. attention:: + Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. + Templates for Observation Files --------------------------------- From c0a841e712a3555a93677cc1e6ad982efc9f5303 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 11 Oct 2024 06:42:51 -0600 Subject: [PATCH 149/260] Bug fix. 
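The bug: the error message assembled in check_temporal_consistency_cumul_fields
referenced a variable name, forecast_output_intvl, that does not exist in that
scope (the function's argument is named fcst_output_intvl). Since the
placeholders in a Python f-string are evaluated as soon as the string is built,
the stale name raised a NameError whenever this error branch was reached,
hiding the diagnostic the message was meant to print. A minimal sketch of the
failure mode (the values and the surrounding check are simplified stand-ins,
not the actual function):

    from textwrap import dedent

    fcst_output_intvl = 3  # hypothetical value standing in for the real argument
    accum_hrs = 7

    if accum_hrs % fcst_output_intvl != 0:
        # BUG: 'forecast_output_intvl' is undefined in this scope, so building
        # this f-string raises NameError instead of printing the diagnostic.
        msg = dedent(f"""
            fcst_output_intvl_hrs = {forecast_output_intvl} hr
            """)

The one-line rename below makes the placeholder refer to the variable that
actually exists.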
--- ush/set_cycle_and_obs_timeinfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index cae3bc37ee..9a7644ed29 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -261,7 +261,7 @@ def check_temporal_consistency_cumul_fields( field_fcst = {field_fcst} obtype = {obtype} accum_hrs = {accum_hrs} hr - fcst_output_intvl_hrs = {forecast_output_intvl} hr + fcst_output_intvl_hrs = {fcst_output_intvl} hr accum_hrs % fcst_output_intvl_hrs = {rem_fcst} Thus, this forecast field cannot be accumulated over this interval. Will remove this accumulation interval from the list of accumulation From d3485729fec1b4699ecb6f5f4f045c34a67fdfd7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 11 Oct 2024 09:23:43 -0600 Subject: [PATCH 150/260] Fix up comments. --- ush/set_cycle_and_obs_timeinfo.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 9a7644ed29..52271d2362 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -75,7 +75,7 @@ def check_temporal_consistency_cumul_fields( the verification configuration dictionary that satisfies these constraints. The constraints are on the accumulation intervals associated with the - cumulative forecast fields and corresponding observation type pairs that + cumulative forecast fields (and corresponding observation type pairs) that are to be verified. The constraints on each such accumulation interval are as follows: @@ -85,14 +85,18 @@ def check_temporal_consistency_cumul_fields( 2) The obs availability interval evenly divides the accumulation interval. This ensures that the obs can be added together to obtain accumulated - values of the obs field, e.g. the 6-hourly NOHRSC obs can be added - to obtain 24-hour observed snowfall accumulations. + values of the obs field, e.g. the 6-hourly NOHRSC obs can be added to + obtain 24-hour observed snowfall accumulations. Note that this also + ensures that the accumulation interval is greater than or equal to the + obs availability interval. 3) The forecast output interval evenly divides the accumulation interval. This ensures that the forecast output can be added together to obtain accumulated values of the forecast field, e.g. if the forecast output - interval is 3 hours, the resulting 3-hourly APCP outputs from the - forecast can be added to obtain 6-hourly forecast APCP. + interval is 3 hours, the resulting 3-hourly APCP outputs from the forecast + can be added to obtain 6-hourly forecast APCP. Note that this also ensures + that the accumulation interval is greater than or equal to the forecast + output interval. 4) The hour-of-day at which the accumulated forecast values will be available are a subset of the ones at which the accumulated obs @@ -207,7 +211,8 @@ def check_temporal_consistency_cumul_fields( # Initialize a sub-sub-dictionary in one of the dictionaries to be returned. fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = [] # - # Check that accumulation inervals are shorter than the forecast length. + # Make sure that the accumulation interval is less than or equal to the + # forecast length. 
# if accum_hrs > fcst_len_hrs: msg = dedent(f""" @@ -225,7 +230,7 @@ def check_temporal_consistency_cumul_fields( logging.info(msg) accum_intvls_hrs.remove(accum_hrs) # - # Check that accumulation inervals are evenly divisible by the observation + # Make sure that accumulation interval is evenly divisible by the observation # availability interval. # if accum_hrs in accum_intvls_hrs: @@ -248,7 +253,7 @@ def check_temporal_consistency_cumul_fields( logging.info(msg) accum_intvls_hrs.remove(accum_hrs) # - # Check that accumulation inervals are evenly divisible by the forecast + # Make sure that accumulation interval is evenly divisible by the forecast # output interval. # if accum_hrs in accum_intvls_hrs: @@ -270,9 +275,9 @@ def check_temporal_consistency_cumul_fields( logging.info(msg) accum_intvls_hrs.remove(accum_hrs) # - # Check that the hours-of-day at which the current cumulative field will - # be output are a subset of the hours-of-day at which the corresponding - # obs type is output. + # Make sure that the hours-of-day at which the current cumulative field + # will be output are a subset of the hours-of-day at which the corresponding + # obs type is available. # if accum_hrs in accum_intvls_hrs: From 28140699caa8045b246998993d8183f11ffc6c9b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 11 Oct 2024 16:24:33 -0600 Subject: [PATCH 151/260] In config.community.yaml, move [CCPA|MRMS|NDAS]_OBS_DIR variables from the "platform" to the "verification" section to be consistent with the changes in config_defaults.yaml. --- ush/config.community.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ush/config.community.yaml b/ush/config.community.yaml index 417b9edb91..f380bd28cc 100644 --- a/ush/config.community.yaml +++ b/ush/config.community.yaml @@ -5,10 +5,6 @@ user: RUN_ENVIR: community MACHINE: hera ACCOUNT: an_account -platform: - CCPA_OBS_DIR: "" - MRMS_OBS_DIR: "" - NDAS_OBS_DIR: "" workflow: USE_CRON_TO_RELAUNCH: false EXPT_SUBDIR: test_community @@ -35,6 +31,9 @@ global: DO_ENSEMBLE: false NUM_ENS_MEMBERS: 2 verification: + CCPA_OBS_DIR: "" + MRMS_OBS_DIR: "" + NDAS_OBS_DIR: "" VX_FCST_MODEL_NAME: FV3_GFS_v16_CONUS_25km rocoto: tasks: From 52ebd99c75daf96097df4579c92adf49d0a0adce Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 15 Oct 2024 14:00:19 -0600 Subject: [PATCH 152/260] Bug fix: the get_obs_nohrsc tasks need to be based on obs days for cumulative fields, not obs days for instantaneous fields (which is the default cycledef in verify_pre.yaml). --- parm/wflow/verify_pre.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index c239eae8d3..a3b49cc169 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -37,6 +37,9 @@ task_get_obs_ccpa: task_get_obs_nohrsc: <<: *default_task_verify_pre + attrs: + cycledefs: cycledefs_obs_days_cumul + maxtries: '1' command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars From 42c3d6c06f0ebdc4c6b6b4111d5e410b40ded419 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 16 Oct 2024 13:13:28 -0600 Subject: [PATCH 153/260] Add logging statements when exceptions occur; fix comments and code indentation. 
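The added calls follow a log-then-raise pattern: write the message to the
logger before raising, so the diagnostic lands in the experiment log even if a
caller catches the exception or the traceback is otherwise lost. A condensed
sketch of the pattern as applied to one of the checks touched here
(illustrative, not a verbatim copy of the repo code):

    import logging
    from textwrap import dedent

    def check_obs_avail_intvl(obs_avail_intvl_hrs):
        # The obs availability interval must divide a 24-hour day evenly.
        remainder = 24 % obs_avail_intvl_hrs
        if remainder != 0:
            msg = dedent(f"""
                The obs availability interval must divide 24 evenly:
                  obs_avail_intvl_hrs = {obs_avail_intvl_hrs}
                  24 % obs_avail_intvl_hrs = {remainder}
                """)
            logging.error(msg)    # record the diagnostic in the log first...
            raise Exception(msg)  # ...then raise with the same message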
--- ush/set_cycle_and_obs_timeinfo.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 52271d2362..ded2f92fe2 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -49,6 +49,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, Valid values are: valid_values = {valid_values} """) + logging.error(msg) raise Exception(msg) # iterate over cycles @@ -184,6 +185,7 @@ def check_temporal_consistency_cumul_fields( obs_avail_intvl_hrs = {obs_avail_intvl_hrs} 24 % obs_avail_intvl_hrs = {remainder}" """) + logging.error(msg) raise Exception(msg) # Assume that the obs are available at hour 0 of the day regardless # of obs type. @@ -630,12 +632,12 @@ def get_obs_retrieve_times_by_day( """ # Convert string contents of input dictionaries to datetime objects. for time_type in ['cumul', 'inst']: - fcst_output_times_all_cycles[time_type] \ - = [datetime.strptime(fcst_output_times_all_cycles[time_type][i], "%Y%m%d%H") - for i in range(len(fcst_output_times_all_cycles[time_type]))] - obs_days_all_cycles[time_type] \ - = [datetime.strptime(obs_days_all_cycles[time_type][i], "%Y%m%d") - for i in range(len(obs_days_all_cycles[time_type]))] + fcst_output_times_all_cycles[time_type] \ + = [datetime.strptime(fcst_output_times_all_cycles[time_type][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles[time_type]))] + obs_days_all_cycles[time_type] \ + = [datetime.strptime(obs_days_all_cycles[time_type][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles[time_type]))] # Get list of forecast fields to be verified. vx_fields = vx_config['VX_FIELDS'] @@ -650,8 +652,9 @@ def get_obs_retrieve_times_by_day( {'obtype': 'NDAS', 'fcst_fields': ['ADPSFC', 'ADPUPA']}] } - # Keep only those items in the dictionary above that have forecast fields - # that appear in the list of forecast fields to be verified. + # Keep only those items in the dictionary vx_field_info defined above + # that have forecast fields that appear in the list of forecast fields to + # be verified. for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.copy().items(): for obtypes_to_fcst_fields_dict in obtypes_to_fcst_fields_dict_list.copy(): obtype = obtypes_to_fcst_fields_dict['obtype'] From 5a6da53c82088208589f8361eee302708e384257 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 18 Oct 2024 10:03:31 -0600 Subject: [PATCH 154/260] Minor moving of config variable. --- ush/config_defaults.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 1e967ef9e4..b0a6438111 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2428,7 +2428,7 @@ verification: MRMS_OBS_AVAIL_INTVL_HRS: 1 NDAS_OBS_AVAIL_INTVL_HRS: 1 # - # REMOVE_RAW_OBS_DIRS_[CCPA|MRMS|NDAS|NOHRSC]: + # REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]: # Boolean flag specifying whether to remove the "raw" observation # directories after pulling the specified type of obs (CCPA, NOHRSC, # MRMS, or NOHRSC). The raw directories are the ones in which the @@ -2438,9 +2438,9 @@ verification: # structure). 
# REMOVE_RAW_OBS_CCPA: true + REMOVE_RAW_OBS_NOHRSC: true REMOVE_RAW_OBS_MRMS: true REMOVE_RAW_OBS_NDAS: true - REMOVE_RAW_OBS_NOHRSC: true # # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: # Template used to specify the names of the output NetCDF observation From 7dc7db309eee5de53087e74d273647b182a8701d Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 9 Oct 2024 15:59:07 -0600 Subject: [PATCH 155/260] Add new parameter VX_FCST_OUTPUT_INTVL_HRS into config_defaults.yaml and use it as the forecast output interval when performing vx. --- scripts/exregional_check_post_output.sh | 2 +- ...egional_run_met_genensprod_or_ensemblestat.sh | 2 +- ...xregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- ...l_run_met_gridstat_or_pointstat_vx_ensmean.sh | 2 +- ...l_run_met_gridstat_or_pointstat_vx_ensprob.sh | 2 +- scripts/exregional_run_met_pcpcombine.sh | 2 +- ush/config_defaults.yaml | 9 +++++++++ ush/setup.py | 16 ++++++---------- 8 files changed, 21 insertions(+), 16 deletions(-) diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index 433aba1e4e..f176c9a12e 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -126,7 +126,7 @@ set_leadhrs \ yyyymmddhh_init="${CDATE}" \ lhr_min="0" \ lhr_max="${FCST_LEN_HRS}" \ - lhr_intvl="${FCST_OUTPUT_INTVL_HRS}" \ + lhr_intvl="${VX_FCST_OUTPUT_INTVL_HRS}" \ base_dir="${VX_FCST_INPUT_BASEDIR}" \ fn_template="${FCST_INPUT_FN_TEMPLATE}" \ num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 67ae70c8b9..475417ee53 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -226,7 +226,7 @@ case "$OBTYPE" in vx_hr_start="${vx_intvl}" ;; *) - vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" vx_hr_start="0" ;; esac diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index e16b06cb46..a6130ba50d 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -227,7 +227,7 @@ case "$OBTYPE" in vx_hr_start="${vx_intvl}" ;; *) - vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" vx_hr_start="0" ;; esac diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index adecb68bcd..75332e4929 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -170,7 +170,7 @@ case "$OBTYPE" in vx_hr_start="${vx_intvl}" ;; *) - vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" vx_hr_start="0" ;; esac diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 2c27a9a597..382bd71ac8 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -169,7 +169,7 @@ case "$OBTYPE" in vx_hr_start="${vx_intvl}" ;; *) - vx_intvl="$((${FCST_OUTPUT_INTVL_HRS}))" + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" vx_hr_start="0" ;; esac diff --git a/scripts/exregional_run_met_pcpcombine.sh 
b/scripts/exregional_run_met_pcpcombine.sh
index 43da23ca2e..590ceb43ef 100755
--- a/scripts/exregional_run_met_pcpcombine.sh
+++ b/scripts/exregional_run_met_pcpcombine.sh
@@ -229,7 +229,7 @@ set_leadhrs_no_missing \
 if [ "${FCST_OR_OBS}" = "FCST" ]; then
   base_dir="${FCST_INPUT_DIR}"
   fn_template="${FCST_INPUT_FN_TEMPLATE}"
-  subintvl="${FCST_OUTPUT_INTVL_HRS}"
+  subintvl="${VX_FCST_OUTPUT_INTVL_HRS}"
 elif [ "${FCST_OR_OBS}" = "OBS" ]; then
   base_dir="${OBS_INPUT_DIR}"
   fn_template="${OBS_INPUT_FN_TEMPLATE}"
   subintvl="${OBS_AVAIL_INTVL_HRS}"
 fi
diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml
index b0a6438111..b216ccdd72 100644
--- a/ush/config_defaults.yaml
+++ b/ush/config_defaults.yaml
@@ -2498,6 +2498,15 @@ verification:
   VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ]
   VX_ASNOW_ACCUMS_HRS: [ 6, 24 ]
   #
+  # Set the forecast output interval to use for verification purposes.
+  # If the forecasts to be verified are being run in the SRW (i.e. they
+  # are not staged from another forecast model), then this should be
+  # set to the SRW's forecast output interval, but such a variable is
+  # currently not available in this configuration file.  Instead, for
+  # now we set it to a default value of 1 hour.
+  #
+  VX_FCST_OUTPUT_INTVL_HRS: 1
+  #
   # VX_FCST_INPUT_BASEDIR:
   # Template for top-level directory containing forecast (but not obs)
   # files that will be used as input into METplus for verification.
diff --git a/ush/setup.py b/ush/setup.py
index 3a034f7476..dfc59ffaba 100644
--- a/ush/setup.py
+++ b/ush/setup.py
@@ -578,22 +578,19 @@ def _remove_tag(tasks, tag):
     #
     # -----------------------------------------------------------------------
     #
+    vx_config = expt_config["verification"]
+
     date_first_cycl = workflow_config.get("DATE_FIRST_CYCL")
     date_last_cycl = workflow_config.get("DATE_LAST_CYCL")
     incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ"))
     fcst_len_hrs = workflow_config.get("FCST_LEN_HRS")
-
-    # Set the forecast output interval. Ideally, this should be obtained
-    # from the SRW App's configuration file, but such a variable doesn't
-    # yet exist in that file.
-    fcst_output_intvl_hrs = 1
-    workflow_config['FCST_OUTPUT_INTVL_HRS'] = fcst_output_intvl_hrs
+    vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS")

     # To enable arithmetic with dates and times, convert various time
     # intervals from integer to datetime.timedelta objects.
cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) - fcst_output_intvl_dt = datetime.timedelta(hours=fcst_output_intvl_hrs) + vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) # # ----------------------------------------------------------------------- # @@ -605,12 +602,11 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # - vx_config = expt_config["verification"] vx_config, fcst_obs_matched_times_all_cycles_cumul \ = check_temporal_consistency_cumul_fields( vx_config, date_first_cycl, date_last_cycl, cycl_intvl_dt, - fcst_len_dt, fcst_output_intvl_dt) + fcst_len_dt, vx_fcst_output_intvl_dt) expt_config["verification"] = vx_config # # ----------------------------------------------------------------------- @@ -628,7 +624,7 @@ def _remove_tag(tasks, tag): fcst_output_times_all_cycles, obs_days_all_cycles, \ = set_fcst_output_times_and_obs_days_all_cycles( date_first_cycl, date_last_cycl, cycl_intvl_dt, - fcst_len_dt, fcst_output_intvl_dt) + fcst_len_dt, vx_fcst_output_intvl_dt) workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst'] workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul'] From 57fcbc6e04ce57569fb10b00ff66861611c7279b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 16 Oct 2024 13:29:26 -0600 Subject: [PATCH 156/260] Change arguments so the cycle start times don't need to be called multiple times by different functions. --- ush/set_cycle_and_obs_timeinfo.py | 12 +++--------- ush/setup.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index ded2f92fe2..f345008f04 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -361,8 +361,7 @@ def check_temporal_consistency_cumul_fields( def set_fcst_output_times_and_obs_days_all_cycles( - start_time_first_cycl, start_time_last_cycl, cycl_intvl, - fcst_len, fcst_output_intvl): + cycle_start_times, fcst_len, fcst_output_intvl): """ This function returns forecast output times and observation days (i.e. days on which obs are needed because there is forecast output on those @@ -401,12 +400,6 @@ def set_fcst_output_times_and_obs_days_all_cycles( Each element of these lists is a string of the form 'YYYYMMDD'. """ - # Get the list containing the starting times of the cycles. Each element - # of the list will be a datetime object. - cycle_start_times \ - = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, - return_type='datetime') - # Get the number of forecast output times per cycle/forecast. num_fcst_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) @@ -603,7 +596,8 @@ def set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles): def get_obs_retrieve_times_by_day( - vx_config, fcst_output_times_all_cycles, obs_days_all_cycles): + vx_config, cycle_start_times, fcst_len, + fcst_output_times_all_cycles, obs_days_all_cycles): """ This function generates dictionary of dictionaries that, for each combination of obs type needed and each obs day, contains a string list diff --git a/ush/setup.py b/ush/setup.py index dfc59ffaba..ce5dad28f3 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -611,6 +611,18 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # Generate a list containing the starting times of the cycles. 
This will + # be needed in checking that the hours-of-day of the forecast output match + # those of the observations. + # + # ----------------------------------------------------------------------- + # + cycle_start_times \ + = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, + return_type='datetime') + # + # ----------------------------------------------------------------------- + # # Generate a list of forecast output times and a list of obs days (i.e. # days on which observations are needed to perform verification because # there is forecast output on those days) over all cycles, both for @@ -623,8 +635,7 @@ def _remove_tag(tasks, tag): # fcst_output_times_all_cycles, obs_days_all_cycles, \ = set_fcst_output_times_and_obs_days_all_cycles( - date_first_cycl, date_last_cycl, cycl_intvl_dt, - fcst_len_dt, vx_fcst_output_intvl_dt) + cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst'] workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul'] @@ -658,7 +669,8 @@ def _remove_tag(tasks, tag): vx_config = expt_config["verification"] obs_retrieve_times_by_day \ = get_obs_retrieve_times_by_day( - vx_config, fcst_output_times_all_cycles, obs_days_all_cycles) + vx_config, cycle_start_times, fcst_len_dt, + fcst_output_times_all_cycles, obs_days_all_cycles) for obtype, obs_days_dict in obs_retrieve_times_by_day.items(): for obs_day, obs_retrieve_times in obs_days_dict.items(): From a3a7996844acb26f440a4da5278ecc828d983f01 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 16 Oct 2024 14:02:49 -0600 Subject: [PATCH 157/260] Further changes to avoid calling the function that calculates the cycle start times multiple times. --- ush/set_cycle_and_obs_timeinfo.py | 40 +++++++++++-------------------- ush/setup.py | 28 ++++++++++------------ 2 files changed, 27 insertions(+), 41 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index f345008f04..a354139352 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -66,9 +66,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, def check_temporal_consistency_cumul_fields( - vx_config, - start_time_first_cycl, start_time_last_cycl, cycl_intvl, - fcst_len, fcst_output_intvl): + vx_config, cycle_start_times, fcst_len, fcst_output_intvl): """ This function reads in a subset of the parameters in the verification configuration dictionary and ensures that certain temporal constraints on @@ -113,14 +111,9 @@ def check_temporal_consistency_cumul_fields( vx_config: The verification configuration dictionary. - start_time_first_cycl: - Starting time of first cycle; a datetime object. - - start_time_last_cycl: - Starting time of last cycle; a datetime object. - - cycl_intvl: - Time interval between cycle starting times; a timedelta object. + cycle_start_times: + List containing the starting times of the cycles in the experiment; + each list element is a datetime object. fcst_len: The length of each forecast; a timedelta object. @@ -147,13 +140,6 @@ def check_temporal_consistency_cumul_fields( fcst_len_hrs = int(fcst_len/one_hour) fcst_output_intvl_hrs = int(fcst_output_intvl/one_hour) - # Generate a list containing the starting times of the cycles. This will - # be needed in checking that the hours-of-day of the forecast output match - # those of the observations. 
- cycle_start_times \ - = set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, - return_type='datetime') - # Initialize one of the variables that will be returned to an empty # dictionary. fcst_obs_matched_times_all_cycles_cumul = dict() @@ -372,14 +358,9 @@ def set_fcst_output_times_and_obs_days_all_cycles( accumulation interval smaller than this are obviously not allowed). Args: - start_time_first_cycl: - Starting time of first cycle; a datetime object. - - start_time_last_cycl: - Starting time of last cycle; a datetime object. - - cycl_intvl: - Time interval between cycle starting times; a timedelta object. + cycle_start_times: + List containing the starting times of the cycles in the experiment; + each list element is a datetime object. fcst_len: The length of each forecast; a timedelta object. @@ -608,6 +589,13 @@ def get_obs_retrieve_times_by_day( vx_config: The verification configuration dictionary. + cycle_start_times: + List containing the starting times of the cycles in the experiment; + each list element is a datetime object. + + fcst_len: + The length of each forecast; a timedelta object. + fcst_output_times_all_cycles: Dictionary containing a list of forecast output times over all cycles for instantaneous fields and a second analogous list for cumulative fields. diff --git a/ush/setup.py b/ush/setup.py index ce5dad28f3..a4ba2f0001 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -594,6 +594,18 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # Generate a list containing the starting times of the cycles. This will + # be needed in checking that the hours-of-day of the forecast output match + # those of the observations. + # + # ----------------------------------------------------------------------- + # + cycle_start_times \ + = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, + return_type='datetime') + # + # ----------------------------------------------------------------------- + # # Ensure that the configuration parameters associated with cumulative # fields (e.g. APCP) in the verification section of the experiment # dicitonary are temporally consistent, e.g. that accumulation intervals @@ -604,25 +616,11 @@ def _remove_tag(tasks, tag): # vx_config, fcst_obs_matched_times_all_cycles_cumul \ = check_temporal_consistency_cumul_fields( - vx_config, - date_first_cycl, date_last_cycl, cycl_intvl_dt, - fcst_len_dt, vx_fcst_output_intvl_dt) + vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) expt_config["verification"] = vx_config # # ----------------------------------------------------------------------- # - # Generate a list containing the starting times of the cycles. This will - # be needed in checking that the hours-of-day of the forecast output match - # those of the observations. - # - # ----------------------------------------------------------------------- - # - cycle_start_times \ - = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, - return_type='datetime') - # - # ----------------------------------------------------------------------- - # # Generate a list of forecast output times and a list of obs days (i.e. # days on which observations are needed to perform verification because # there is forecast output on those days) over all cycles, both for From 2685e37382d1da9a5a5de07bb3b5b917636ee115 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 18 Oct 2024 13:43:00 -0600 Subject: [PATCH 158/260] Remove trailing whitespace. 
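The stripped whitespace is invisible in the hunks below, which is why most of
the paired - and + lines look identical. A throwaway checker along these lines
(hypothetical; it is not being added to the repo) can confirm that a file is
clean:

    import sys

    def report_trailing_ws(path):
        # Print file:line for every line that ends in spaces or tabs.
        with open(path) as f:
            for n, line in enumerate(f, start=1):
                text = line.rstrip("\n")
                if text != text.rstrip():
                    print(f"{path}:{n}: trailing whitespace")

    if __name__ == "__main__":
        for p in sys.argv[1:]:
            report_trailing_ws(p)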
--- ush/setup.py | 56 ++++++++++++++++++++++++++-------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/ush/setup.py b/ush/setup.py index a4ba2f0001..899f05586f 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -51,24 +51,24 @@ from link_fix import link_fix def load_config_for_setup(ushdir, default_config, user_config): - """Updates a Python dictionary in place with experiment configuration settings from the - default, machine, and user configuration files. + """Updates a Python dictionary in place with experiment configuration settings from the + default, machine, and user configuration files. Args: ushdir (str): Path to the ``ush`` directory for the SRW App default_config (str): Path to ``config_defaults.yaml`` - user_config (str): Path to the user-provided config YAML (usually named + user_config (str): Path to the user-provided config YAML (usually named ``config.yaml``) Returns: None - + Raises: - FileNotFoundError: If the user-provided configuration file or the machine file does not + FileNotFoundError: If the user-provided configuration file or the machine file does not exist. - Exception: If (1) the user-provided configuration file cannot be loaded or (2) it contains - invalid sections/keys or (3) it does not contain mandatory information or (4) - an invalid datetime format is used. + Exception: If (1) the user-provided configuration file cannot be loaded or (2) it contains + invalid sections/keys or (3) it does not contain mandatory information or (4) + an invalid datetime format is used. """ # Load the default config. @@ -268,7 +268,7 @@ def _add_jobname(tasks): Mandatory variable "{val}" not found in: user config file {user_config} OR - machine file {machine_file} + machine file {machine_file} """ ) ) @@ -300,17 +300,17 @@ def set_srw_paths(ushdir, expt_config): Other paths for the SRW App are set as defaults in ``config_defaults.yaml``. Args: - ushdir (str) : Path to the system location of the ``ush`` directory under the + ushdir (str) : Path to the system location of the ``ush`` directory under the SRW App clone expt_config (dict): Contains the configuration settings for the user-defined experiment Returns: Dictionary of configuration settings and system paths as keys/values - + Raises: - KeyError: If the external repository required is not listed in the externals + KeyError: If the external repository required is not listed in the externals configuration file (e.g., ``Externals.cfg``) - FileNotFoundError: If the ``ufs-weather-model`` code containing the FV3 source code has + FileNotFoundError: If the ``ufs-weather-model`` code containing the FV3 source code has not been cloned properly """ @@ -371,23 +371,23 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): time. 
Args: - USHdir (str): The full path of the ``ush/`` directory where this script + USHdir (str): The full path of the ``ush/`` directory where this script (``setup.py``) is located - user_config_fn (str): The name of a user-provided configuration YAML (usually + user_config_fn (str): The name of a user-provided configuration YAML (usually ``config.yaml``) debug (bool): Enable extra output for debugging Returns: None - - Raises: - ValueError: If checked configuration values are invalid (e.g., forecast length, + + Raises: + ValueError: If checked configuration values are invalid (e.g., forecast length, ``EXPTDIR`` path) - FileExistsError: If ``EXPTDIR`` already exists, and ``PREEXISTING_DIR_METHOD`` is not + FileExistsError: If ``EXPTDIR`` already exists, and ``PREEXISTING_DIR_METHOD`` is not set to a compatible handling method - FileNotFoundError: If the path to a particular file does not exist or if the file itself + FileNotFoundError: If the path to a particular file does not exist or if the file itself does not exist at the expected path - TypeError: If ``USE_CUSTOM_POST_CONFIG_FILE`` or ``USE_CRTM`` are set to true but no + TypeError: If ``USE_CUSTOM_POST_CONFIG_FILE`` or ``USE_CRTM`` are set to true but no corresponding custom configuration file or CRTM fix file directory is set KeyError: If an invalid value is provided (i.e., for ``GRID_GEN_METHOD``) """ @@ -480,7 +480,7 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): f""" EXPTDIR ({exptdir}) already exists, and PREEXISTING_DIR_METHOD = {preexisting_dir_method} - To ignore this error, delete the directory, or set + To ignore this error, delete the directory, or set PREEXISTING_DIR_METHOD = delete, or PREEXISTING_DIR_METHOD = rename in your config file. @@ -667,7 +667,7 @@ def _remove_tag(tasks, tag): vx_config = expt_config["verification"] obs_retrieve_times_by_day \ = get_obs_retrieve_times_by_day( - vx_config, cycle_start_times, fcst_len_dt, + vx_config, cycle_start_times, fcst_len_dt, fcst_output_times_all_cycles, obs_days_all_cycles) for obtype, obs_days_dict in obs_retrieve_times_by_day.items(): @@ -922,7 +922,7 @@ def _get_location(xcs, fmt, expt_cfg): if num_cycles != len(fcst_len_cycl): logger.error(f""" The number of entries in FCST_LEN_CYCL does not divide evenly into a 24 hour day or the number of cycles - in your experiment! + in your experiment! FCST_LEN_CYCL = {fcst_len_cycl} """ ) @@ -1303,7 +1303,7 @@ def _get_location(xcs, fmt, expt_cfg): post_output_domain_name = lowercase(post_output_domain_name) # Write updated value of POST_OUTPUT_DOMAIN_NAME back to dictionary - post_config["POST_OUTPUT_DOMAIN_NAME"] = post_output_domain_name + post_config["POST_OUTPUT_DOMAIN_NAME"] = post_output_domain_name # # ----------------------------------------------------------------------- @@ -1591,7 +1591,7 @@ def _dict_find(user_dict, substring): workflow_config["SDF_USES_THOMPSON_MP"] = has_tag_with_value(ccpp_suite_xml, "scheme", "mp_thompson") if workflow_config["SDF_USES_THOMPSON_MP"]: - + logging.debug(f'Selected CCPP suite ({workflow_config["CCPP_PHYS_SUITE"]}) uses Thompson MP') logging.debug(f'Setting up links for additional fix files') @@ -1701,8 +1701,8 @@ def clean_rocoto_dict(rocotodict): 1. A task dictionary containing no "command" key 2. 
A metatask dictionary containing no task dictionaries - - Args: + + Args: rocotodict (dict): A dictionary containing Rocoto workflow settings """ From dbcbcaf679e404b7d1a6553ed4e3c95042816eca Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 18 Oct 2024 13:45:29 -0600 Subject: [PATCH 159/260] Remove trailing whitespace. --- ush/set_cycle_and_obs_timeinfo.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index a354139352..108615516c 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -39,7 +39,7 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, """ print_input_args(locals()) - + valid_values = ['string', 'datetime'] if return_type not in valid_values: raise ValueError("Invalid value for 'a'. Expected 1, 2, or 3.") @@ -72,7 +72,7 @@ def check_temporal_consistency_cumul_fields( configuration dictionary and ensures that certain temporal constraints on these parameters are satisfied. It then returns an updated version of the verification configuration dictionary that satisfies these constraints. - + The constraints are on the accumulation intervals associated with the cumulative forecast fields (and corresponding observation type pairs) that are to be verified. The constraints on each such accumulation interval @@ -181,7 +181,7 @@ def check_temporal_consistency_cumul_fields( obs_avail_hrs_of_day = [hr for hr in range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)] obs_avail_hrs_of_day_str = ['%02d' % int(hr) for hr in obs_avail_hrs_of_day] # - # Get the array of accumulation intervals for the current cumulative field. + # Get the array of accumulation intervals for the current cumulative field. # Then loop over them to ensure that the constraints listed above are # satisfied. If for a given accumulation one or more of the constraints # is not satisfied, remove that accumulation from the list of accumulations @@ -199,7 +199,7 @@ def check_temporal_consistency_cumul_fields( # Initialize a sub-sub-dictionary in one of the dictionaries to be returned. fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = [] # - # Make sure that the accumulation interval is less than or equal to the + # Make sure that the accumulation interval is less than or equal to the # forecast length. # if accum_hrs > fcst_len_hrs: @@ -386,7 +386,7 @@ def set_fcst_output_times_and_obs_days_all_cycles( # Initialize dictionaries that will contain the various forecast output # time and obs day information. Note that we initialize the contents of - # these dictionaries as sets because that better suites the data manipulation + # these dictionaries as sets because that better suites the data manipulation # we will need to do, but these sets will later be converted to lists. fcst_output_times_all_cycles = dict() fcst_output_times_all_cycles['inst'] = set() From 7545d253888786b6d6ee3d8171dbd0b48ca29bdd Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 16 Oct 2024 14:19:52 -0600 Subject: [PATCH 160/260] In order for the temporal consistency checks on various vx parameters and corresponding adjustments to them to be effective (i.e. 
in order for any necessary adjustments to make it into the rocoto xml file),
 move the call to the function that performs these checks and adjustments to
 a place BEFORE the call to extend_yaml() that "freezes" (hard-codes) the
 accumulations for which the PcpCombine and other tasks are run (this
 freezing should happen AFTER any adjustments are made to the list of
 user-specified accumulations).

---
 ush/setup.py | 59 +++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 15 deletions(-)

diff --git a/ush/setup.py b/ush/setup.py
index 899f05586f..975c22264a 100644
--- a/ush/setup.py
+++ b/ush/setup.py
@@ -241,7 +241,46 @@ def _add_jobname(tasks):
         except:
             pass
         cfg_d["workflow"]["EXPT_BASEDIR"] = os.path.abspath(expt_basedir)
+    #
+    # -----------------------------------------------------------------------
+    #
+    # Ensure that the configuration parameters associated with cumulative
+    # fields (e.g. APCP) in the verification section of the experiment
+    # dictionary are temporally consistent, e.g. that accumulation intervals
+    # are less than or equal to the forecast length.  Update the verification
+    # section of the dictionary to remove inconsistencies.
+    #
+    # -----------------------------------------------------------------------
+    #
+    vx_config = cfg_d["verification"]
+    workflow_config = cfg_d["workflow"]
+
+    date_first_cycl = workflow_config.get("DATE_FIRST_CYCL")
+    date_last_cycl = workflow_config.get("DATE_LAST_CYCL")
+    incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ"))
+    fcst_len_hrs = workflow_config.get("FCST_LEN_HRS")
+    vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS")
+
+    # Convert various times and time intervals from integers or strings to
+    # datetime or timedelta objects.
+    date_first_cycl_dt = datetime.datetime.strptime(date_first_cycl, "%Y%m%d%H")
+    date_last_cycl_dt = datetime.datetime.strptime(date_last_cycl, "%Y%m%d%H")
+    cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq)
+    fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs)
+    vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs)
+
+    # Generate a list containing the starting times of the cycles.
+    cycle_start_times \
+        = set_cycle_dates(date_first_cycl_dt, date_last_cycl_dt, cycl_intvl_dt,
+                          return_type='datetime')
+
+    # Call function that runs the consistency checks on the vx parameters.
+    vx_config, fcst_obs_matched_times_all_cycles_cumul \
+        = check_temporal_consistency_cumul_fields(
+            vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt)
+
+    cfg_d['verification'] = vx_config
 
     extend_yaml(cfg_d)
 
     # Do any conversions of data types
@@ -603,21 +642,11 @@ def _remove_tag(tasks, tag):
     cycle_start_times \
         = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt,
                           return_type='datetime')
-    #
-    # -----------------------------------------------------------------------
-    #
-    # Ensure that the configuration parameters associated with cumulative
-    # fields (e.g. APCP) in the verification section of the experiment
-    # dicitonary are temporally consistent, e.g. that accumulation intervals
-    # are less than or equal to the forecast length. Update the verification
-    # section of the dictionary to remove inconsistencies.
- # - # ----------------------------------------------------------------------- - # - vx_config, fcst_obs_matched_times_all_cycles_cumul \ - = check_temporal_consistency_cumul_fields( - vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) - expt_config["verification"] = vx_config + print(f"") + print(f"IIIIIIIIIIIIIII") + print(f"cycle_start_times = ") + pprint(cycle_start_times) + #mnmnmnmnmnmnmn # # ----------------------------------------------------------------------- # From 21374ca6c643363cc09f8094a7704774a1816921 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Fri, 18 Oct 2024 13:54:27 -0600 Subject: [PATCH 161/260] Remove debugging code and add a blank line. --- ush/set_cycle_and_obs_timeinfo.py | 1 + ush/setup.py | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 108615516c..ddc948b583 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -612,6 +612,7 @@ def get_obs_retrieve_times_by_day( Dictionary of dictionaries containing times at which each type of obs is needed on each obs day. """ + # Convert string contents of input dictionaries to datetime objects. for time_type in ['cumul', 'inst']: fcst_output_times_all_cycles[time_type] \ diff --git a/ush/setup.py b/ush/setup.py index 975c22264a..0aae872b68 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -642,11 +642,6 @@ def _remove_tag(tasks, tag): cycle_start_times \ = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, return_type='datetime') - print(f"") - print(f"IIIIIIIIIIIIIII") - print(f"cycle_start_times = ") - pprint(cycle_start_times) - #mnmnmnmnmnmnmn # # ----------------------------------------------------------------------- # From 5401569f1904d9d1940b7a532b05bd3a778325b5 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 20 Oct 2024 08:28:23 -0600 Subject: [PATCH 162/260] Drop the "_NDAS" and "_ndas" suffixes from pb2nc tasks since prepbufr files can come from sources other than NDAS (e.g. GDAS). --- ...IONAL_RUN_MET_PB2NC_OBS_NDAS => JREGIONAL_RUN_MET_PB2NC_OBS} | 2 +- parm/wflow/verify_pre.yaml | 2 +- ...un_met_pb2nc_obs_ndas.sh => exregional_run_met_pb2nc_obs.sh} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename jobs/{JREGIONAL_RUN_MET_PB2NC_OBS_NDAS => JREGIONAL_RUN_MET_PB2NC_OBS} (98%) rename scripts/{exregional_run_met_pb2nc_obs_ndas.sh => exregional_run_met_pb2nc_obs.sh} (100%) diff --git a/jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS b/jobs/JREGIONAL_RUN_MET_PB2NC_OBS similarity index 98% rename from jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS rename to jobs/JREGIONAL_RUN_MET_PB2NC_OBS index a6ed90a1a3..89c9bb73f4 100755 --- a/jobs/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS +++ b/jobs/JREGIONAL_RUN_MET_PB2NC_OBS @@ -76,7 +76,7 @@ NDAS observations. # #----------------------------------------------------------------------- # -$SCRIPTSdir/exregional_run_met_pb2nc_obs_ndas.sh || \ +$SCRIPTSdir/exregional_run_met_pb2nc_obs.sh || \ print_err_msg_exit "\ Call to ex-script corresponding to J-job \"${scrfunc_fn}\" failed." 
# diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index a3b49cc169..d5ce7885e2 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -76,7 +76,7 @@ task_run_MET_Pb2nc_obs_NDAS: attrs: cycledefs: cycledefs_obs_days_inst maxtries: '2' - command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS_NDAS"' + command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' envars: <<: *default_vars VAR: ADPSFC diff --git a/scripts/exregional_run_met_pb2nc_obs_ndas.sh b/scripts/exregional_run_met_pb2nc_obs.sh similarity index 100% rename from scripts/exregional_run_met_pb2nc_obs_ndas.sh rename to scripts/exregional_run_met_pb2nc_obs.sh From 88e48e29bb06f2c829ce2eb5119bf3fbe1a39bf7 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 21 Oct 2024 15:53:10 -0600 Subject: [PATCH 163/260] Modifications to address Mike K's PR review comments. --- scripts/exregional_get_verif_obs.sh | 3 +- ush/get_obs.py | 207 ++++++++++++---------------- ush/set_cycle_and_obs_timeinfo.py | 81 ++++++----- ush/setup.py | 1 - 4 files changed, 126 insertions(+), 166 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index d1ee4116e8..a07deecc25 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -61,9 +61,8 @@ Valid observation types are: " fi -script_bn="get_obs" cmd="\ -python3 -u ${USHdir}/${script_bn}.py \ +python3 -u ${USHdir}/get_obs.py \ --var_defns_path "${GLOBAL_VAR_DEFNS_FP}" \ --obtype ${OBTYPE} \ --obs_day ${PDY}" diff --git a/ush/get_obs.py b/ush/get_obs.py index 50b7c45ae3..666c6f1298 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -11,6 +11,7 @@ from pprint import pprint from math import ceil, floor import subprocess +import retrieve_data from python_utils import ( load_yaml_config, ) @@ -26,7 +27,7 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): Note that for cumulative fields (like CCPA and NOHRSC, as opposed to instantaneous ones like MRMS and NDAS), the archive files corresponding to hour 0 of the day represent accumulations over the previous day. Thus, - here, we never return an achive hour of 0 for cumulative fields. Instead, + here, we never return an archive hour of 0 for cumulative fields. Instead, if the specified hour-of-day is 0, we consider that to represent the 0th hour of the NEXT day (i.e. the 24th hour of the current day) and set the archive hour to 24. @@ -57,32 +58,32 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): msg = dedent(f""" The specified observation type (after converting to upper case) is not supported: - obtype_upper = {obtype_upper} + {obtype_upper = } Valid observation types are: {valid_obtypes} """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) - # Ensure that the archive inerval divides evenly into 24 hours. + # Ensure that the archive interval divides evenly into 24 hours. remainder = 24 % arcv_intvl_hrs if remainder != 0: msg = dedent(f""" The archive interval for obs of type {obtype} must divide evenly into 24 but doesn't: - arcv_intvl_hrs = {arcv_intvl_hrs} + {arcv_intvl_hrs = } 24 % arcv_intvl_hrs = {remainder} """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) if (hod < 0) or (hod > 23): msg = dedent(f""" The specified hour-of-day must be between 0 and 23, inclusive, but isn't: - hod = {hod} + {hod = } """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) # Set the archive hour. 
This depends on the obs type because each obs # type can organize its observation files into archives in a different @@ -113,8 +114,10 @@ def get_obs(config, obtype, yyyymmdd_task): This script checks for the existence of obs files of the specified type at the locations specified by variables in the SRW App's configuration file. If one or more of these files do not exist, it retrieves them from - a data store and places them in the locations specified by the configuration - variables, renaming them if necessary. + a data store (using the retrieve_data.py script and as specified by the + configuration file parm/data_locations.yml for that script) and places + them in the locations specified by the App's configuration variables, + renaming them if necessary. Args: config: @@ -298,10 +301,9 @@ def get_obs(config, obtype, yyyymmdd_task): vx_config = cfg['verification'] # Get the time interval (in hours) at which the obs are available. - key = obtype + '_OBS_AVAIL_INTVL_HRS' - obs_avail_intvl_hrs = vx_config[key] + obs_avail_intvl_hrs = vx_config[f'{obtype}_OBS_AVAIL_INTVL_HRS'] - # The obs availability inerval must divide evenly into 24 hours. Otherwise, + # The obs availability interval must divide evenly into 24 hours. Otherwise, # different days would have obs available at different hours-of-day. Make # sure this is the case. remainder = 24 % obs_avail_intvl_hrs @@ -309,19 +311,18 @@ def get_obs(config, obtype, yyyymmdd_task): msg = dedent(f""" The obs availability interval for obs of type {obtype} must divide evenly into 24 but doesn't: - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + {obs_avail_intvl_hrs = } 24 % obs_avail_intvl_hrs = {remainder} """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) # For convenience, convert the obs availability interval to a datetime # object. obs_avail_intvl = dt.timedelta(hours=obs_avail_intvl_hrs) # Get the base directory for the observations. - key = obtype + '_OBS_DIR' - obs_dir = vx_config[key] + obs_dir = vx_config[f'{obtype}_OBS_DIR'] # For each observation type, set the group of fields contained in those # observation files that we need for verification. Each group of fields @@ -368,8 +369,7 @@ def get_obs(config, obtype, yyyymmdd_task): # locations, they will be retrieved from HPSS and placed at these locations. 
    fp_proc_templates = []
     for fg in field_groups_in_obs:
-        key = 'OBS_' + obtype + '_' + fg + '_FN_TEMPLATE'
-        fn_proc_template = vx_config[key]
+        fn_proc_template = vx_config[f'OBS_{obtype}_{fg}_FN_TEMPLATE']
         fp_proc_templates.append(os.path.join(obs_dir, fn_proc_template))
 #
 #-----------------------------------------------------------------------
@@ -393,45 +393,41 @@ def get_obs(config, obtype, yyyymmdd_task):
     fields_in_filenames = []
     levels_in_filenames = []
     if obtype == 'MRMS':
+        valid_mrms_field_groups = ['REFC', 'RETOP']
         for fg in field_groups_in_obs:
+            if fg not in valid_mrms_field_groups:
+                msg = dedent(f"""
+                    Invalid field group specified for obs type:
+                      {obtype = }
+                      {fg = }
+                    Valid field groups are:
+                      {valid_mrms_field_groups}
+                    """)
+                logging.error(msg)
+                raise ValueError(msg)
             if fg == 'REFC':
                 fields_in_filenames.append('MergedReflectivityQCComposite')
                 levels_in_filenames.append('00.50')
             elif fg == 'RETOP':
                 fields_in_filenames.append('EchoTop')
                 levels_in_filenames.append('18_00.50')
-            else:
-                msg = dedent(f"""
-                    Invalid field specified for obs type:
-                      obtype = {obtype}
-                      field = {field}
-                    """)
-                logging.error(msg)
-                raise Exception(msg)
 
     # CCPA files for 1-hour accumulation have incorrect metadata in the files
     # under the "00" directory from 20180718 to 20210504. Set these starting
     # and ending dates as datetime objects for later use.
-    yyyymmdd_bad_metadata_start_str = None
-    yyyymmdd_bad_metadata_end_str = None
-    yyyymmdd_bad_metadata_start = None
-    yyyymmdd_bad_metadata_end = None
-    if obtype == 'CCPA':
-        yyyymmdd_bad_metadata_start_str = '20180718'
-        yyyymmdd_bad_metadata_end_str = '20210504'
-        yyyymmdd_bad_metadata_start = dt.datetime.strptime(yyyymmdd_bad_metadata_start_str, '%Y%m%d')
-        yyyymmdd_bad_metadata_end = dt.datetime.strptime(yyyymmdd_bad_metadata_end_str, '%Y%m%d')
+    ccpa_bad_metadata_start = dt.datetime.strptime('20180718', '%Y%m%d')
+    ccpa_bad_metadata_end = dt.datetime.strptime('20210504', '%Y%m%d')
+
     #
     #-----------------------------------------------------------------------
     #
-    # Get the list of all the times in the current day at which to retrieve
-    # obs. This is an array with elements having format "YYYYMMDDHH".
+    # Form a string list of all the times in the current day (each in the
+    # format "YYYYMMDDHH") at which to retrieve obs.
     #
     #-----------------------------------------------------------------------
     #
     yyyymmdd_task_str = dt.datetime.strftime(yyyymmdd_task, '%Y%m%d')
-    key = 'OBS_RETRIEVE_TIMES_' + obtype + '_' + yyyymmdd_task_str
-    obs_retrieve_times_crnt_day_str = vx_config[key]
+    obs_retrieve_times_crnt_day_str = vx_config[f'OBS_RETRIEVE_TIMES_{obtype}_{yyyymmdd_task_str}']
     obs_retrieve_times_crnt_day \
     = [dt.datetime.strptime(yyyymmddhh_str, '%Y%m%d%H') for yyyymmddhh_str in obs_retrieve_times_crnt_day_str]
 #
@@ -465,17 +461,14 @@ def get_obs(config, obtype, yyyymmdd_task):
 
     # Initial guess for starting archive hour. This is set to the archive
     # hour containing obs at the first obs retrieval time of the day.
-    hod_first = obs_retrieve_times_crnt_day[0].hour
-    arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod_first)
+    arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, obs_retrieve_times_crnt_day[0].hour)
 
     # Ending archive hour. This is set to the archive hour containing obs at
     # the last obs retrieval time of the day.
- hod_last = obs_retrieve_times_crnt_day[-1].hour - arcv_hr_end = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod_last) + arcv_hr_end = get_obs_arcv_hr(obtype, arcv_intvl_hrs, obs_retrieve_times_crnt_day[-1].hour) # Set other variables needed below when evaluating the METplus template for # the full path to the processed observation files. - one_hour = dt.timedelta(hours=1) ushdir = config['user']['USHdir'] # Create dictionary containing the paths to all the processed obs files @@ -491,7 +484,7 @@ def get_obs(config, obtype, yyyymmdd_task): for yyyymmddhh in obs_retrieve_times_crnt_day: # Set the lead hour, i.e. the number of hours from the beginning of the # day at which the file is valid. - lhr = int((yyyymmddhh - yyyymmdd_task)/one_hour) + lhr = int((yyyymmddhh - yyyymmdd_task)/dt.timedelta(hours=1)) # Call a bash script to evaluate the template for the full path to the # file containing METplus timestrings at the current time. This should # be upgraded to a python script at some point. @@ -517,18 +510,17 @@ def get_obs(config, obtype, yyyymmdd_task): num_existing_files += 1 msg = dedent(f""" File already exists on disk: - fp_proc = {fp_proc} + {fp_proc = } """) - logging.info(msg) + logging.debug(msg) else: - hod = yyyymmddhh.hour - arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod) + arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, yyyymmddhh.hour) msg = dedent(f""" File does not exist on disk: - fp_proc = {fp_proc} + {fp_proc = } Setting the hour (since hour 0 of the current task day) of the first archive to retrieve to: - arcv_hr_start = {arcv_hr_start} + {arcv_hr_start = } """) logging.info(msg) do_break = True @@ -537,14 +529,13 @@ def get_obs(config, obtype, yyyymmdd_task): # If the number of obs files that already exist on disk is equal to the # number of obs files needed, then there is no need to retrieve any files. - num_obs_retrieve_times_crnt_day = len(obs_retrieve_times_crnt_day) - num_files_needed = num_obs_retrieve_times_crnt_day*num_field_groups + num_files_needed = len(obs_retrieve_times_crnt_day)*num_field_groups if num_existing_files == num_files_needed: msg = dedent(f""" All obs files needed for the current day (yyyymmdd_task) already exist on disk: - yyyymmdd_task = {yyyymmdd_task} + {yyyymmdd_task = } Thus, there is no need to retrieve any files. """) logging.info(msg) @@ -554,22 +545,20 @@ def get_obs(config, obtype, yyyymmdd_task): # the number of obs files needed, then we will need to retrieve files. # In this case, set the sequence of hours corresponding to the archives # from which files will be retrieved. 
-    else:
-
-        arcv_hrs = [hr for hr in range(arcv_hr_start, arcv_hr_end+arcv_intvl_hrs, arcv_intvl_hrs)]
-        msg = dedent(f"""
-            At least some obs files needed needed for the current day (yyyymmdd_task)
-            do not exist on disk:
-              yyyymmdd_task = {yyyymmdd_task}
-            The number of obs files needed for the current day is:
-              num_files_needed = {num_files_needed}
-            The number of obs files that already exist on disk is:
-              num_existing_files = {num_existing_files}
-            Will retrieve remaining files by looping over archives corresponding to
-            the following hours (since hour 0 of the current day):
-              arcv_hrs = {arcv_hrs}
-            """)
-        logging.info(msg)
+    arcv_hrs = [hr for hr in range(arcv_hr_start, arcv_hr_end+arcv_intvl_hrs, arcv_intvl_hrs)]
+    msg = dedent(f"""
+        At least some obs files needed for the current day (yyyymmdd_task)
+        do not exist on disk:
+          {yyyymmdd_task = }
+        The number of obs files needed for the current day is:
+          {num_files_needed = }
+        The number of obs files that already exist on disk is:
+          {num_existing_files = }
+        Will retrieve remaining files by looping over archives corresponding to
+        the following hours (since hour 0 of the current day):
+          {arcv_hrs = }
+        """)
+    logging.info(msg)
 #
 #-----------------------------------------------------------------------
 #
@@ -595,18 +584,9 @@ def get_obs(config, obtype, yyyymmdd_task):
     #-----------------------------------------------------------------------
     #
 
-    # Whether to move the files or copy them from their raw to their processed
-    # locations.
-    mv_or_cp = 'cp'
     # Whether to remove raw observations after processed directories have
     # been created from them.
-    key = 'REMOVE_RAW_OBS_' + obtype
-    remove_raw_obs = vx_config[key]
-    # If the raw directories and files are to be removed at the end of this
-    # script, no need to copy the files since the raw directories are going
-    # to be removed anyway.
-    if remove_raw_obs:
-        mv_or_cp = 'mv'
+    remove_raw_obs = vx_config[f'REMOVE_RAW_OBS_{obtype}']
 
     # Base directory that will contain the archive subdirectories in which
     # the files extracted from each archive (tar) file will be placed. We
@@ -657,15 +637,9 @@ def get_obs(config, obtype, yyyymmdd_task):
     #
     # MRMS:
     # There is only one archive per day, and it contains all the raw obs
-    # files needed to generate processed obs files for all hours of the
-    # current day. Thus, we will only ever need this one archive, so there
-    # is no need to include the archive's hour information (there really
-    # isn't any) in the raw subdirectory name. In addition, the archive's
-    # year, month, and day is the same as that of the obs day's, so it is
-    # already included in the name of the raw base directory. Sine this is
-    # the only info we need to avoid differnt get_obs tasks clobbering each
-    # other's output obs files, for simplicity we simply do not create a raw
-    # archive subdirectory.
+    # files needed to generate processed obs files for the current day.
+    # Since we will only ever need this one archive for a given day,
+    # for simplicity we simply do not create a raw archive subdirectory.
    #
    # NDAS:
    # Same as for CCPA.
@@ -703,8 +677,8 @@ def get_obs(config, obtype, yyyymmdd_task): for obs_retrieve_time in obs_retrieve_times_crnt_day: if (obs_retrieve_time >= arcv_contents_start) and \ (obs_retrieve_time <= arcv_contents_end): - do_retrieve = True - break + do_retrieve = True + break if not do_retrieve: msg = dedent(f""" @@ -712,10 +686,10 @@ def get_obs(config, obtype, yyyymmdd_task): hour 0 of the next day if considering a cumulative obs type) fall in the range spanned by the current {arcv_intvl_hrs}-hourly archive file. The bounds of the data in the current archive are: - arcv_contents_start = {arcv_contents_start} - arcv_contents_end = {arcv_contents_end} + {arcv_contents_start = } + {arcv_contents_end = } The times at which obs need to be retrieved are: - obs_retrieve_times_crnt_day = {obs_retrieve_times_crnt_day} + {obs_retrieve_times_crnt_day = } """) logging.info(msg) @@ -747,18 +721,15 @@ def get_obs(config, obtype, yyyymmdd_task): # files in the current archive, although we will make use of only 6 of # these (we will not use the tm00 file). parmdir = config['user']['PARMdir'] - cmd = ' '.join(['python3', \ - '-u', os.path.join(ushdir, 'retrieve_data.py'), \ - '--debug', \ - '--file_set', 'obs', \ - '--config', os.path.join(parmdir, 'data_locations.yml'), \ - '--cycle_date', yyyymmddhh_arcv_str, \ - '--data_stores', 'hpss', \ - '--data_type', obtype + '_obs', \ - '--output_path', arcv_dir_raw, \ - '--summary_file', 'retrieve_data.log']) - result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - rc = result.returncode + args = ['--debug', \ + '--file_set', 'obs', \ + '--config', os.path.join(parmdir, 'data_locations.yml'), \ + '--cycle_date', yyyymmddhh_arcv_str, \ + '--data_stores', 'hpss', \ + '--data_type', obtype + '_obs', \ + '--output_path', arcv_dir_raw, \ + '--summary_file', 'retrieve_data.log'] + retrieve_data.main(args) # Get the list of times corresponding to the obs files in the current # archive. This is a list of datetime objects. @@ -836,7 +807,7 @@ def get_obs(config, obtype, yyyymmdd_task): # Note that the script we call to do this (mrms_pull_topofhour.py) assumes # a certain file naming convention. That convention must match the names # of the files that the retrieve_data.py script called above ends up - # retrieving. The list of possibile templates for these names is given + # retrieving. The list of possible templates for these names is given # in parm/data_locations.yml, but which of those is actually used is not # known until retrieve_data.py completes. Thus, that information needs # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py. @@ -854,7 +825,7 @@ def get_obs(config, obtype, yyyymmdd_task): rc = result.returncode # The raw file name needs to be the same as what the retrieve_data.py - # script called above ends up retrieving. The list of possibile templates + # script called above ends up retrieving. The list of possible templates # for this name is given in parm/data_locations.yml, but which of those # is actually used is not known until retrieve_data.py completes. Thus, # that information needs to be passed back by the script and used here. @@ -893,20 +864,20 @@ def get_obs(config, obtype, yyyymmdd_task): {fp_raw} ... """) - logging.info(msg) + logging.debug(msg) yyyymmdd = yyyymmddhh.replace(hour=0, minute=0, second=0) # CCPA files for 1-hour accumulation have incorrect metadata in the files # under the "00" directory from 20180718 to 20210504. After the data is # pulled, reorganize into correct yyyymmdd structure. 
if (obtype == 'CCPA') and \ - ((yyyymmdd >= yyyymmdd_bad_metadata_start) and (yyyymmdd <= yyyymmdd_bad_metadata_end)) and \ + ((yyyymmdd >= ccpa_bad_metadata_start) and (yyyymmdd <= ccpa_bad_metadata_end)) and \ (((hr >= 19) and (hr <= 23)) or (hr == 0)): cmd = ' '.join(['wgrib2', fp_raw, '-set_date -24hr -grib', fp_proc, '-s']) result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - elif mv_or_cp == 'mv': + elif remove_raw_obs: shutil.move(fp_raw, fp_proc) - elif mv_or_cp == 'cp': + else: shutil.copy(fp_raw, fp_proc) # #----------------------------------------------------------------------- @@ -916,10 +887,7 @@ def get_obs(config, obtype, yyyymmdd_task): #----------------------------------------------------------------------- # if remove_raw_obs: - msg = dedent(f""" - Removing raw obs directories ..." - """) - logging.info(msg) + logging.info("Removing raw obs directories ...") shutil.rmtree(basedir_raw) return True @@ -934,7 +902,6 @@ def parse_args(argv): parser.add_argument( "--obtype", - dest="obtype", type=str, required=True, choices=['CCPA', 'NOHRSC', 'MRMS', 'NDAS'], @@ -943,7 +910,6 @@ def parse_args(argv): parser.add_argument( "--obs_day", - dest="obs_day", type=lambda d: dt.datetime.strptime(d, '%Y%m%d'), required=True, help="Date of observation day, in the form 'YYYMMDD'.", @@ -951,7 +917,6 @@ def parse_args(argv): parser.add_argument( "--var_defns_path", - dest="var_defns_path", type=str, required=True, help="Path to variable definitions file.", @@ -961,7 +926,6 @@ def parse_args(argv): for pair in (str.lower(lvl), str.upper(lvl))] parser.add_argument( "--log_level", - dest="log_level", type=str, required=False, default='info', @@ -972,7 +936,6 @@ def parse_args(argv): parser.add_argument( "--log_fp", - dest="log_fp", type=str, required=False, default='', diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index ddc948b583..9029731a94 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -42,15 +42,14 @@ def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, valid_values = ['string', 'datetime'] if return_type not in valid_values: - raise ValueError("Invalid value for 'a'. Expected 1, 2, or 3.") msg = dedent(f""" Invalid value for optional argument "return_type": - return_type = {return_type} + {return_type = } Valid values are: - valid_values = {valid_values} + {valid_values = } """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) # iterate over cycles all_cdates = [] @@ -168,17 +167,17 @@ def check_temporal_consistency_cumul_fields( msg = dedent(f""" The obs availability interval for obs of type {obtype} must divide evenly into 24 but doesn't: - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} - 24 % obs_avail_intvl_hrs = {remainder}" + {obs_avail_intvl_hrs = } + 24 % obs_avail_intvl_hrs = {remainder}" """) logging.error(msg) - raise Exception(msg) + raise ValueError(msg) # Assume that the obs are available at hour 0 of the day regardless # of obs type. obs_avail_hr_start = 0 obs_avail_hr_end = obs_avail_hr_start + 24 # Construct list of obs availability hours-of-day. - obs_avail_hrs_of_day = [hr for hr in range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)] + obs_avail_hrs_of_day = list(range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)) obs_avail_hrs_of_day_str = ['%02d' % int(hr) for hr in obs_avail_hrs_of_day] # # Get the array of accumulation intervals for the current cumulative field. 
@@ -207,13 +206,13 @@ def check_temporal_consistency_cumul_fields( The accumulation interval (accum_hrs) for the current cumulative forecast field (field_fcst) and corresponding observation type (obtype) is greater than the forecast length (fcst_len_hrs): - field_fcst = {field_fcst} - obtype = {obtype} - accum_hrs = {accum_hrs} - fcst_len_hrs = {fcst_len_hrs} - Thus, this forecast field cannot be accumulated over this interval. - Will remove this accumulation interval from the list of accumulation - intervals to verify for this field/obtype. + {field_fcst = } + {obtype = } + {accum_hrs = } + {fcst_len_hrs = } + Thus, this forecast field cannot be accumulated over this interval. Will + remove this accumulation interval from the list of accumulation intervals + to verify for this field/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -229,11 +228,11 @@ def check_temporal_consistency_cumul_fields( field (field_fcst) and corresponding observation type (obtype) is not evenly divisible by the observation type's availability interval (obs_avail_intvl_hrs): - field_fcst = {field_fcst} - obtype = {obtype} - accum_hrs = {accum_hrs} - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} - accum_hrs % obs_avail_intvl_hrs = {rem_obs} + {field_fcst = } + {obtype = } + {accum_hrs = } + {obs_avail_intvl_hrs = } + accum_hrs % obs_avail_intvl_hrs = {rem_obs} Thus, this observation type cannot be accumulated over this interval. Will remove this accumulation interval from the list of accumulation intervals to verify for this field/obtype. @@ -251,14 +250,14 @@ def check_temporal_consistency_cumul_fields( The accumulation interval (accum_hrs) for the current cumulative forecast field (field_fcst) and corresponding observation type (obtype) is not evenly divisible by the forecast output interval (fcst_output_intvl): - field_fcst = {field_fcst} - obtype = {obtype} - accum_hrs = {accum_hrs} hr - fcst_output_intvl_hrs = {fcst_output_intvl} hr - accum_hrs % fcst_output_intvl_hrs = {rem_fcst} - Thus, this forecast field cannot be accumulated over this interval. - Will remove this accumulation interval from the list of accumulation - intervals to verify for this field/obtype. + {field_fcst = } + {obtype = } + {accum_hrs = } + {fcst_output_intvl_hrs = } + accum_hrs % fcst_output_intvl_hrs = {rem_fcst} + Thus, this forecast field cannot be accumulated over this interval. Will + remove this accumulation interval from the list of accumulation intervals + to verify for this field/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -303,17 +302,17 @@ def check_temporal_consistency_cumul_fields( field (field_fcst) is such that the forecast will output the field on at least one of hour-of-day on which the corresponding observation type is not available: - field_fcst = {field_fcst} - obtype = {obtype} - accum_hrs = {accum_hrs} hr + {field_fcst = } + {obtype = } + {accum_hrs = } The forecast output hours-of-day for this field/accumulation interval combination are: - fcst_output_hrs_of_day_str = {fcst_output_hrs_of_day_str} + {fcst_output_hrs_of_day_str = } The hours-of-day at which the obs are available are: - obs_avail_hrs_of_day_str = {obs_avail_hrs_of_day_str} - Thus, at least some of the forecast output cannot be verified. - Will remove this accumulation interval from the list of accumulation - intervals to verify for this field/obtype. + {obs_avail_hrs_of_day_str = } + Thus, at least some of the forecast output cannot be verified. 
Will remove + this accumulation interval from the list of accumulation intervals to + verify for this field/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -335,8 +334,8 @@ def check_temporal_consistency_cumul_fields( msg = dedent(f""" The list of accumulation intervals (accum_intvls_hrs) for the current cumulative field to verify (field_fcst) is empty: - field_fcst = {field_fcst} - accum_intvls_hrs = {accum_intvls_hrs} + {field_fcst = } + {accum_intvls_hrs = } Removing this field from the list of fields to verify. The updated list is: {vx_config["VX_FIELDS"]} @@ -683,10 +682,10 @@ def get_obs_retrieve_times_by_day( msg = dedent(f""" The obs availability interval for obs of type {obtype} must divide evenly into 24 but doesn't: - obs_avail_intvl_hrs = {obs_avail_intvl_hrs} - 24 % obs_avail_intvl_hrs = {remainder}" + {obs_avail_intvl_hrs = } + 24 % obs_avail_intvl_hrs = {remainder}" """) - raise Exception(msg) + raise ValueError(msg) obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs) num_obs_avail_times_per_day = int(24/obs_avail_intvl_hrs) diff --git a/ush/setup.py b/ush/setup.py index 0aae872b68..703bc094fd 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -740,7 +740,6 @@ def _remove_tag(tasks, tag): # If there are no vx fields specified, remove those tasks that are necessary # for all observation types. - vx_config = expt_config["verification"] vx_fields = vx_config["VX_FIELDS"] if not vx_fields: metatask = "metatask_check_post_output_all_mems" From eb06d428e9590c693f988739804e8cae0e90d622 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 22 Oct 2024 09:12:25 -0600 Subject: [PATCH 164/260] Additional mods for Mike K.'s PR review. --- scripts/exregional_get_verif_obs.sh | 2 +- ush/get_obs.py | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index a07deecc25..d457a6b5d8 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -68,7 +68,7 @@ python3 -u ${USHdir}/get_obs.py \ --obs_day ${PDY}" print_info_msg " CALLING: ${cmd}" -${cmd} || print_err_msg_exit "Error calling ${script_bn}.py." +${cmd} || print_err_msg_exit "Error calling get_obs.py" # #----------------------------------------------------------------------- # diff --git a/ush/get_obs.py b/ush/get_obs.py index 666c6f1298..f88ab9a27e 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -53,12 +53,10 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): """ valid_obtypes = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] - obtype_upper = obtype.upper() - if obtype_upper not in valid_obtypes: + if obtype not in valid_obtypes: msg = dedent(f""" - The specified observation type (after converting to upper case) is not - supported: - {obtype_upper = } + The specified observation type is not supported: + {obtype = } Valid observation types are: {valid_obtypes} """) @@ -91,19 +89,19 @@ def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): # through 6 of the day in the archive labeled with hour 6 while an # instantaneous obs type may put the obs files for hours 0 through 5 of # the day in the archive labeled with hour 6. 
-    if obtype_upper in ['CCPA']:
+    if obtype in ['CCPA']:
         if hod == 0:
             arcv_hr = 24
         else:
             arcv_hr = ceil(hod/arcv_intvl_hrs)*arcv_intvl_hrs
-    elif obtype_upper in ['NOHRSC']:
+    elif obtype in ['NOHRSC']:
         if hod == 0:
             arcv_hr = 24
         else:
             arcv_hr = floor(hod/arcv_intvl_hrs)*arcv_intvl_hrs
-    elif obtype_upper in ['MRMS']:
+    elif obtype in ['MRMS']:
         arcv_hr = (floor(hod/arcv_intvl_hrs))*arcv_intvl_hrs
-    elif obtype_upper in ['NDAS']:
+    elif obtype in ['NDAS']:
         arcv_hr = (floor(hod/arcv_intvl_hrs) + 1)*arcv_intvl_hrs
 
     return arcv_hr

From 105d1d839a33a927ecd60f41eeb41b38346b6ca8 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Wed, 23 Oct 2024 14:37:09 -0600
Subject: [PATCH 166/260] Change name of App variable VX_FIELDS to
 VX_FIELD_GROUPS to more correctly represent its contents since it contains a
 list of field groups (not just fields) to verify; fix comments and output
 messages related to this issue; rename some local variables related to this
 issue.
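
As a rough illustration of the distinction the new name captures (a sketch
for this message only, mirroring the vx_field_groups_all_by_obtype mapping
in ush/setup.py and vx_field_info in ush/set_cycle_and_obs_timeinfo.py; the
dictionary name below is hypothetical, not code added by this commit):

    # Each VX_FIELD_GROUPS entry names a group of one or more fields, keyed
    # here by the obs type used to verify it.
    obtype_to_field_groups = {
        "CCPA":   ["APCP"],              # cumulative precipitation
        "NOHRSC": ["ASNOW"],             # cumulative snowfall
        "MRMS":   ["REFC", "RETOP"],     # instantaneous radar fields
        "NDAS":   ["ADPSFC", "ADPUPA"],  # surface and upper-air point obs
    }
    # A single entry such as ADPSFC stands for a whole group of surface
    # fields, which is why "field groups" describes the list better than
    # "fields".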
--- parm/wflow/verify_det.yaml | 4 +- parm/wflow/verify_ens.yaml | 8 +- ...g.custom_ESGgrid_Great_Lakes_snow_8km.yaml | 2 +- ...g.MET_ensemble_verification_winter_wx.yaml | 2 +- ...c.init_00z_fcstlen_36hr.winter_wx.SRW.yaml | 2 +- ush/config_defaults.yaml | 25 +-- ush/set_cycle_and_obs_timeinfo.py | 145 +++++++++--------- ush/setup.py | 93 +++++------ ush/valid_param_vals.yaml | 2 +- 9 files changed, 144 insertions(+), 139 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index a08fe69e3e..f416ce7974 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -94,7 +94,7 @@ metatask_GridStat_MRMS_all_mems: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' metatask_GridStat_MRMS_mem#mem#: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_GridStat_vx_#VAR#_mem#mem#: <<: *default_task_verify_det command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' @@ -141,7 +141,7 @@ metatask_PointStat_NDAS_all_mems: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' metatask_PointStat_NDAS_mem#mem#: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_PointStat_vx_#VAR#_mem#mem#: <<: *default_task_verify_det command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index f92aef4c60..8aed2d02b3 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -92,7 +92,7 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: metatask_GenEnsProd_EnsembleStat_MRMS: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_GenEnsProd_vx_#VAR#: &task_GenEnsProd_MRMS <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GENENSPROD_OR_ENSEMBLESTAT"' @@ -144,7 +144,7 @@ metatask_GenEnsProd_EnsembleStat_MRMS: metatask_GenEnsProd_EnsembleStat_NDAS: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_GenEnsProd_vx_#VAR#: &task_GenEnsProd_NDAS <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GENENSPROD_OR_ENSEMBLESTAT"' @@ -248,7 +248,7 @@ metatask_GridStat_NOHRSC_ensmeanprob_all_accums: metatask_GridStat_MRMS_ensprob: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' 
task_run_MET_GridStat_vx_ensprob_#VAR#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENSPROB"' @@ -275,7 +275,7 @@ metatask_PointStat_NDAS_ensmeanprob: statlc: mean prob metatask_PointStat_NDAS_ens#statlc#: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_PointStat_vx_ens#statlc#_#VAR#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' diff --git a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml index 0caffe5a46..a55cc5f91a 100644 --- a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml +++ b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml @@ -60,4 +60,4 @@ verification: NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/NDAS_obs' NOHRSC_OBS_DIR: '{{ workflow.EXPTDIR }}/NOHRSC_obs' VX_FCST_MODEL_NAME: Michigan_Ontario_snow_8km - VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] + VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml index 1845255f54..018b8abbc6 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml @@ -31,7 +31,7 @@ global: DO_ENSEMBLE: true NUM_ENS_MEMBERS: 10 verification: - VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} {{- "{valid?fmt=%Y%m%d}/sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml index aa4b731e3a..11eaf7b63c 100644 --- a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml +++ b/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml @@ -58,5 +58,5 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' - VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/SRW' diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index b216ccdd72..220c5e6a2b 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2473,28 +2473,29 @@ verification: # This is used in forming the names of the verification output files as # well as in the contents of those files. 
  #
-  # VX_FIELDS:
-  # The fields or groups of fields on which to run verification. Because
-  # accumulated snow (ASNOW) is often not of interest in non-winter cases
-  # and because observation files for ASNOW are not available on NOAA
-  # HPSS for retrospective cases before March 2020, by default ASNOW is
-  # not included VX_FIELDS, but it may be added to this list in order to
-  # include the verification tasks for ASNOW in the workflow.
+  # VX_FIELD_GROUPS:
+  # The groups of fields (some of which may consist of a single field) on
+  # which to run verification. Because accumulated snow (ASNOW) is often
+  # not of interest in non-winter cases and because observation files for
+  # ASNOW are not available on NOAA HPSS for retrospective cases before
+  # March 2020, by default ASNOW is not included in VX_FIELD_GROUPS, but it
+  # may be added to this list in order to include the verification tasks
+  # for ASNOW in the workflow.
   #
   # VX_APCP_ACCUMS_HRS:
   # The 2-digit accumulation periods (in units of hours) to consider for
-  # APCP (accumulated precipitation). If VX_FIELDS contains "APCP", then
-  # VX_APCP_ACCUMS_HRS must contain at least one element. If not,
+  # APCP (accumulated precipitation). If VX_FIELD_GROUPS contains "APCP",
+  # then VX_APCP_ACCUMS_HRS must contain at least one element. If not,
   # VX_APCP_ACCUMS_HRS will be ignored.
   #
   # VX_ASNOW_ACCUMS_HRS:
   # The 2-digit accumulation periods (in units of hours) to consider for
-  # ASNOW (accumulated snowfall). If VX_FIELDS contains "ASNOW", then
-  # VX_ASNOW_ACCUMS_HRS must contain at least one element. If not,
+  # ASNOW (accumulated snowfall). If VX_FIELD_GROUPS contains "ASNOW",
+  # then VX_ASNOW_ACCUMS_HRS must contain at least one element. If not,
   # VX_ASNOW_ACCUMS_HRS will be ignored.
   #
   VX_FCST_MODEL_NAME: '{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}'
-  VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ]
+  VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ]
   VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ]
   VX_ASNOW_ACCUMS_HRS: [ 6, 24 ]
 #
diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py
index 9029731a94..2130ad99ea 100644
--- a/ush/set_cycle_and_obs_timeinfo.py
+++ b/ush/set_cycle_and_obs_timeinfo.py
@@ -73,13 +73,13 @@ def check_temporal_consistency_cumul_fields(
     the verification configuration dictionary that satisfies these constraints.
 
     The constraints are on the accumulation intervals associated with the
-    cumulative forecast fields (and corresponding observation type pairs) that
+    cumulative field groups (and the corresponding observation types) that
     are to be verified. The constraints on each such accumulation interval
     are as follows:
 
-    1) The accumulation interval is less than or equal to the forecast length
-       (since otherwise, the forecast field cannot be accumulated over that
-       interval).
+    1) The accumulation interval is less than or equal to the forecast length.
+       This ensures that the forecast(s) can accumulate the field(s) in the
+       field group over that interval.
 
     2) The obs availability interval evenly divides the accumulation interval.
        This ensures that the obs can be added together to obtain accumulated
@@ -90,11 +90,11 @@ def check_temporal_consistency_cumul_fields(
 
     3) The forecast output interval evenly divides the accumulation interval.
        This ensures that the forecast output can be added together to obtain
-       accumulated values of the forecast field, e.g.
if the forecast output - interval is 3 hours, the resulting 3-hourly APCP outputs from the forecast - can be added to obtain 6-hourly forecast APCP. Note that this also ensures - that the accumulation interval is greater than or equal to the forecast - output interval. + accumulated values of the fields in the field group. For example, if + the forecast output interval is 3 hours, the resulting 3-hourly APCP + outputs from the forecast can be added to obtain 6-hourly forecast APCP. + Note that this also ensures that the accumulation interval is greater + than or equal to the forecast output interval. 4) The hour-of-day at which the accumulated forecast values will be available are a subset of the ones at which the accumulated obs @@ -129,10 +129,11 @@ def check_temporal_consistency_cumul_fields( which various field/accumlation combinations are output and at which the corresponding obs type is also available. """ - # Set dictionary containing all cumulative fields (i.e. whether or not - # they are to be verified). The keys are the observation types and the - # values are the field names in the forecasts. - vx_cumul_fields_all = {"CCPA": "APCP", "NOHRSC": "ASNOW"} + + # Set dictionary containing all field groups that consist of cumulative + # fields (i.e. whether or not those field groups are to be verified). + # The keys are the observation types and the field groups. + obtype_to_fg_dict_cumul = {"CCPA": "APCP", "NOHRSC": "ASNOW"} # Convert from datetime.timedelta objects to integers. one_hour = timedelta(hours=1) @@ -143,15 +144,15 @@ def check_temporal_consistency_cumul_fields( # dictionary. fcst_obs_matched_times_all_cycles_cumul = dict() - for obtype, field_fcst in vx_cumul_fields_all.items(): + for obtype, fg in obtype_to_fg_dict_cumul.items(): # If the current cumulative field is not in the list of fields to be # verified, just skip to the next field. - if field_fcst not in vx_config["VX_FIELDS"]: + if fg not in vx_config["VX_FIELD_GROUPS"]: continue # Initialize a sub-dictionary in one of the dictionaries to be returned. - fcst_obs_matched_times_all_cycles_cumul.update({field_fcst: {}}) + fcst_obs_matched_times_all_cycles_cumul.update({fg: {}}) # # Get the availability interval of the current observation type from the @@ -186,7 +187,7 @@ def check_temporal_consistency_cumul_fields( # is not satisfied, remove that accumulation from the list of accumulations # for the current field. # - accum_intvls_array_name = "".join(["VX_", field_fcst, "_ACCUMS_HRS"]) + accum_intvls_array_name = "".join(["VX_", fg, "_ACCUMS_HRS"]) accum_intvls_hrs = vx_config[accum_intvls_array_name] # # Loop through the accumulation intervals and check the temporal constraints @@ -196,23 +197,23 @@ def check_temporal_consistency_cumul_fields( accum_hh = f"{accum_hrs:02d}" # Initialize a sub-sub-dictionary in one of the dictionaries to be returned. - fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = [] + fcst_obs_matched_times_all_cycles_cumul[fg][accum_hh] = [] # # Make sure that the accumulation interval is less than or equal to the # forecast length. 
# if accum_hrs > fcst_len_hrs: msg = dedent(f""" - The accumulation interval (accum_hrs) for the current cumulative forecast - field (field_fcst) and corresponding observation type (obtype) is greater - than the forecast length (fcst_len_hrs): - {field_fcst = } + The accumulation interval (accum_hrs) for the current cumulative field + group (fg) and corresponding observation type (obtype) is greater than + the forecast length (fcst_len_hrs): + {fg = } {obtype = } {accum_hrs = } {fcst_len_hrs = } - Thus, this forecast field cannot be accumulated over this interval. Will - remove this accumulation interval from the list of accumulation intervals - to verify for this field/obtype. + Thus, the forecast(s) cannot accumulate the field(s) in this field group + over this interval. Will remove this accumulation interval from the list + of accumulation intervals to verify for this field group/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -224,18 +225,17 @@ def check_temporal_consistency_cumul_fields( rem_obs = accum_hrs % obs_avail_intvl_hrs if rem_obs != 0: msg = dedent(f""" - The accumulation interval (accum_hrs) for the current cumulative forecast - field (field_fcst) and corresponding observation type (obtype) is not - evenly divisible by the observation type's availability interval - (obs_avail_intvl_hrs): - {field_fcst = } + The accumulation interval (accum_hrs) for the current cumulative field + group (fg) and corresponding observation type (obtype) is not evenly + divisible by the observation type's availability interval (obs_avail_intvl_hrs): + {fg = } {obtype = } {accum_hrs = } {obs_avail_intvl_hrs = } accum_hrs % obs_avail_intvl_hrs = {rem_obs} Thus, this observation type cannot be accumulated over this interval. Will remove this accumulation interval from the list of accumulation - intervals to verify for this field/obtype. + intervals to verify for this field group/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -247,17 +247,17 @@ def check_temporal_consistency_cumul_fields( rem_fcst = accum_hrs % fcst_output_intvl_hrs if rem_fcst != 0: msg = dedent(f""" - The accumulation interval (accum_hrs) for the current cumulative forecast - field (field_fcst) and corresponding observation type (obtype) is not - evenly divisible by the forecast output interval (fcst_output_intvl): - {field_fcst = } + The accumulation interval (accum_hrs) for the current cumulative field + group (fg) and corresponding observation type (obtype) is not evenly + divisible by the forecast output interval (fcst_output_intvl): + {fg = } {obtype = } {accum_hrs = } {fcst_output_intvl_hrs = } accum_hrs % fcst_output_intvl_hrs = {rem_fcst} - Thus, this forecast field cannot be accumulated over this interval. Will - remove this accumulation interval from the list of accumulation intervals - to verify for this field/obtype. + Thus, the forecast(s) cannot accumulate the field(s) in this field group + over this interval. Will remove this accumulation interval from the list + of accumulation intervals to verify for this field group/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) @@ -298,26 +298,26 @@ def check_temporal_consistency_cumul_fields( # interval from the list of intervals to verify. 
if not set(fcst_output_hrs_of_day_str) <= set(obs_avail_hrs_of_day_str): msg = dedent(f""" - The accumulation interval (accum_hrs) for the current cumulative forecast - field (field_fcst) is such that the forecast will output the field on at - least one of hour-of-day on which the corresponding observation type is - not available: - {field_fcst = } + The accumulation interval (accum_hrs) for the current cumulative field + group (fg) is such that the forecast will output the field(s) in the + field group at at least one hour-of-day at which the corresponding + observation type is not available: + {fg = } {obtype = } {accum_hrs = } - The forecast output hours-of-day for this field/accumulation interval + The forecast output hours-of-day for this field group/accumulation interval combination are: {fcst_output_hrs_of_day_str = } The hours-of-day at which the obs are available are: {obs_avail_hrs_of_day_str = } Thus, at least some of the forecast output cannot be verified. Will remove - this accumulation interval from the list of accumulation intervals to - verify for this field/obtype. + this accumulation interval from the list of accumulation intervals to verify + for this field group/obtype. """) logging.info(msg) accum_intvls_hrs.remove(accum_hrs) else: - fcst_obs_matched_times_all_cycles_cumul[field_fcst][accum_hh] = fcst_output_times_all_cycles_str + fcst_obs_matched_times_all_cycles_cumul[fg][accum_hh] = fcst_output_times_all_cycles_str # # Update the value in the experiment configuration dictionary of the list # of accumulation intervals to verify for this cumulative field (since @@ -330,15 +330,15 @@ def check_temporal_consistency_cumul_fields( # verification configuration dictionary. # if not accum_intvls_hrs: - vx_config["VX_FIELDS"].remove(field_fcst) + vx_config["VX_FIELD_GROUPS"].remove(fg) msg = dedent(f""" The list of accumulation intervals (accum_intvls_hrs) for the current - cumulative field to verify (field_fcst) is empty: - {field_fcst = } + cumulative field group to verify (fg) is empty: + {fg = } {accum_intvls_hrs = } Removing this field from the list of fields to verify. The updated list is: - {vx_config["VX_FIELDS"]} + {vx_config["VX_FIELD_GROUPS"]} """) logging.info(msg) @@ -621,36 +621,35 @@ def get_obs_retrieve_times_by_day( = [datetime.strptime(obs_days_all_cycles[time_type][i], "%Y%m%d") for i in range(len(obs_days_all_cycles[time_type]))] - # Get list of forecast fields to be verified. - vx_fields = vx_config['VX_FIELDS'] + # Get list of field groups to be verified. + vx_field_groups = vx_config['VX_FIELD_GROUPS'] - # Define dictionary containing information about all fields that may - # possibly be verified. This information includes their temporal + # Define dictionary containing information about all field groups that + # can possibly be verified. This information includes their temporal # characteristics (cumulative vs. instantaneous) and the mapping between - # the observation type and the forecast field. - vx_field_info = {'cumul': [{'obtype': 'CCPA', 'fcst_fields': ['APCP']}, - {'obtype': 'NOHRSC', 'fcst_fields': ['ASNOW']}], - 'inst': [{'obtype': 'MRMS', 'fcst_fields': ['REFC', 'RETOP']}, - {'obtype': 'NDAS', 'fcst_fields': ['ADPSFC', 'ADPUPA']}] + # the observation type and the field group. 
+ vx_field_info = {'cumul': [{'obtype': 'CCPA', 'field_groups': ['APCP']}, + {'obtype': 'NOHRSC', 'field_groups': ['ASNOW']}], + 'inst': [{'obtype': 'MRMS', 'field_groups': ['REFC', 'RETOP']}, + {'obtype': 'NDAS', 'field_groups': ['ADPSFC', 'ADPUPA']}] } - # Keep only those items in the dictionary vx_field_info defined above - # that have forecast fields that appear in the list of forecast fields to - # be verified. - for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.copy().items(): - for obtypes_to_fcst_fields_dict in obtypes_to_fcst_fields_dict_list.copy(): - obtype = obtypes_to_fcst_fields_dict['obtype'] - fcst_fields = obtypes_to_fcst_fields_dict['fcst_fields'] - fcst_fields = [field for field in fcst_fields if field in vx_fields] - obtypes_to_fcst_fields_dict['fcst_fields'] = fcst_fields - if not fcst_fields: obtypes_to_fcst_fields_dict_list.remove(obtypes_to_fcst_fields_dict) - if not obtypes_to_fcst_fields_dict_list: vx_field_info.pop(obs_time_type) + # Keep only those items in the dictionary vx_field_info defined above that + # have field groups that appear in the list of field groups to verify. + for obs_time_type, obtypes_to_field_groups_dict_list in vx_field_info.copy().items(): + for obtypes_to_field_groups_dict in obtypes_to_field_groups_dict_list.copy(): + obtype = obtypes_to_field_groups_dict['obtype'] + field_groups = obtypes_to_field_groups_dict['field_groups'] + field_groups = [fg for fg in field_groups if fg in vx_field_groups] + obtypes_to_field_groups_dict['field_groups'] = field_groups + if not field_groups: obtypes_to_field_groups_dict_list.remove(obtypes_to_field_groups_dict) + if not obtypes_to_field_groups_dict_list: vx_field_info.pop(obs_time_type) # Create dictionary containing the temporal characteristics as keys and # a string list of obs types to verify as the values. obs_time_type_to_obtypes_dict = dict() - for obs_time_type, obtypes_to_fcst_fields_dict_list in vx_field_info.items(): - obtype_list = [the_dict['obtype'] for the_dict in obtypes_to_fcst_fields_dict_list] + for obs_time_type, obtypes_to_field_groups_dict_list in vx_field_info.items(): + obtype_list = [a_dict['obtype'] for a_dict in obtypes_to_field_groups_dict_list] obs_time_type_to_obtypes_dict[obs_time_type] = obtype_list # Initialize the return variable. 
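
To make the pruning above concrete, a small standalone sketch (with
hypothetical field-group choices; the dictionary layout matches the hunk
above):

    vx_field_groups = ['APCP', 'REFC']
    vx_field_info = {'cumul': [{'obtype': 'CCPA', 'field_groups': ['APCP']},
                               {'obtype': 'NOHRSC', 'field_groups': ['ASNOW']}],
                     'inst': [{'obtype': 'MRMS', 'field_groups': ['REFC', 'RETOP']},
                              {'obtype': 'NDAS', 'field_groups': ['ADPSFC', 'ADPUPA']}]}
    # Iterate over copies so that items can be removed from the originals.
    for time_type, dict_list in vx_field_info.copy().items():
        for d in dict_list.copy():
            d['field_groups'] = [fg for fg in d['field_groups'] if fg in vx_field_groups]
            if not d['field_groups']: dict_list.remove(d)
        if not dict_list: vx_field_info.pop(time_type)
    # Result: {'cumul': [{'obtype': 'CCPA', 'field_groups': ['APCP']}],
    #          'inst': [{'obtype': 'MRMS', 'field_groups': ['REFC']}]}
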
diff --git a/ush/setup.py b/ush/setup.py index 703bc094fd..8a1b8e21c5 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -706,57 +706,62 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # - vx_fields_all = {} - vx_metatasks_all = {} - - vx_fields_all["CCPA"] = ["APCP"] - vx_metatasks_all["CCPA"] = ["task_get_obs_ccpa", - "metatask_PcpCombine_obs_APCP_all_accums_CCPA", - "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", - "metatask_GridStat_CCPA_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_CCPA", - "metatask_GridStat_CCPA_ensmeanprob_all_accums"] - - vx_fields_all["NOHRSC"] = ["ASNOW"] - vx_metatasks_all["NOHRSC"] = ["task_get_obs_nohrsc", - "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", - "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", - "metatask_GridStat_NOHRSC_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_NOHRSC", - "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] - - vx_fields_all["MRMS"] = ["REFC", "RETOP"] - vx_metatasks_all["MRMS"] = ["task_get_obs_mrms", - "metatask_GridStat_MRMS_all_mems", - "metatask_GenEnsProd_EnsembleStat_MRMS", - "metatask_GridStat_MRMS_ensprob"] - - vx_fields_all["NDAS"] = ["ADPSFC", "ADPUPA"] - vx_metatasks_all["NDAS"] = ["task_get_obs_ndas", - "task_run_MET_Pb2nc_obs_NDAS", - "metatask_PointStat_NDAS_all_mems", - "metatask_GenEnsProd_EnsembleStat_NDAS", - "metatask_PointStat_NDAS_ensmeanprob"] - - # If there are no vx fields specified, remove those tasks that are necessary - # for all observation types. - vx_fields = vx_config["VX_FIELDS"] - if not vx_fields: + vx_field_groups_all_by_obtype = {} + vx_metatasks_all_by_obtype = {} + + vx_field_groups_all_by_obtype["CCPA"] = ["APCP"] + vx_metatasks_all_by_obtype["CCPA"] \ + = ["task_get_obs_ccpa", + "metatask_PcpCombine_obs_APCP_all_accums_CCPA", + "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", + "metatask_GridStat_CCPA_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_CCPA", + "metatask_GridStat_CCPA_ensmeanprob_all_accums"] + + vx_field_groups_all_by_obtype["NOHRSC"] = ["ASNOW"] + vx_metatasks_all_by_obtype["NOHRSC"] \ + = ["task_get_obs_nohrsc", + "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", + "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", + "metatask_GridStat_NOHRSC_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_NOHRSC", + "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] + + vx_field_groups_all_by_obtype["MRMS"] = ["REFC", "RETOP"] + vx_metatasks_all_by_obtype["MRMS"] \ + = ["task_get_obs_mrms", + "metatask_GridStat_MRMS_all_mems", + "metatask_GenEnsProd_EnsembleStat_MRMS", + "metatask_GridStat_MRMS_ensprob"] + + vx_field_groups_all_by_obtype["NDAS"] = ["ADPSFC", "ADPUPA"] + vx_metatasks_all_by_obtype["NDAS"] \ + = ["task_get_obs_ndas", + "task_run_MET_Pb2nc_obs_NDAS", + "metatask_PointStat_NDAS_all_mems", + "metatask_GenEnsProd_EnsembleStat_NDAS", + "metatask_PointStat_NDAS_ensmeanprob"] + + # If there are no field groups specified for verification, remove those + # tasks that are common to all observation types. + vx_field_groups = vx_config["VX_FIELD_GROUPS"] + if not vx_field_groups: metatask = "metatask_check_post_output_all_mems" rocoto_config['tasks'].pop(metatask) - # If for a given obstype no fields are specified, remove all vx metatasks - # for that obstype. 
-    for obstype in vx_fields_all:
-        vx_fields_by_obstype = [field for field in vx_fields if field in vx_fields_all[obstype]]
-        if not vx_fields_by_obstype:
-            for metatask in vx_metatasks_all[obstype]:
+    # If for a given obs type none of its field groups are specified for
+    # verification, remove all vx metatasks for that obs type.
+    for obtype in vx_field_groups_all_by_obtype:
+        vx_field_groups_crnt_obtype = list(set(vx_field_groups) & set(vx_field_groups_all_by_obtype[obtype]))
+        if not vx_field_groups_crnt_obtype:
+            for metatask in vx_metatasks_all_by_obtype[obtype]:
             if metatask in rocoto_config['tasks']:
                 logging.info(dedent(
                     f"""
-                    Removing verification [meta]task
+                    Removing verification (meta)task
                     "{metatask}"
-                    from workflow since no fields belonging to observation type "{obstype}"
+                    from workflow since no fields belonging to observation type "{obtype}"
                     are specified for verification."""
                 ))
                 rocoto_config['tasks'].pop(metatask)
diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml
index 18bc4d453f..017404aa2e 100644
--- a/ush/valid_param_vals.yaml
+++ b/ush/valid_param_vals.yaml
@@ -76,6 +76,6 @@ valid_vals_DO_AQM_CHEM_LBCS: [True, False]
 valid_vals_DO_AQM_GEFS_LBCS: [True, False]
 valid_vals_DO_AQM_SAVE_AIRNOW_HIST: [True, False]
 valid_vals_COLDSTART: [True, False]
-valid_vals_VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ]
+valid_vals_VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ]
 valid_vals_VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ]
 valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 12, 18, 24 ]

From 3817033cbdcd83484b101cdc97c446430d3a2202 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Thu, 24 Oct 2024 06:25:31 -0600
Subject: [PATCH 167/260] Rename and reformat the App variables containing
 METplus templates for obs files so that they are lists of strings (rather
 than just string scalars) that mimic python dictionaries, with pairs of
 elements specifying the field group(s) and corresponding file name template,
 respectively; fix comments accordingly; rename some local variables for
 clarity.
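
As a minimal sketch of the new convention (with hypothetical, shortened
template strings; the real defaults are set in ush/config_defaults.yaml),
each *_FN_TEMPLATES variable is a flat list of (field group, template)
pairs that downstream python code can view as a dictionary:

    obs_mrms_fn_templates = ['REFC', 'refc_{valid?fmt=%Y%m%d%H}.grib2',
                             'RETOP', 'retop_{valid?fmt=%Y%m%d%H}.grib2']
    # Pair up elements (0,1), (2,3), ... into a field-group -> template map.
    templates_by_fg = {obs_mrms_fn_templates[i]: obs_mrms_fn_templates[i+1]
                       for i in range(0, len(obs_mrms_fn_templates), 2)}
    # templates_by_fg == {'REFC': 'refc_{valid?fmt=%Y%m%d%H}.grib2',
    #                     'RETOP': 'retop_{valid?fmt=%Y%m%d%H}.grib2'}
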
--- ...onal_run_met_genensprod_or_ensemblestat.sh | 4 +- ...gional_run_met_gridstat_or_pointstat_vx.sh | 4 +- ...un_met_gridstat_or_pointstat_vx_ensmean.sh | 4 +- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 4 +- scripts/exregional_run_met_pb2nc_obs.sh | 4 +- scripts/exregional_run_met_pcpcombine.sh | 2 +- ...g.MET_ensemble_verification_winter_wx.yaml | 5 +- ush/config_defaults.yaml | 185 ++++++++++++------ ush/get_obs.py | 109 +++++------ 9 files changed, 180 insertions(+), 141 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 475417ee53..40cb510f3e 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -146,12 +146,12 @@ if [ "${grid_or_point}" = "grid" ]; then ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_REFC_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[1]}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" ;; "RETOP") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_RETOP_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[3]}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" ;; esac diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index a6130ba50d..3cb3658588 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -185,13 +185,13 @@ if [ "${grid_or_point}" = "grid" ]; then ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_REFC_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[1]}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" ;; "RETOP") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_RETOP_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[3]}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" ;; diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 75332e4929..4d6ae4fedb 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -132,11 +132,11 @@ if [ "${grid_or_point}" = "grid" ]; then ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_REFC_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[1]}" ;; "RETOP") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_RETOP_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[3]}" ;; esac FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/metprd/GenEnsProd" diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 382bd71ac8..32a987e96c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -132,11 +132,11 @@ if [ "${grid_or_point}" = "grid" ]; then ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_REFC_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[1]}" ;; "RETOP") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_RETOP_FN_TEMPLATE}" + 
OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[3]}" ;; esac diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index e93387ed0a..a7b4e691a8 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -136,7 +136,7 @@ set_vx_params \ vx_output_basedir=$( eval echo "${VX_OUTPUT_BASEDIR}" ) OBS_INPUT_DIR="${OBS_DIR}" -OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE} ) +OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_FN_TEMPLATES[1]} ) OUTPUT_BASE="${vx_output_basedir}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" @@ -163,7 +163,7 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do eval_METplus_timestr_tmpl \ init_time="${yyyymmdd_task}00" \ fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}" \ + METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_FN_TEMPLATES[1]}" \ outvarname_evaluated_timestr="fp" if [[ -f "${fp}" ]]; then diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 590ceb43ef..ee06ef8df7 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -195,7 +195,7 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" - fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE}) + fn_template=$(eval echo \${OBS_${OBTYPE}_FN_TEMPLATES[1]}) OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}" diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml index 018b8abbc6..fc6c9f56af 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml @@ -32,6 +32,7 @@ global: NUM_ENS_MEMBERS: 10 verification: VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] - OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} - {{- "{valid?fmt=%Y%m%d}/sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' + OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW', + '{%- set data_intvl_hrs = "%02d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "{valid?fmt=%Y%m%d}/sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ] diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 220c5e6a2b..a4867261dd 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2372,48 +2372,108 @@ verification: MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # - # OBS_[CCPA_APCP|NOHRSC_ASNOW|MRMS_[REFC|RETOP]|NDAS_ADPSFCandADPUPA]_FN_TEMPLATE: - # File name templates for various obs type and vx field group combinations. + # OBS_[CCPA|NOHRSC|MRMS|NDAS]_FN_TEMPLATES: + # File name templates for various obs types. These are meant to be used + # in METplus configuration files and thus contain METplus time formatting + # strings. Each of these variables is a python list containing pairs of + # values. The first element of each pair specifies the verification field + # group(s) for which the file name template will be needed, and the second + # element is the file name template itself, which may include a leading + # relative directory. 
(Here, by "verification field group" we mean a
+  # group of fields that is verified together in the workflow.)  For example,
+  # for the CCPA obs type, the variable name is OBS_CCPA_FN_TEMPLATES, and
+  # its default value contains only one pair of values given by
+  #
+  #   [ 'APCP',
+  #     '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %}
+  #      {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2" }}' ]
+  #
+  # Thus, if CCPA_OBS_AVAIL_INTVL_HRS is set to 1 above (i.e. the CCPA obs
+  # are available every 1 hour), then for a valid time of 2024042903, the
+  # obs file name (including a relative path) to look for and, if necessary,
+  # create, will be
+  #
+  #   20240429/ccpa.t03z.01h.hrap.conus.gb2
+  #
+  # This file will be used in the verification of fields under the APCP
+  # field group (which consist of accumulated precipitation for various
+  # accumulation intervals).
   #
   # Notes:
   #
-  # * These are relative to the obs base directories
+  # * The file name templates are relative to the obs base directories given
+  #   in the variables
+  #
   #     [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR
-  #   defined above.  Thus, the full template to the obs files is given, e.g.
-  #   for CCPA obs, by {CCPA_OBS_DIR}/{OBS_CCPA_APCP_FN_TEMPLATE}.
-  #
-  # * These may represent file names only, or they may include relative paths
-  #   before the file names.
-  #
-  # * These templates must contain full information about the year, month,
-  #   day, and hour by including METplus time strings that serve as templates
-  #   for this information.  Some of this information may be in the relative
-  #   directory portion and the rest in the file name, or there may be no
-  #   relative directory portion and all of it may be in the file name, but
-  #   all four pieces of timing information must be present somewhere in
-  #   this template as METplus time strings.  Otherwise, obs files created
-  #   by the get_obs tasks for different days might overwrite each other.
-  #
-  # * If one or more of the obs files specified by this full path do not
-  #   exist on disk, all the files will be created by first retrieving "raw"
-  #   versions of them from a data store (e.g. NOAA's HPSS) and then placing
-  #   these raw files in the locations specified by this full path template.
+  #
+  #   defined above.  Thus, the template for the full path to the obs files
+  #   is given, e.g. for CCPA obs, by
+  #
+  #     {CCPA_OBS_DIR}/{OBS_CCPA_FN_TEMPLATES[1]},
+  #
+  #   where the [1] indicates the second element of the list OBS_CCPA_FN_TEMPLATES.
+  #
+  # * The file name templates may represent file names only, or they may
+  #   include leading relative directories.
+  #
+  # * The default values of these variables for the CCPA, NOHRSC, and NDAS
+  #   obs types contain only one pair of values (because these obs types
+  #   contain only one set of files that we use in the verification) while
+  #   the default value for the MRMS obs type contains two pairs of values,
+  #   one for obs files that contain composite reflectivity data and another
+  #   for the ones that contain echo top data (simply because the MRMS obs
+  #   do not group these two fields together in one set of files as do, for
+  #   example, the NDAS obs).
+  #
+  # * Each template must contain full information about the year, month,
+  #   day, and hour by including METplus time formatting strings that serve
+  #   as templates for this information.  Some of this information (e.g.
+  #   the year, month, and day) may be in the relative directory portion
+  #   of the template and the rest (e.g.
the hour) in the file name, or
+  #   there may be no relative directory portion and all of this information
+  #   may be in the file name, but all four pieces of timing information
+  #   must be present somewhere in each template as METplus time formatting
+  #   strings.  If not, obs files created by the get_obs tasks for different
+  #   days might overwrite each other.
+  #
+  # * The workflow creates a get_obs task for each obs type that is needed
+  #   in the verification and for each day on which that obs type is needed
+  #   for at least some hours.  That get_obs task first checks whether all
+  #   the necessary obs files for that day already exist at the locations
+  #   specified by the full path template(s) (which are formed by combining
+  #   the base directory and the file name template).  If for a given day
+  #   one or more of these obs files do not exist on disk, the get_obs task
+  #   will retrieve "raw" versions of these files from a data store (e.g.
+  #   NOAA's HPSS) and will place them in a temporary "raw" directory.  It
+  #   will then move or copy these raw files to the locations specified by
+  #   the full path template(s).
   #
   # * The raw obs files, i.e. the obs files as they are named and arranged
-  #   in the data stores, may be different than the file path/name specified
-  #   in these variables.  The list of templates for raw files to search
-  #   for in the data stores is given in the data retrieval configuration
-  #   file at parm/data_locations.yml.  Once retrieved, these raw files are
-  #   renamed and relocated on disk to the locations specified by
-  #   {..._OBS_DIR}/{..._FN_TEMPLATE}.
-  #
-  OBS_CCPA_APCP_FN_TEMPLATE: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %}
-                              {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ data_intvl_hrs ~ "h.hrap.conus.gb2" }}'
-  OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %}
-                                 {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}'
-  OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'
-  OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'
-  OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}'
+  #   in the data stores and retrieved to the raw directories, may be
+  #   arranged differently and/or have names that are different from the
+  #   ones specified in the file name templates.  If so, they are renamed
+  #   while being moved or copied from the raw directories to the locations
+  #   specified by the full path template(s).  (The lists of templates for
+  #   searching for and retrieving files from the data stores are different
+  #   from the METplus templates described here; the former are given in
+  #   the data retrieval configuration file at parm/data_locations.yml.)
+  #
+  # * When the ex-scripts for the various vx tasks are converted from bash
+  #   to python scripts, these variables should be converted from python
+  #   lists to python dictionaries, where the first element of each pair
+  #   becomes the key and the second becomes the value.  This currently
+  #   cannot be done due to limitations in the workflow on converting
+  #   python dictionaries to bash variables.
+  #
+  OBS_CCPA_FN_TEMPLATES: [ 'APCP',
+                           '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %}
+                            {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z."
~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2" }}' ]
+  OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW',
+                             '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %}
+                              {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ]
+  OBS_MRMS_FN_TEMPLATES: [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2',
+                           'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ]
+  OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ]
   #
   # Time interval (in hours) at which various types of obs are available on
   # NOAA's HPSS.
@@ -2429,13 +2489,13 @@ verification:
   NDAS_OBS_AVAIL_INTVL_HRS: 1
   #
   # REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]:
-  #   Boolean flag specifying whether to remove the "raw" observation
-  #   directories after pulling the specified type of obs (CCPA, NOHRSC,
-  #   MRMS, or NOHRSC).  The raw directories are the ones in which the
-  #   observation files are placed immediately after pulling them from
-  #   a data store (e.g. NOAA's HPSS) but before performing any processing
-  #   on them (e.g. renaming the files or reorganizing their directory
-  #   structure).
+  #   Flag specifying whether to remove the "raw" observation directories
+  #   after retrieving the specified type of obs (CCPA, NOHRSC, MRMS, or
+  #   NDAS) from a data store (e.g. NOAA's HPSS).  The raw directories
+  #   are the ones in which the observation files are placed immediately
+  #   after pulling them from the data store but before performing any
+  #   processing on them such as renaming the files and/or reorganizing
+  #   their directory structure.
   #
   REMOVE_RAW_OBS_CCPA: true
   REMOVE_RAW_OBS_NOHRSC: true
@@ -2443,30 +2503,29 @@
   REMOVE_RAW_OBS_NDAS: true
   #
   # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
-  #   Template used to specify the names of the output NetCDF observation
-  #   files generated by the worfklow verification tasks that call the METplus
-  #   PcpCombine tool on CCPA observations.  (These files will contain obs
-  #   APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF
-  #   format.)
+  #   METplus file name template used to specify the names of the NetCDF
+  #   files generated by the workflow verification tasks that call METplus's
+  #   PcpCombine tool on CCPA observations.  These files will contain observed
+  #   accumulated precipitation in NetCDF format for various accumulation intervals.
   #
   # OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
-  #   Template used to specify the names of the output NetCDF observation
-  #   files generated by the worfklow verification tasks that call the METplus
-  #   PcpCombine tool on NOHRSC observations.  (These files will contain obs
-  #   APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF
-  #   format.)
+  #   METplus file name template used to specify the names of the NetCDF
+  #   files generated by the workflow verification tasks that call METplus's
+  #   PcpCombine tool on NOHRSC observations.  These files will contain
+  #   observed accumulated snow for various accumulation intervals.
   #
   # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT:
-  #   Template used to specify the names of the output NetCDF observation
-  #   files generated by the worfklow verification tasks that call the
-  #   METplus Pb2nc tool on NDAS observations.  (These files will contain
-  #   obs ADPSFC or ADPUPA fields in NetCDF format.)
-  #
-  OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %}
-                                                {{- "ccpa.t{valid?fmt=%H}z."
~ data_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}'
-  OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set data_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %}
-                                                   {{- "sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}'
-  OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}.nc'
+  #   METplus file name template used to specify the names of the NetCDF
+  #   files generated by the workflow verification tasks that call METplus's
+  #   Pb2nc tool on NDAS observations.  These files will contain the observed
+  #   ADPSFC or ADPUPA fields in NetCDF format (instead of NDAS's native
+  #   prepbufr format).
+  #
+  OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %}
+                                                {{- "ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}'
+  OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %}
+                                                   {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}'
+  OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_FN_TEMPLATES[1]}.nc'
 #
 # VX_FCST_MODEL_NAME:
 #   String that specifies a descriptive name for the model being verified.
diff --git a/ush/get_obs.py b/ush/get_obs.py
index f88ab9a27e..4079b15cea 100644
--- a/ush/get_obs.py
+++ b/ush/get_obs.py
@@ -322,53 +322,30 @@ def get_obs(config, obtype, yyyymmdd_task):
 
     # Get the base directory for the observations.
     obs_dir = vx_config[f'{obtype}_OBS_DIR']
 
-    # For each observation type, set the group of fields contained in those
-    # observation files that we need for verification.  Each group of fields
-    # is one that is verified together in the workflow.  We assume there is
-    # a separate set of obs files for each such field group in the observations,
-    # and in the code below we loop over these sets of files as necessary.
-    # There are several scenarios to consider:
+    # Get from the verification configuration dictionary the list of METplus
+    # file name template(s) corresponding to the obs type.
+    obs_fn_templates = vx_config[f'OBS_{obtype}_FN_TEMPLATES']
+
+    # Note that the list obs_fn_templates consists of pairs of elements such
+    # that the first element of the pair represents the verification field
+    # group(s) for which an obs file name template will be needed and the
+    # second element is the template itself.  For convenience, convert this
+    # information to a dictionary in which the field groups are the keys and
+    # the templates are the values.
     #
-    # * An obs type consists of only one set of files containing only one
-    #   field.
-    #   This is the case for CCPA and NOHRSC obs.  CCPA obs consist only one
-    #   set of files that contain APCP data, and NOHRSC obs consist of only
-    #   one set of files that contain ASNOW data.
-    #
-    # * An obs type consists of more than one set of files, with each file
-    #   containing a different field.
-    #   This is the case for MRMS obs.  These consist of two sets of files.
-    #   The first set contains REFC data, and the second contains RETOP data.
-    #
-    # * An obs type consists of only one set of files, but each file contains
-    #   multiple groups of fields needed for verification.
-    #   This is the case for NDAS obs.
These consist of a single set of files, - # but each file contains both the ADPSFC fields (like 2-m temperature) - # and ADPUPA fields (like 500-mb temperature) that are verified separately - # in the workflow tasks and thus are considered separate field groups. - # - # Other obs type and field group scenarios are also possible, but we do - # not describe them since they are not applicable to any of the obs types - # considered here. - if obtype == 'CCPA': - field_groups_in_obs = ['APCP'] - elif obtype == 'NOHRSC': - field_groups_in_obs = ['ASNOW'] - elif obtype == 'MRMS': - field_groups_in_obs = ['REFC', 'RETOP'] - elif obtype == 'NDAS': - field_groups_in_obs = ['ADPSFCandADPUPA'] - num_field_groups = len(field_groups_in_obs) - - # For each field group in the observations, get the METplus file name - # template for the observation files. Then combine these with the base - # directory to get the METplus template for the full path on disk to - # the processed obs files. If obs files do not already exist at these - # locations, they will be retrieved from HPSS and placed at these locations. - fp_proc_templates = [] - for fg in field_groups_in_obs: - fn_proc_template = vx_config[f'OBS_{obtype}_{fg}_FN_TEMPLATE'] - fp_proc_templates.append(os.path.join(obs_dir, fn_proc_template)) + # Note: + # Once the ex-scripts for the vx tasks are converted from bash to python, + # the lists in the SRW App's configuration file containing the METplus + # obs file name template(s) (from which the variable obs_fn_templates + # was obtained above) can be converted to python dictionaries. Then the + # list-to-dictionary conversion step here will no longer be needed. + obs_fn_templates_by_fg = dict() + for i in range(0, len(obs_fn_templates), 2): + obs_fn_templates_by_fg[obs_fn_templates[i]] = obs_fn_templates[i+1] + + # For convenience, get the list of verification field groups for which + # the various obs file templates will be used. + field_groups_in_obs = obs_fn_templates_by_fg.keys() # #----------------------------------------------------------------------- # @@ -388,27 +365,25 @@ def get_obs(config, obtype, yyyymmdd_task): # For MRMS obs, set field-dependent parameters needed in forming grib2 # file names. - fields_in_filenames = [] - levels_in_filenames = [] + mrms_fields_in_obs_filenames = [] + mrms_levels_in_obs_filenames = [] if obtype == 'MRMS': - valid_mrms_field_groups = ['REFC', 'RETOP'] for fg in field_groups_in_obs: - if fg not in valid_mrms_field_groups: + if fg == 'REFC': + mrms_fields_in_obs_filenames.append('MergedReflectivityQCComposite') + mrms_levels_in_obs_filenames.append('00.50') + elif fg == 'RETOP': + mrms_fields_in_obs_filenames.append('EchoTop') + mrms_levels_in_obs_filenames.append('18_00.50') + else: msg = dedent(f""" - Invalid field group specified for obs type: + Field and level names have not been specified for this {obtype} field + group: {obtype = } {fg = } - Valid field group are: - {valid_mrms_field_groups} """) logging.error(msg) raise ValueError(msg) - if fg == 'REFC': - fields_in_filenames.append('MergedReflectivityQCComposite') - levels_in_filenames.append('00.50') - elif fg == 'RETOP': - fields_in_filenames.append('EchoTop') - levels_in_filenames.append('18_00.50') # CCPA files for 1-hour accumulation have incorrect metadata in the files # under the "00" directory from 20180718 to 20210504. Set these starting @@ -477,7 +452,8 @@ def get_obs(config, obtype, yyyymmdd_task): # files, i.e. the files as they are named and arranged within the archive # (tar) files on HPSS. 
all_fp_proc_dict = {} - for fg, fp_proc_templ in zip(field_groups_in_obs, fp_proc_templates): + for fg, fn_proc_tmpl in obs_fn_templates_by_fg.items(): + fp_proc_tmpl = os.path.join(obs_dir, fn_proc_tmpl) all_fp_proc_dict[fg] = [] for yyyymmddhh in obs_retrieve_times_crnt_day: # Set the lead hour, i.e. the number of hours from the beginning of the @@ -489,7 +465,7 @@ def get_obs(config, obtype, yyyymmdd_task): cmd = '; '.join(['export USHdir=' + ushdir, 'export yyyymmdd_task=' + yyyymmdd_task_str, 'export lhr=' + str(lhr), - 'export METplus_timestr_tmpl=' + fp_proc_templ, + 'export METplus_timestr_tmpl=' + fp_proc_tmpl, os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')]) result = subprocess.run(cmd, shell=True, capture_output=True, text=True) fp_proc = result.stdout.strip() @@ -527,7 +503,10 @@ def get_obs(config, obtype, yyyymmdd_task): # If the number of obs files that already exist on disk is equal to the # number of obs files needed, then there is no need to retrieve any files. - num_files_needed = len(obs_retrieve_times_crnt_day)*num_field_groups + # The number of obs files needed (i.e. that need to be staged) is equal + # to the number of times in the current day that obs are needed times the + # number of sets of files that the current obs type contains. + num_files_needed = len(obs_retrieve_times_crnt_day)*len(obs_fn_templates_by_fg) if num_existing_files == num_files_needed: msg = dedent(f""" @@ -545,7 +524,7 @@ def get_obs(config, obtype, yyyymmdd_task): # from which files will be retrieved. arcv_hrs = [hr for hr in range(arcv_hr_start, arcv_hr_end+arcv_intvl_hrs, arcv_intvl_hrs)] msg = dedent(f""" - At least some obs files needed needed for the current day (yyyymmdd_task) + At least some obs files needed for the current day (yyyymmdd_task) do not exist on disk: {yyyymmdd_task = } The number of obs files needed for the current day is: @@ -817,7 +796,7 @@ def get_obs(config, obtype, yyyymmdd_task): '--valid_time', yyyymmddhh_str, \ '--source', basedir_raw, \ '--outdir', os.path.join(basedir_raw, 'topofhour'), \ - '--product', fields_in_filenames[i], \ + '--product', mrms_fields_in_obs_filenames[i], \ '--no-add_vdate_subdir']) result = subprocess.run(cmd, shell=True, capture_output=True, text=True) rc = result.returncode @@ -836,7 +815,7 @@ def get_obs(config, obtype, yyyymmdd_task): fn_raw = 'sfav2_CONUS_' + accum_obs_formatted + 'h_' + yyyymmddhh_str + '_grid184.grb2' elif obtype == 'MRMS': hr = yyyymmddhh.hour - fn_raw = fields_in_filenames[i] + '_' + levels_in_filenames[i] \ + fn_raw = mrms_fields_in_obs_filenames[i] + '_' + mrms_levels_in_obs_filenames[i] \ + '_' + yyyymmdd_task_str + '-' + f'{hr:02d}' + '0000.grib2' fn_raw = os.path.join('topofhour', fn_raw) elif obtype == 'NDAS': From c1bfb8f1cf2ee710d590b11fe0e07f29eded1c70 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 24 Oct 2024 08:11:53 -0600 Subject: [PATCH 168/260] Fix comments. --- ush/config_defaults.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index a4867261dd..6a2e31a94e 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2558,12 +2558,12 @@ verification: VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] # - # Set the forecast output interval to use for verification purposes. - # If the forecasts to be verified are being run in the SRW (i.e. 
they - # are not staged from another forecast model), then this should be set - # set to the SRW's forecast output interval, but such a variable is - # currently not available in this configuration file. Instead, for - # now we set it to a default value of 1 hour. + # VX_FCST_OUTPUT_INTVL_HRS: + # The forecast output interval to use for verification purposes. The + # default value is currently 1 hour, but if/when a variable is created + # in this configuration file that specifies the forecast output interval + # for native SRW forecasts, then the default value of VX_FCST_OUTPUT_INTVL_HRS + # should be set to that. # VX_FCST_OUTPUT_INTVL_HRS: 1 # From 95a372c084a8ac7734634212b5ee75150b831919 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 24 Oct 2024 12:33:00 -0600 Subject: [PATCH 169/260] Add a check to make sure obs base directories are distinct for the various types of obs. --- ush/setup.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/ush/setup.py b/ush/setup.py index 8a1b8e21c5..a4bcab3b32 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -768,6 +768,36 @@ def _remove_tag(tasks, tag): # # ----------------------------------------------------------------------- # + # If there are at least some field groups to verify, then make sure that + # the base directories in which retrieved obs files will be placed are + # distinct for the different obs types. + # + # ----------------------------------------------------------------------- + # + if vx_field_groups: + obtypes_all = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] + obs_basedir_var_names = [f'{obtype}_OBS_DIR' for obtype in obtypes_all] + obs_basedirs_dict = {key: vx_config[key] for key in obs_basedir_var_names} + obs_basedirs_orig = list(obs_basedirs_dict.values()) + obs_basedirs_uniq = list(set(obs_basedirs_orig)) + if len(obs_basedirs_orig) != len(obs_basedirs_uniq): + msg1 = dedent(f""" + The base directories for the obs files must be distinct, but at least two + are identical:""") + msg2 = '' + for obs_basedir_var_name, obs_dir in obs_basedirs_dict.items(): + msg2 = msg2 + dedent(f""" + {obs_basedir_var_name} = {obs_dir}""") + msg3 = dedent(f""" + Modify these in the SRW App's user configuration file to make them distinct + and rerun. + """) + msg = msg1 + ' '.join(msg2.splitlines(True)) + msg3 + logging.error(msg) + raise ValueError(msg) + # + # ----------------------------------------------------------------------- + # # The "cycled_from_second" cycledef in the default workflow configuration # file (default_workflow.yaml) requires the starting date of the second # cycle. That is difficult to calculate in the yaml file itself because From e47cfe60aabe33b255cec636ba421084371b0f87 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 26 Oct 2024 18:46:33 -0600 Subject: [PATCH 170/260] (1) Reorganize verification section in config_defaults.yaml so that parameters are grouped together in a way that is more natural; (2) update and improve description of variables in the verification section of config_defaults.yaml; (3) update documentation in ConfigWorkflow.rst to reflect the updates in config_defaults.yaml. 
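
For a concrete sense of how the documented METplus file name templates
resolve, a small sketch in plain python (this only mimics the
{valid?fmt=...} substitution, which METplus itself performs; the valid
time below is hypothetical):

    from datetime import datetime

    valid = datetime(2024, 4, 29, 3)   # hypothetical valid time
    obs_avail_intvl_hrs = 1            # CCPA obs availability interval
    fn = f"{valid:%Y%m%d}/ccpa.t{valid:%H}z.{obs_avail_intvl_hrs:02d}h.hrap.conus.gb2"
    # fn == "20240429/ccpa.t03z.01h.hrap.conus.gb2"
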
--- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 404 ++++++++++++++---- ush/config_defaults.yaml | 382 +++++++++-------- 2 files changed, 526 insertions(+), 260 deletions(-) diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 14fccdd5e5..df9a0dfa22 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -293,6 +293,9 @@ Directory Parameters ``EXPTDIR``: (Default: ``'{{ [workflow.EXPT_BASEDIR, workflow.EXPT_SUBDIR]|path_join }}'``) The full path to the experiment directory. By default, this value will point to ``"${EXPT_BASEDIR}/${EXPT_SUBDIR}"``, but the user can define it differently in the configuration file if desired. +``WFLOW_FLAG_FILES_DIR``: (Default: ``'{{ [workflow.EXPTDIR, "wflow_flag_files"]|path_join }}'``) + Directory in which flag files marking completion of various workflow tasks can be placed. + Pre-Processing File Separator Parameters -------------------------------------------- @@ -1582,26 +1585,21 @@ Pressure Tendency Diagnostic ``PRINT_DIFF_PGR``: (Default: false) Option to turn on/off the pressure tendency diagnostic. -Verification Parameters -========================== +Verification (VX) Parameters +================================= Non-default parameters for verification tasks are set in the ``verification:`` section of the ``config.yaml`` file. -General Verification Parameters ---------------------------------- - -``METPLUS_VERBOSITY_LEVEL``: (Default: ``2``) - Logging verbosity level used by METplus verification tools. Valid values: 0 to 5, with 0 quiet and 5 loud. - -METplus Parameters ----------------------- - -:ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. Many of the METplus parameters are described below, but additional documentation for the METplus components is available on the `METplus website `__. +.. note:: + The verification tasks in the SRW App are based on the :ref:`METplus ` + verification software developed at the Developmental Testbed Center (:ref:`DTC`). + :ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. + Full documentation for METplus is available on the `METplus website `__. .. _METParamNote: .. note:: - Where a date field is required: + Where a date field is required: * ``YYYY`` refers to the 4-digit valid year * ``MM`` refers to the 2-digit valid month * ``DD`` refers to the 2-digit valid day of the month @@ -1609,111 +1607,345 @@ METplus Parameters * ``mm`` refers to the 2-digit valid minutes of the hour * ``SS`` refers to the two-digit valid seconds of the hour -``CCPA_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ccpa/proc"``) - User-specified location of the directory where :term:`CCPA` hourly precipitation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure, as well as important caveats about errors in the metadata and file names. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. 
-``NOHRSC_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc"``)
-   User-specified location of top-level directory where NOHRSC 6- and 24-hour snowfall accumulation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file scripts/exregional_get_verif_obs.sh for more details about files and directory structure
-
-   .. attention::
-      Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur.
 
-   .. note::
-      Due to limited availability of NOHRSC observation data on NOAA :term:`HPSS` and the likelihood that snowfall accumulation verification will not be desired outside of winter cases, this verification option is currently not present in the workflow by default. In order to use it, the verification environment variable ``VX_FIELDS`` should be updated to include ``ASNOW``. This will allow the related workflow tasks to be run.
+General VX Parameters
+---------------------------------
 
-``MRMS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/mrms/proc"``)
-   User-specified location of the directory where :term:`MRMS` composite reflectivity and echo top files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in the ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure.
-
-   .. attention::
-      Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur.
+``VX_FIELD_GROUPS``: (Default: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ])
+   The groups of fields (some of which may consist of only a single field) on which
+   to run verification.
 
-``NDAS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ndas/proc"``)
-   User-specified location of top-level directory where :term:`NDAS` prepbufr files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure.
-
-   .. attention::
-      Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur.
+   Since accumulated snowfall (``ASNOW``) is often not of interest in non-winter
+   cases and because observation files for ``ASNOW`` are not available on NOAA
+   HPSS for retrospective cases before March 2020, by default ``ASNOW`` is not
+   included in ``VX_FIELD_GROUPS``, but it may be added to this list in order to
+   include the verification tasks for ``ASNOW`` in the workflow.  Valid values:
+   ``"APCP"`` | ``"ASNOW"`` | ``"REFC"`` | ``"RETOP"`` | ``"ADPSFC"`` | ``"ADPUPA"``
 
+``VX_APCP_ACCUMS_HRS``: (Default: [ 1, 3, 6, 24 ])
+   The accumulation intervals (in hours) to include in the verification of
+   accumulated precipitation (APCP).  If ``VX_FIELD_GROUPS`` contains ``"APCP"``,
+   then ``VX_APCP_ACCUMS_HRS`` must contain at least one element.  Otherwise,
+   ``VX_APCP_ACCUMS_HRS`` will be ignored.  Valid values: ``1`` | ``3`` | ``6`` | ``24``
 
+``VX_ASNOW_ACCUMS_HRS``: (Default: [ 6, 24 ])
+   The accumulation intervals (in hours) to include in the verification of
+   accumulated snowfall (ASNOW).  If ``VX_FIELD_GROUPS`` contains ``"ASNOW"``,
+   then ``VX_ASNOW_ACCUMS_HRS`` must contain at least one element.  Otherwise,
+   ``VX_ASNOW_ACCUMS_HRS`` will be ignored.
Valid values: ``6`` | ``12`` | ``18`` | ``24`` + +``VX_CONFIG_[DET|ENS]_FN``: (Default: ``vx_config_[det|ens].yaml``) + Names of configuration files for deterministic and ensemble verification + that specify the field groups, field names, levels, and (if applicable) + thresholds for which to run verification. These are relative to the + directory ``METPLUS_CONF`` in which the METplus config templates are + located. They may include leading relative paths before the file + names, e.g. ``some_dir/another_dir/vx_config_det.yaml``. -This section includes template variables for :term:`CCPA`, :term:`MRMS`, :term:`NOHRSC`, and :term:`NDAS` observation files. +``VX_OUTPUT_BASEDIR``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}$COMOUT/metout{% else %}{{ workflow.EXPTDIR }}{% endif %}'``) + Template for base (i.e. top-level) directory in which METplus will place + its output. -``OBS_CCPA_APCP_FN_TEMPLATE``: (Default: ``'{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2'``) - File name template for CCPA accumulated precipitation (APCP) observations. This template is used by the workflow tasks that call the METplus *PcpCombine* tool on CCPA obs to find the input observation files containing 1-hour APCP and then generate NetCDF files containing either 1-hour or greater than 1-hour APCP. -``OBS_NOHRSC_ASNOW_FN_TEMPLATE``: (Default: ``'{valid?fmt=%Y%m%d}/sfav2_CONUS_${ACCUM_HH}h_{valid?fmt=%Y%m%d%H}_grid184.grb2'``) - File name template for NOHRSC snow observations. +METplus-Specific Parameters +----------------------------------- -``OBS_MRMS_REFC_FN_TEMPLATE``: (Default: ``'{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'``) - File name template for :term:`MRMS` reflectivity observations. +``METPLUS_VERBOSITY_LEVEL``: (Default: ``2``) + Logging verbosity level used by METplus verification tools. Valid values: 0 to 5, with 0 quiet and 5 loudest. -``OBS_MRMS_RETOP_FN_TEMPLATE``: (Default: ``'{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'``) - File name template for MRMS echo top observations. -``OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE``: (Default: ``'prepbufr.ndas.{valid?fmt=%Y%m%d%H}'``) - File name template for :term:`NDAS` surface and upper air observations. This template is used by the workflow tasks that call the METplus *Pb2nc* tool on NDAS obs to find the input observation files containing ADP surface (ADPSFC) or ADP upper air (ADPUPA) fields and then generate NetCDF versions of these files. +VX Parameters for Observations +------------------------------------- -``OBS_NDAS_SFCorUPA_FN_METPROC_TEMPLATE``: (Default: ``'${OBS_NDAS_SFCorUPA_FN_TEMPLATE}.nc'``) - File name template for NDAS surface and upper air observations after processing by MET's *pb2nc* tool (to change format to NetCDF). +.. note:: + The observation types that the SRW App can currently retrieve (if necessary) + and use in verification are: + * CCPA (Climatology-Calibrated Precipitation Analysis) + * NOHRSC (National Operational Hydrologic Remote Sensing Center) + * MRMS (Multi-Radar Multi-Sensor) + * NDAS (NAM Data Assimilation System) + The script ``ush/get_obs.py`` contains further details on the files and + directory structure of each obs type. -``OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT``: (Default: ``'${OBS_CCPA_APCP_FN_TEMPLATE}_a${ACCUM_HH}h.nc'``) - Template used to specify the names of the output NetCDF observation files generated by the workflow verification tasks that call the METplus *PcpCombine* tool on CCPA observations. 
(These files will contain observations of accumulated precipitation [APCP], both for 1 hour and for > 1 hour accumulation periods, in NetCDF format.) +``[CCPA|NOHRSC|MRMS|NDAS]_OBS_AVAIL_INTVL_HRS``: (Defaults: [1|6|1|1]) + Time interval (in hours) at which the various types of obs are available + on NOAA's HPSS. + + Note that MRMS files are in fact available every few minutes, but here + we set the obs availability interval to 1 hour because currently that + is the shortest output interval for forecasts, i.e. the forecasts cannot + (yet) support sub-hourly output. + +``[CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/[ccpa|nohrsc|mrms|ndas]"``) + Base directory in which CCPA, NOHRSC, MRMS, or NDAS obs files needed by + the verification tasks are located. If the files do not exist, they + will be retrieved and placed under this directory. Note that: + + * If the obs files need to be retrieved (e.g. from NOAA's HPSS), because + they are not already staged on disk, then the user must have write + permission to this directory. Otherwise, the ``get_obs`` workflow + tasks that attempt to create these files will fail. + + * CCPA obs contain errors in the metadata for a certain range of dates + that need to be corrected during obs retrieval. This is described + in more detail in the script ``ush/get_obs.py``. + +``OBS_[CCPA|NOHRSC|MRMS|NDAS]_FN_TEMPLATES``: + **Defaults:** + + ``OBS_CCPA_FN_TEMPLATES``: + .. code-block:: console + + [ 'APCP', + '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2" }}' ] + + ``OBS_NOHRSC_FN_TEMPLATES``: + .. code-block:: console + + [ 'ASNOW', + '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ] + + ``OBS_MRMS_FN_TEMPLATES``: + .. code-block:: console + + [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2', + 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] + + ``OBS_NDAS_FN_TEMPLATES``: + .. code-block:: console + + [ 'ADPSFCandADPUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] + + File name templates for various obs types. These are meant to be used + in METplus configuration files and thus contain METplus time formatting + strings. Each of these variables is a python list containing pairs of + values. The first element of each pair specifies the verification field + group(s) for which the file name template will be needed, and the second + element is the file name template itself, which may include a leading + relative directory. (Here, by "verification field group", we mean a + group of fields that is verified together in the workflow; see the + description of the variable ``VX_FIELD_GROUPS``.) For example, for CCPA + obs, the variable name is ``OBS_CCPA_FN_TEMPLATES``. From the default value + of this variable given above, we see that if ``CCPA_OBS_AVAIL_INTVL_HRS`` + is set to 1 (i.e. the CCPA obs are assumed to be available every hour) + and the valid time is 2024042903, then the obs file (including a relative + path) to look for and, if necessary, create is + + ``20240429/ccpa.t03z.01h.hrap.conus.gb2`` + + This file will be used in the verification of fields under the APCP + field group (which consist of accumulated precipitation for the + accumulation intervals specified in ``VX_APCP_ACCUMS_HRS``). 
+
+   Note that:
+
+   * The file name templates are relative to the obs base directories given in
+     the variables
+
+     ``[CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR``
+
+     defined above.  Thus, the template for the full path to the obs files
+     is given, e.g. for CCPA obs, by
+
+     .. code-block:: console
+
+        CCPA_OBS_DIR/OBS_CCPA_FN_TEMPLATES[1]
+
+     where the ``[1]`` indicates the second element of the list ``OBS_CCPA_FN_TEMPLATES``.
+
+   * The file name templates may represent file names only, or they may
+     include leading relative directories.
+
+   * The default values of these variables for the CCPA, NOHRSC, and NDAS
+     obs types contain only one pair of values (because these obs types
+     contain only one set of files that we use in the verification) while
+     the default value for the MRMS obs type contains two pairs of values,
+     one for the set of files that contains composite reflectivity data
+     and another for the set that contains echo top data.  This is simply
+     because the MRMS obs type does not group all its fields together into
+     one set of files as does, for example, the NDAS obs type.
+
+   * Each file name template must contain full information about the year,
+     month, day, and hour by including METplus time formatting strings for
+     this information.  Some of this information (e.g. the year, month,
+     and day) may be in the relative directory portion of the template and
+     the rest (e.g. the hour) in the file name, or there may be no relative
+     directory portion and all of this information may be in the file name,
+     but all four pieces of timing information must be present somewhere in
+     each template as METplus time formatting strings.  If not, obs files
+     created by the ``get_obs`` tasks for different days might overwrite each
+     other.
+
+   * The workflow generation scripts create a ``get_obs`` task for each obs
+     type that is needed in the verification and for each day on which that
+     obs type is needed for at least some hours.  That ``get_obs`` task first
+     checks whether all the necessary obs files for that day already exist
+     at the locations specified by the full path template(s) (which are
+     obtained by combining the base directories ``[CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR``
+     with the file name template(s)).  If for a given day one or more of
+     these obs files do not exist on disk, the ``get_obs`` task will retrieve
+     "raw" versions of these files from a data store (e.g. NOAA's HPSS)
+     and will place them in a temporary "raw" directory.  It will then
+     move or copy these raw files to the locations specified by the full
+     path template(s).
+
+   * The raw obs files, i.e. the obs files as they are named and arranged
+     in the data stores and retrieved and placed in the raw directories,
+     may be arranged differently and/or have names that are different from
+     the ones specified in the file name templates.  If so, they are renamed
+     while being moved or copied from the raw directories to the locations
+     specified by the full path template(s).  (The lists of templates for
+     searching for and retrieving files from the data stores are different
+     from the METplus templates described here; the former are given in
+     the data retrieval configuration file at ``parm/data_locations.yml``.)
+
+   * When the ex-scripts for the various vx tasks are converted from bash
+     to python scripts, these variables should be converted from python
+     lists to python dictionaries, where the first element of each pair
+     becomes the key and the second becomes the value.
This currently
+     cannot be done due to limitations in the workflow on converting
+     python dictionaries to bash variables.
+
+``REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]``: (Defaults: [True|True|True|True])
+   Flag specifying whether to remove the "raw" observation directories
+   after retrieving the specified type of obs (CCPA, NOHRSC, MRMS, or
+   NDAS) from a data store (e.g. NOAA's HPSS).  The raw directories
+   are the ones in which the observation files are placed immediately
+   after pulling them from the data store but before performing any
+   processing on them such as renaming the files and/or reorganizing
+   their directory structure.
+
+``OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT``:
+   **Default:**
 
-``OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT``: (Default: ``'${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc'``)
-   Template used to specify the names of the output NetCDF observation files generated by the workflow verification tasks that call the METplus Pb2nc tool on NDAS observations. (These files will contain obs ADPSFC or ADPUPA fields in NetCDF format.)
+   .. code-block:: console
 
+     {%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %}
+     {{- "ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}
 
+   METplus template for the names of the NetCDF files generated by the
+   workflow verification tasks that call METplus's PcpCombine tool on
+   CCPA observations.  These files will contain observed accumulated
+   precipitation in NetCDF format for various accumulation intervals.
 
-VX Forecast Model Name
-------------------------
+``OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT``:
+   **Default:**
 
-``VX_FCST_MODEL_NAME``: (Default: ``'{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}'``)
-   String that specifies a descriptive name for the model being verified. This is used in forming the names of the verification output files as well as in the contents of those files.
+   .. code-block:: console
 
-``VX_FIELDS``: (Default: [ "APCP", "REFC", "RETOP", "SFC", "UPA" ])
-   The fields or groups of fields for which verification tasks will run. Because ``ASNOW`` is often not of interest in cases outside of winter, and because observation files are not located for retrospective cases on NOAA HPSS before March 2020, ``ASNOW`` is not included by default. ``"ASNOW"`` may be added to this list in order to include the related verification tasks in the workflow. Valid values: ``"APCP"`` | ``"REFC"`` | ``"RETOP"`` | ``"SFC"`` | ``"UPA"`` | ``"ASNOW"``
-
-``VX_APCP_ACCUMS_HRS``: (Default: [ 1, 3, 6, 24 ])
-   The accumulation periods (in hours) to consider for accumulated precipitation (APCP). If ``VX_FIELDS`` contains ``"APCP"``, then ``VX_APCP_ACCUMS_HRS`` must contain at least one element. If ``VX_FIELDS`` does not contain ``"APCP"``, ``VX_APCP_ACCUMS_HRS`` will be ignored. Valid values: ``1`` | ``3`` | ``6`` | ``24``
+   {%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %}
+   {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}
 
-``VX_ASNOW_ACCUMS_HRS``: (Default: [ 6, 24 ])
-   The accumulation periods (in hours) to consider for ``ASNOW`` (accumulated snowfall). If ``VX_FIELDS`` contains ``"ASNOW"``, then ``VX_ASNOW_ACCUMS_HRS`` must contain at least one element. If ``VX_FIELDS`` does not contain ``"ASNOW"``, ``VX_ASNOW_ACCUMS_HRS`` will be ignored.
-Verification (VX) Directories
-------------------------------

``OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT``: (Default: ``'${OBS_NDAS_FN_TEMPLATES[1]}.nc'``)
  METplus template for the names of the NetCDF files generated by the
  workflow verification tasks that call METplus's Pb2nc tool on NDAS
  observations.  These files will contain the observed ADPSFC or ADPUPA
  fields in NetCDF format (instead of NDAS's native prepbufr format).

-``VX_FCST_INPUT_BASEDIR``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}$COMOUT/../..{% else %}{{ workflow.EXPTDIR }}{% endif %}'``)
-  Template for top-level directory containing forecast (but not obs) files that will be used as input into METplus for verification.
``NUM_MISSING_OBS_FILES_MAX``: (Default: 2)
  For verification tasks that need observational data, this specifies
  the maximum number of observation files that may be missing.  If more
  than this number are missing, the verification task will error out.
  This is a crude way of checking that there are enough obs to conduct
  verification (crude because this number should probably depend on the
  field being verified, the time interval between observations, the
  length of the forecast, etc; an alternative may be to specify the
  maximum allowed fraction of obs files that can be missing).

-``VX_OUTPUT_BASEDIR``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}$COMOUT/metout{% else %}{{ workflow.EXPTDIR }}{% endif %}'``)
-  Template for top-level directory in which METplus will place its output.

-``VX_NDIGITS_ENSMEM_NAMES``: 3
-  Number of digits in the ensemble member names. This is a configurable variable to allow users to change its value (e.g., to go from "mem004" to "mem04") when using staged forecast files that do not use the same number of digits as the SRW App.

VX Parameters for Forecasts
----------------------------------

``VX_FCST_MODEL_NAME``: (Default: ``'{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}'``)
  String that specifies a descriptive name for the model being verified.
  This is used in forming the names of the verification output files and
  is also included in the contents of those files.

``VX_FCST_OUTPUT_INTVL_HRS``: (Default: 1)
  The forecast output interval (in hours) to assume for verification
  purposes.

  .. note::
    If/when a variable is created in this configuration file that specifies
    the forecast output interval for native SRW forecasts, it should be
    used as the default value of this variable.

``VX_FCST_INPUT_BASEDIR``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}$COMOUT/../..{% else %}{{ workflow.EXPTDIR }}{% endif %}'``)
  METplus template for the name of the base (i.e. top-level) directory
  containing the forecast files to use as inputs to the verification
  tasks.
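  As context for ``NUM_MISSING_OBS_FILES_MAX`` above, the check it controls
  amounts to a simple counting loop over the files expected at each lead
  hour.  The following is a hypothetical Python sketch of that idea (the
  helper name and arguments are illustrative, not the App's actual code):

  .. code-block:: python

     import os

     def check_lead_hour_files(base_dir, fn_for_lead_hr, lead_hrs, num_missing_max):
         """Count the files expected at each lead hour; fail if too many are missing."""
         missing = [lhr for lhr in lead_hrs
                    if not os.path.isfile(os.path.join(base_dir, fn_for_lead_hr(lhr)))]
         if len(missing) > num_missing_max:
             raise FileNotFoundError(
                 f"{len(missing)} file(s) missing (max allowed: {num_missing_max}) "
                 f"at lead hours {missing}")
         # Return the lead hours whose files were found.
         return [lhr for lhr in lead_hrs if lhr not in missing]

     # E.g., keep only the lead hours in 0..36 whose files exist, tolerating
     # up to 2 gaps (the file name pattern here is purely illustrative):
     # kept = check_lead_hour_files("/path/to/obs", lambda lhr: f"f{lhr:03d}.grib2",
     #                              range(0, 37), num_missing_max=2)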
``FCST_SUBDIR_TEMPLATE``:
  **Default:**

  .. code-block:: console

    {%- if user.RUN_ENVIR == "nco" %}
    {{- "${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}" }}
    {%- else %}
    {{- "{init?fmt=%Y%m%d%H?shift=-${time_lag}}" }}
    {%- if global.DO_ENSEMBLE %}
    {{- "/${ensmem_name}" }}
    {%- endif %}
    {{- "/postprd" }}
    {%- endif %}

  METplus template for the name of the subdirectory containing forecast
  files to use as inputs to the verification tasks.

``FCST_FN_TEMPLATE``:
  **Default:**

-``FCST_SUBDIR_TEMPLATE``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}/postprd{% endif %}'``)
-  A template for the subdirectory containing input forecast files for VX tasks.
  .. code-block:: console

    {{- "${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z" }}
    {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}
    {{- ".${ensmem_name}" }}
    {%- endif %}
    {{- ".prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2" }}

-``FCST_FN_TEMPLATE``: (Default: ``'${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2'``)
-  A template for the forecast file names used as input to verification tasks.
  METplus template for the names of the forecast files to use as inputs
  to the verification tasks.

-``FCST_FN_METPROC_TEMPLATE``: (Default: ``'${NET_default}.t{init?fmt=%H}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc'``)
-  A template for how to name the forecast files for accumulated precipitation (APCP) with greater than 1-hour accumulation (i.e., 3-, 6-, and 24-hour accumulations) after processing by ``PcpCombine``.
``FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT``:
  **Default:**

-``NUM_MISSING_OBS_FILES_MAX``: (Default: 2)
-  For verification tasks that need observational data, this specifies the maximum number of observation files that may be missing. If more than this number are missing, the verification task will error out.
-  Note that this is a crude way of checking that there are enough observations to conduct verification since this number should probably depend on the field being verified, the time interval between observations, the length of the forecast, etc. An alternative may be to specify the maximum allowed fraction of observation files that can be missing (i.e., the number missing divided by the number that are expected to exist).
  .. code-block:: console

    {{- "${NET_default}.t{init?fmt=%H}z" }}
    {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}
    {{- ".${ensmem_name}" }}
    {%- endif %}
    {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc" }}

  METplus template for the names of the NetCDF files generated by the
  workflow verification tasks that call METplus's PcpCombine tool on
  forecast output.  These files will contain forecast accumulated
  precipitation in NetCDF format for various accumulation intervals.

``VX_NDIGITS_ENSMEM_NAMES``: (Default: 3)
  Number of digits to assume/use in the forecast ensemble member identifier
  string used in directory and file names and other instances in which the
  ensemble member needs to be identified.  For example, if this is set to
  3, the identifier for ensemble member 4 will be "mem004", while if it's
  set to 2, the identifier will be "mem04".  This is useful when verifying
  staged forecast files from a forecasting model/system other than the
  SRW that uses a different number of digits in the ensemble member
  identifier string.
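  The identifier described here is plain zero-padded integer formatting;
  the following is an illustrative Python equivalent of the
  ``printf "%0${VX_NDIGITS_ENSMEM_NAMES}d"`` idiom used in the ex-scripts
  (the function name is this sketch's own):

  .. code-block:: python

     def ensmem_name(indx: int, ndigits: int = 3) -> str:
         """Build the ensemble-member identifier, e.g. "mem004" for ndigits=3."""
         return f"mem{indx:0{ndigits}d}"

     assert ensmem_name(4) == "mem004"            # VX_NDIGITS_ENSMEM_NAMES: 3
     assert ensmem_name(4, ndigits=2) == "mem04"  # VX_NDIGITS_ENSMEM_NAMES: 2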
For example, if this is set to + 3, the identifier for ensemble member 4 will be "mem004", while if it's + set to 2, the identifier will be "mem04". This is useful when verifying + staged forecast files from a forecasting model/system other than the + SRW that uses a different number of digits in the ensemble member + identifier string. ``NUM_MISSING_FCST_FILES_MAX``: (Default: 0) - For verification tasks that need forecast data, this specifies the maximum number of post-processed forecast files that may be missing. If more than this number are missing, the verification task will not be run. + For verification tasks that need forecast data, this specifies the + maximum number of post-processed forecast files that may be missing. + If more than this number are missing, the verification task will exit + with an error. + Coupled AQM Configuration Parameters ===================================== diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 6a2e31a94e..3957c3c0db 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2339,33 +2339,110 @@ global: # verification (vx) parameters #----------------------------- verification: + # + # General Verification Parameters + # ------------------------------- + # + # VX_FIELD_GROUPS: + # The groups of fields (some of which may consist of only a single field) + # on which to run verification. + # + # Since accumulated snowfall (ASNOW) is often not of interest in non-winter + # cases and because observation files for ASNOW are not available on NOAA + # HPSS for retrospective cases before March 2020, by default ASNOW is not + # included VX_FIELD_GROUPS, but it may be added to this list in order to + # include the verification tasks for ASNOW in the workflow. + # + VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + # + # VX_APCP_ACCUMS_HRS: + # The accumulation intervals (in hours) to include in the verification of + # accumulated precipitation (APCP). If VX_FIELD_GROUPS contains "APCP", + # then VX_APCP_ACCUMS_HRS must contain at least one element. Otherwise, + # VX_APCP_ACCUMS_HRS will be ignored. + # + VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] + # + # VX_ASNOW_ACCUMS_HRS: + # The accumulation intervals (in hours) to include in the verification of + # accumulated snowfall (ASNOW). If VX_FIELD_GROUPS contains "ASNOW", + # then VX_ASNOW_ACCUMS_HRS must contain at least one element. Otherwise, + # VX_ASNOW_ACCUMS_HRS will be ignored. + # + VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] + # + # VX_CONFIG_[DET|ENS]_FN: + # Names of configuration files for deterministic and ensemble verification + # that specify the field groups, field names, levels, and (if applicable) + # thresholds for which to run verification. These are relative to the + # directory METPLUS_CONF in which the METplus config templates are + # located. They may include leading relative paths before the file + # names, e.g. ``some_dir/another_dir/vx_config_det.yaml``. + # + VX_CONFIG_DET_FN: 'vx_config_det.yaml' + VX_CONFIG_ENS_FN: 'vx_config_ens.yaml' + # + # VX_OUTPUT_BASEDIR: + # Template for base (i.e. top-level) directory in which METplus will place + # its output. + # + VX_OUTPUT_BASEDIR: '{% if user.RUN_ENVIR == "nco" %}$COMOUT/metout{% else %}{{ workflow.EXPTDIR }}{% endif %}' + # + # METplus-Specific Parameters + # ------------------------------- # # METPLUS_VERBOSITY_LEVEL: # Logging verbosity level used by METplus verification tools. 0 to 5, - # with 0 quiet and 5 loud. + # with 0 quiet and 5 loudest. 
# METPLUS_VERBOSITY_LEVEL: 2 # + # Observation-Specific Parameters + # ------------------------------- + # + # Note: + # The observation types that the SRW App can currently retrieve (if + # necessary) and use in verification are: + # + # * CCPA (Climatology-Calibrated Precipitation Analysis) + # * NOHRSC (National Operational Hydrologic Remote Sensing Center) + # * MRMS (Multi-Radar Multi-Sensor) + # * NDAS (NAM Data Assimilation System) + # + # The script ush/get_obs.py contains further details on the files and + # directory structure of each obs type. + # + + # + # [CCPA|NOHRSC|MRMS|NDAS]_OBS_AVAIL_INTVL_HRS: + # Time interval (in hours) at which various types of obs are available on + # NOAA's HPSS. + # + # Note that MRMS files are in fact available every few minutes, but here + # we set the obs availability interval to 1 hour because currently that + # is the shortest output interval for forecasts, i.e. the forecasts cannot + # (yet) support sub-hourly output. + # + CCPA_OBS_AVAIL_INTVL_HRS: 1 + NOHRSC_OBS_AVAIL_INTVL_HRS: 6 + MRMS_OBS_AVAIL_INTVL_HRS: 1 + NDAS_OBS_AVAIL_INTVL_HRS: 1 + # # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR: # Base directory in which CCPA, NOHRSC, MRMS, or NDAS obs files needed by # the verification tasks are located. If the files do not exist, they # will be retrieved and placed under this directory. # - # Notes: - # - # * If the obs files need to be retrieved (e.g. from NOAA's HPSS), then - # the user must have write permission to this directory. Otherwise, - # the get_obs tasks that attempt to create these files will fail. - # - # * Do not set two or more of these directories to the same location. - # Otherwise, unexpected results and data loss may occur. + # Note that: # - # * The script ush/get_obs.py contains further details on the files and - # directory structure of each obs type. + # * If the obs files need to be retrieved (e.g. from NOAA's HPSS), because + # they are not already staged on disk, then the user must have write + # permission to this directory. Otherwise, the "get_obs" workflow tasks + # that attempt to create these files will fail. # # * CCPA obs contain errors in the metadata for a certain range of dates # that need to be corrected during obs retrieval. This is described - # in more detail in ush/get_obs.py. + # in more detail in the script ush/get_obs.py. # CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" @@ -2379,37 +2456,32 @@ verification: # values. The first element of each pair specifies the verification field # group(s) for which the file name template will be needed, and the second # element is the file name template itself, which may include a leading - # relative directory. (Here, by "verification field group" we mean a - # group of fields that is verified together in the workflow.) For example, - # for the CCPA obs type, the variable name is OBS_CCPA_FN_TEMPLATES, and - # its default value contains only one pair of values given by - # - # [ 'APCP', - # '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} - # {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2" }}' ] - # - # Thus, if CCPA_OBS_AVAIL_INTVL_HRS is set to 1 above (i.e. the CCPA obs - # are available every 1 hour), then for a valid time of 2024042903, the - # obs file name (including a relative path) to look for and, if necessary, - # create, will be + # relative directory. 
(Here, by "verification field group", we mean a + # group of fields that is verified together in the workflow; see the + # description of the variable VX_FIELD_GROUPS.) For example, for CCPA + # obs, the variable name is OBS_CCPA_FN_TEMPLATES. From the default value + # of this variable given above, we see that if CCPA_OBS_AVAIL_INTVL_HRS + # is set to 1 (i.e. the CCPA obs are assumed to be available every hour) + # and the valid time is 2024042903, then the obs file (including a relative + # path) to look for and, if necessary, create is # # 20240429/ccpa.t03z.01h.hrap.conus.gb2 # # This file will be used in the verification of fields under the APCP - # field group (which consist of accumulated precipitation for various - # accumulation intervals). + # field group (which consist of accumulated precipitation for the + # accumulation intervals specified in VX_APCP_ACCUMS_HRS). # # Notes: # - # * The file name templates are relative to the obs base directories given - # in the variables + # * The file name templates are relative to the base directories given in + # the variables # # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR # # defined above. Thus, the template for the full path to the obs files # is given, e.g. for CCPA obs, by # - # {CCPA_OBS_DIR}/{OBS_CCPA_FN_TEMPLATES[1]}, + # {CCPA_OBS_DIR}/{OBS_CCPA_FN_TEMPLATES[1]} # # where the [1] indicates the second element of the list OBS_CCPA_FN_TEMPLATES. # @@ -2418,40 +2490,41 @@ verification: # # * The default values of these variables for the CCPA, NOHRSC, and NDAS # obs types contain only one pair of values (because these obs types - # contain only one set of file that we use in the verification) while + # contain only one set of files that we use in the verification) while # the default value for the MRMS obs type contains two pairs of values, - # one for obs files that contain composite reflectivity data and another - # for the ones that contain echo top data (simply because the MRMS obs - # do not group these two fields together in one set of file as do, for - # example, the NDAS obs). - # - # * Each template must contain full information about the year, month, - # day, and hour by including METplus time formatting strings that serve - # as templates for this information. Some of this information (e.g. - # the year, month, and day) may be in the relative directory portion - # of the template and the rest (e.g. the hour) in the file name, or - # there may be no relative directory portion and all of this information - # may be in the file name, but all four pieces of timing information - # must be present somewhere in each template as METplus time formatting - # strings. If not, obs files created by the get_obs tasks for different - # days might overwrite each other. - # - # * The workflow creates a get_obs task for each obs type that is needed - # in the verification and for each day on which that obs type is needed - # at at least some hours. That get_obs task first checks whether all - # the necessary obs files for that day already exist at the locations - # specified by the full path template(s) (which is formed by combining - # the base directory and the file name template). If for a given day - # one or more of these obs files do not exist on disk, the get_obs task - # will retrieve "raw" versions of these files from a data store (e.g. - # NOAA's HPSS) and will place them in a temporary "raw" directory. It - # will then move or copy these raw files to the locations specified by - # the full path template(s). 
+  # one for the set of files that contains composite reflectivity data
+  # and another for the set that contains echo top data.  This is simply
+  # because the MRMS obs type does not group these two fields together in
+  # one set of files as does, for example, the NDAS obs type.
+  #
+  # * Each file name template must contain full information about the year,
+  # month, day, and hour by including METplus time formatting strings for
+  # this information.  Some of this information (e.g. the year, month,
+  # and day) may be in the relative directory portion of the template and
+  # the rest (e.g. the hour) in the file name, or there may be no relative
+  # directory portion and all of this information may be in the file name,
+  # but all four pieces of timing information must be present somewhere in
+  # each template as METplus time formatting strings.  If not, obs files
+  # created by the "get_obs" tasks for different days might overwrite each
+  # other.
+  #
+  # * The workflow generation scripts create a "get_obs" task for each obs
+  # type that is needed in the verification and for each day on which that
+  # obs type is needed for at least some hours.  That "get_obs" task first
+  # checks whether all the necessary obs files for that day already exist
+  # at the locations specified by the full path template(s) (which are
+  # obtained by combining the base directories [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR
+  # with the file name template(s)).  If for a given day one or more of
+  # these obs files do not exist on disk, the "get_obs" task will retrieve
+  # "raw" versions of these files from a data store (e.g. NOAA's HPSS)
+  # and will place them in a temporary "raw" directory.  It will then
+  # move or copy these raw files to the locations specified by the full
+  # path template(s).
   #
   # * The raw obs files, i.e. the obs files as they are named and arranged
-  # in the data stores and retrieved to the raw directories, may be
-  # arranged differently and/or have names that are different from the
-  # ones specified in the file name templates.  If so, they are renamed
+  # in the data stores and retrieved and placed in the raw directories,
+  # may be arranged differently and/or have names that are different from
+  # the ones specified in the file name templates.  If so, they are renamed
   # while being moved or copied from the raw directories to the locations
   # specified by the full path template(s).  (The lists of templates for
   # searching for and retrieving files from the data stores is different
   # than the METplus templates described here; the former are given in
   # the data retrieval configuration file at parm/data_locations.yml.)
@@ -2475,19 +2548,6 @@ verification:
      'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ]
   OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ]
   #
-  # Time interval (in hours) at which various types of obs are available on
-  # NOAA's HPSS.
-  #
-  # Note that MRMS files are in fact available every few minutes, but here
-  # we set the obs availability interval to 1 hour because currently that
-  # is the shortest output interval for the forecast, i.e. the forecast
-  # cannot (yet) support sub-hourly output.
-  #
-  CCPA_OBS_AVAIL_INTVL_HRS: 1
-  NOHRSC_OBS_AVAIL_INTVL_HRS: 6
-  MRMS_OBS_AVAIL_INTVL_HRS: 1
-  NDAS_OBS_AVAIL_INTVL_HRS: 1
-  #
   # REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]:
   # Flag specifying whether to remove the "raw" observation directories
   # after retrieving the specified type of obs (CCPA, NOHRSC, MRMS, or
   # NOHRSC) from a data store (e.g. NOAA's HPSS).  The raw directories
   # are the ones in which the observation files are placed immediately
   # after pulling them from the data store but before performing any
   # processing on them such as renaming the files and/or reorganizing
   # their directory structure.
  #
-  REMOVE_RAW_OBS_CCPA: true
-  REMOVE_RAW_OBS_NOHRSC: true
-  REMOVE_RAW_OBS_MRMS: true
-  REMOVE_RAW_OBS_NDAS: true
+  REMOVE_RAW_OBS_CCPA: True
+  REMOVE_RAW_OBS_NOHRSC: True
+  REMOVE_RAW_OBS_MRMS: True
+  REMOVE_RAW_OBS_NDAS: True
   #
   # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
-  # METplus file name template used to specify the names of the NetCDF
-  # files generated by the worfklow verification tasks that call METplus's
-  # PcpCombine tool on CCPA observations.  These files will contain observed
-  # accumulated precip in NetCDF format for various accumulation intervals.
-  #
-  # OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
-  # METplus file name template used to specify the names of the NetCDF
-  # files generated by the worfklow verification tasks that call METplus's
-  # PcpCombine tool on NOHRSC observations.  These files will contain
-  # observed observed accumulated snow for various accumulaton intervals.
-  #
-  # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT:
-  # METplus file name template used to specify the names of the NetCDF
-  # files generated by the worfklow verification tasks that call METplus's
-  # Pb2nc tool on NDAS observations.  These files will contain the observed
-  # ADPSFC or ADPUPA fields in NetCDF format (instead of NDAS's native
-  # prepbufr format).
+  # METplus template for the names of the NetCDF files generated by the
+  # workflow verification tasks that call METplus's PcpCombine tool on
+  # CCPA observations.  These files will contain observed accumulated
+  # precipitation in NetCDF format for various accumulation intervals.
   #
   OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %}
     {{- "ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}'
+  #
+  # OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
+  # METplus template for the names of the NetCDF files generated by the
+  # workflow verification tasks that call METplus's PcpCombine tool on
+  # NOHRSC observations.  These files will contain observed accumulated
+  # snowfall for various accumulation intervals.
+  #
   OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %}
     {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}'
+  #
+  # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT:
+  # METplus template for the names of the NetCDF files generated by the
+  # workflow verification tasks that call METplus's Pb2nc tool on NDAS
+  # observations.  These files will contain the observed ADPSFC or ADPUPA
+  # fields in NetCDF format (instead of NDAS's native prepbufr format).
+  #
   OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_FN_TEMPLATES[1]}.nc'
   #
-  # VX_FCST_MODEL_NAME:
-  # String that specifies a descriptive name for the model being verified.
-  # This is used in forming the names of the verification output files as
-  # well as in the contents of those files.
+  # NUM_MISSING_OBS_FILES_MAX:
+  # For verification tasks that need observational data, this specifies
+  # the maximum number of observation files that may be missing.  If more
+  # than this number are missing, the verification task will error out.
+ # This is a crude way of checking that there are enough obs to conduct + # verification (crude because this number should probably depend on the + # field being verified, the time interval between observations, the + # length of the forecast, etc; an alternative may be to specify the + # maximum allowed fraction of obs files that can be missing). # - # VX_FIELD_GROUPS: - # The groups of fields (some of which may consist of a single field) on - # which to run verification. Because accumulated snow (ASNOW) is often - # not of interest in non-winter cases and because observation files for - # ASNOW are not available on NOAA HPSS for retrospective cases before - # March 2020, by default ASNOW is not included VX_FIELD_GROUPS, but it - # may be added to this list in order to include the verification tasks - # for ASNOW in the workflow. + NUM_MISSING_OBS_FILES_MAX: 2 # - # VX_APCP_ACCUMS_HRS: - # The 2-digit accumulation periods (in units of hours) to consider for - # APCP (accumulated precipitation). If VX_FIELD_GROUPS contains "APCP", - # then VX_APCP_ACCUMS_HRS must contain at least one element. If not, - # VX_APCP_ACCUMS_HRS will be ignored. + # Forecast-Specific Parameters + # ---------------------------- # - # VX_ASNOW_ACCUMS_HRS: - # The 2-digit accumulation periods (in units of hours) to consider for - # ASNOW (accumulated snowfall). If VX_FIELD_GROUPS contains "ASNOW", - # then VX_ASNOW_ACCUMS_HRS must contain at least one element. If not, - # VX_ASNOW_ACCUMS_HRS will be ignored. + # VX_FCST_MODEL_NAME: + # String that specifies a descriptive name for the model being verified. + # This is used in forming the names of the verification output files and + # is also included in the contents of those files. # VX_FCST_MODEL_NAME: '{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}' - VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] - VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] - VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] # # VX_FCST_OUTPUT_INTVL_HRS: - # The forecast output interval to use for verification purposes. The - # default value is currently 1 hour, but if/when a variable is created - # in this configuration file that specifies the forecast output interval - # for native SRW forecasts, then the default value of VX_FCST_OUTPUT_INTVL_HRS - # should be set to that. + # The forecast output interval (in hours) to assume for verification + # purposes. + # Note: + # If/when a variable is created in this configuration file that specifies + # the forecast output interval for native SRW forecasts, it should be + # used as the default value of this variable. # VX_FCST_OUTPUT_INTVL_HRS: 1 # # VX_FCST_INPUT_BASEDIR: - # Template for top-level directory containing forecast (but not obs) - # files that will be used as input into METplus for verification. - # - # VX_OUTPUT_BASEDIR: - # Template for top-level directory in which METplus will place its - # output. + # METplus template for the name of the base (i.e. top-level) directory + # containing the forecast files to use as inputs to the verification + # tasks. # VX_FCST_INPUT_BASEDIR: '{% if user.RUN_ENVIR == "nco" %}$COMOUT/../..{% else %}{{ workflow.EXPTDIR }}{% endif %}' - VX_OUTPUT_BASEDIR: '{% if user.RUN_ENVIR == "nco" %}$COMOUT/metout{% else %}{{ workflow.EXPTDIR }}{% endif %}' - # - # Number of digits in the ensemble member names. This is a configurable - # variable to allow users to change its value (e.g. 
to go from "mem004"
-  # to "mem04") when using staged forecast files that do not use the same
-  # number of digits as the SRW App.
-  #
-  VX_NDIGITS_ENSMEM_NAMES: 3
-  #
-  # File name and path templates used in the verification tasks.
   #
   # FCST_SUBDIR_TEMPLATE:
-  # Template for the subdirectory containing forecast files that are
-  # inputs to the verification tasks.
+  # METplus template for the name of the subdirectory containing forecast
+  # files to use as inputs to the verification tasks.
   #
-  # FCST_FN_TEMPLATE:
-  # Template for the names of the forecast files that are inputs to the
-  # verification tasks.
-  #
-  # FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
-  # Template used to specify the names of the output NetCDF forecast files
-  # generated by the worfklow verification tasks that call the METplus
-  # PcpCombine tool on forecasts.  (These files will contain forecast APCP,
-  # both for 1 hour and for > 1 hour accumulation periods, in NetCDF
-  # format.)
-  #
   FCST_SUBDIR_TEMPLATE: '{%- if user.RUN_ENVIR == "nco" %}
     {{- "${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}" }}
     {%- else %}
     {{- "{init?fmt=%Y%m%d%H?shift=-${time_lag}}" }}
     {%- if global.DO_ENSEMBLE %}
     {{- "/${ensmem_name}" }}
     {%- endif %}
     {{- "/postprd" }}
     {%- endif %}'
+  #
+  # FCST_FN_TEMPLATE:
+  # METplus template for the names of the forecast files to use as inputs
+  # to the verification tasks.
+  #
   FCST_FN_TEMPLATE: '{{- "${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z" }}
     {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}
     {{- ".${ensmem_name}" }}
     {%- endif %}
     {{- ".prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2" }}'
+  #
+  # FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
+  # METplus template for the names of the NetCDF files generated by the
+  # workflow verification tasks that call METplus's PcpCombine tool on
+  # forecast output.  These files will contain forecast accumulated
+  # precipitation in NetCDF format for various accumulation intervals.
+  #
   FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- "${NET_default}.t{init?fmt=%H}z" }}
     {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}
     {{- ".${ensmem_name}" }}
     {%- endif %}
     {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc" }}'
   #
-  # For verification tasks that need observational data, this specifies
-  # the maximum number of observation files that may be missing.  If more
-  # than this number are missing, the verification task will error out.
-  #
-  # Note that this is a crude way of checking that there are enough obs to
-  # conduct verification since this number should probably depend on the
-  # field being verified, the time interval between observations, the
-  # length of the forecast, etc.  An alternative may be to specify the
-  # maximum allowed fraction of obs files that can be missing (i.e. the
-  # number missing divided by the number that are expected to exist).
+  # VX_NDIGITS_ENSMEM_NAMES:
+  # Number of digits to assume/use in the forecast ensemble member identifier
+  # string used in directory and file names and other instances in which the
+  # ensemble member needs to be identified.  For example, if this is set to
+  # 3, the identifier for ensemble member 4 will be "mem004", while if it's
+  # set to 2, the identifier will be "mem04".  This is useful when verifying
+  # staged forecast files from a forecasting model/system other than the
+  # SRW that uses a different number of digits in the ensemble member
+  # identifier string.
# - NUM_MISSING_OBS_FILES_MAX: 2 + VX_NDIGITS_ENSMEM_NAMES: 3 # + # NUM_MISSING_FCST_FILES_MAX: # For verification tasks that need forecast data, this specifies the # maximum number of post-processed forecast files that may be missing. - # If more than this number are missing, the verification task will not - # be run. + # If more than this number are missing, the verification task will exit + # with an error. # NUM_MISSING_FCST_FILES_MAX: 0 - # - # Names of configuration files for deterministic and ensemble vx that - # specify the field groups, field names, levels, and (if applicable) - # thresholds for which to run verification. - # - VX_CONFIG_DET_FN: 'vx_config_det.yaml' - VX_CONFIG_ENS_FN: 'vx_config_ens.yaml' #---------------------------- # CPL_AQM config parameters From fd635cbc5d4c5f21b2b5155ed5a5051f94d721f4 Mon Sep 17 00:00:00 2001 From: Michael Kavulich Date: Mon, 28 Oct 2024 10:23:34 -0600 Subject: [PATCH 171/260] Refactor some bash to python, import filename templating directly from METplus (#3) * Replace call to custom templating script with direct invocation of metplus templating routines in get_obs.py * Replace set_leadhrs.sh with set_leadhrs.py * Fixes from pylint * Convert eval_METplus_timestr_tmpl.sh to python, redistribute some logic from set_leadhrs.py to this new script for efficiency; also fix some problems in set_leadhrs.py found by pylint * Refactor mrms_pull_topofhour.py to make it easily importable, call that function directly from get_obs.py * Remove the deprecated shell scripts * time_lag needs to be passed as an integer to set_leadhrs.py * Fixes to refactored mrms_pull_topofhour.py * Suggested test names --- modulefiles/tasks/derecho/get_obs.local.lua | 1 + modulefiles/tasks/gaea/get_obs.local.lua | 1 + modulefiles/tasks/hera/get_obs.local.lua | 3 +- modulefiles/tasks/hercules/get_obs.local.lua | 1 + modulefiles/tasks/jet/get_obs.local.lua | 3 +- modulefiles/tasks/noaacloud/get_obs.local.lua | 1 + modulefiles/tasks/orion/get_obs.local.lua | 1 + scripts/exregional_check_post_output.sh | 28 +- ...onal_run_met_genensprod_or_ensemblestat.sh | 34 +- ...gional_run_met_gridstat_or_pointstat_vx.sh | 21 +- ...un_met_gridstat_or_pointstat_vx_ensmean.sh | 21 +- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 21 +- scripts/exregional_run_met_pb2nc_obs.sh | 11 +- scripts/exregional_run_met_pcpcombine.sh | 32 +- tests/WE2E/run_WE2E_tests.py | 8 +- ...x-det_long-fcst_winter-wx_SRW-staged.yaml} | 0 ...-det_multicyc_fcst-overlap_ncep-hrrr.yaml} | 0 ...det_multicyc_first-obs-00z_ncep-hrrr.yaml} | 0 ...-det_multicyc_last-obs-00z_ncep-hrrr.yaml} | 0 ...ticyc_long-fcst-no-overlap_nssl-mpas.yaml} | 0 ...multicyc_long-fcst-overlap_nssl-mpas.yaml} | 0 ...vx-det_multicyc_no-00z-obs_nssl-mpas.yaml} | 0 ...t_multicyc_no-fcst-overlap_ncep-hrrr.yaml} | 0 ush/bash_utils/eval_METplus_timestr_tmpl.sh | 438 ------------------ ush/get_obs.py | 47 +- ush/mrms_pull_topofhour.py | 77 +-- ush/run_eval_METplus_timestr_tmpl.sh | 18 - ush/run_eval_metplus_timestr_tmpl.py | 63 +++ ush/set_leadhrs.py | 94 ++++ ush/set_leadhrs.sh | 334 ------------- ush/source_util_funcs.sh | 10 - 31 files changed, 313 insertions(+), 955 deletions(-) create mode 100644 modulefiles/tasks/derecho/get_obs.local.lua create mode 100644 modulefiles/tasks/gaea/get_obs.local.lua create mode 100644 modulefiles/tasks/hercules/get_obs.local.lua create mode 100644 modulefiles/tasks/noaacloud/get_obs.local.lua create mode 100644 modulefiles/tasks/orion/get_obs.local.lua rename 
tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml => config.vx-det_long-fcst_winter-wx_SRW-staged.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml => config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml => config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml => config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml => config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml => config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml => config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml} (100%) rename tests/WE2E/test_configs/verification/{config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml => config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml} (100%) delete mode 100644 ush/bash_utils/eval_METplus_timestr_tmpl.sh delete mode 100755 ush/run_eval_METplus_timestr_tmpl.sh create mode 100644 ush/run_eval_metplus_timestr_tmpl.py create mode 100644 ush/set_leadhrs.py delete mode 100644 ush/set_leadhrs.sh diff --git a/modulefiles/tasks/derecho/get_obs.local.lua b/modulefiles/tasks/derecho/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/derecho/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/modulefiles/tasks/gaea/get_obs.local.lua b/modulefiles/tasks/gaea/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/gaea/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/modulefiles/tasks/hera/get_obs.local.lua b/modulefiles/tasks/hera/get_obs.local.lua index dcca3116d8..e8d902abab 100644 --- a/modulefiles/tasks/hera/get_obs.local.lua +++ b/modulefiles/tasks/hera/get_obs.local.lua @@ -1,3 +1,2 @@ load("hpss") -unload("python") -load("python_srw") +load("run_vx.local") diff --git a/modulefiles/tasks/hercules/get_obs.local.lua b/modulefiles/tasks/hercules/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/hercules/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/modulefiles/tasks/jet/get_obs.local.lua b/modulefiles/tasks/jet/get_obs.local.lua index dcca3116d8..e8d902abab 100644 --- a/modulefiles/tasks/jet/get_obs.local.lua +++ b/modulefiles/tasks/jet/get_obs.local.lua @@ -1,3 +1,2 @@ load("hpss") -unload("python") -load("python_srw") +load("run_vx.local") diff --git a/modulefiles/tasks/noaacloud/get_obs.local.lua b/modulefiles/tasks/noaacloud/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/noaacloud/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/modulefiles/tasks/orion/get_obs.local.lua 
b/modulefiles/tasks/orion/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/orion/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index f176c9a12e..2a66a2fecf 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -12,6 +12,7 @@ # ENSMEM_INDX # GLOBAL_VAR_DEFNS_FP # VAR +# METPLUS_ROOT (used by ush/set_leadhrs.py) # # Experiment variables # @@ -52,14 +53,6 @@ done # #----------------------------------------------------------------------- # -# Source files defining auxiliary functions for verification. -# -#----------------------------------------------------------------------- -# -. $USHdir/set_leadhrs.sh -# -#----------------------------------------------------------------------- -# # Save current shell options (in a global array). Then set new options # for this script/function. # @@ -122,15 +115,16 @@ ensmem_indx=$(printf "%0${VX_NDIGITS_ENSMEM_NAMES}d" $(( 10#${ENSMEM_INDX}))) ensmem_name="mem${ensmem_indx}" FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE} ) -set_leadhrs \ - yyyymmddhh_init="${CDATE}" \ - lhr_min="0" \ - lhr_max="${FCST_LEN_HRS}" \ - lhr_intvl="${VX_FCST_OUTPUT_INTVL_HRS}" \ - base_dir="${VX_FCST_INPUT_BASEDIR}" \ - fn_template="${FCST_INPUT_FN_TEMPLATE}" \ - num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ - outvarname_lhrs_list="FHR_LIST" +FHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="0" \ + --lhr_max="${FCST_LEN_HRS}" \ + --lhr_intvl="${VX_FCST_OUTPUT_INTVL_HRS}" \ + --base_dir="${VX_FCST_INPUT_BASEDIR}" \ + --fn_template="${FCST_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ + --time_lag="${time_lag%.*}") || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 475417ee53..89f92e8e55 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -232,23 +231,28 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" +set -x if [ "${MetplusToolName}" = "GenEnsProd" ]; then - set_leadhrs_no_missing \ - lhr_min="${vx_hr_start}" \ - lhr_max="${vx_hr_end}" \ - lhr_intvl="${vx_intvl}" \ - outvarname_lhrs_list_no_missing="VX_LEADHR_LIST" + VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --skip_check_files ) || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" 
+ elif [ "${MetplusToolName}" = "EnsembleStat" ]; then - set_leadhrs \ - yyyymmddhh_init="${CDATE}" \ - lhr_min="${vx_hr_start}" \ - lhr_max="${vx_hr_end}" \ - lhr_intvl="${vx_intvl}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_lhrs_list="VX_LEADHR_LIST" + VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ + --time_lag="${time_lag%.*}" ) || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" fi +echo "VX_LEADHR_LIST=$VX_LEADHR_LIST" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index a6130ba50d..27ac6f11e6 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -233,15 +232,17 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set_leadhrs \ - yyyymmddhh_init="${CDATE}" \ - lhr_min="${vx_hr_start}" \ - lhr_max="${vx_hr_end}" \ - lhr_intvl="${vx_intvl}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_lhrs_list="VX_LEADHR_LIST" +set -x +VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ + --time_lag="${time_lag%.*}") || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 75332e4929..70f13c27c1 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -176,15 +175,17 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set_leadhrs \ - yyyymmddhh_init="${CDATE}" \ - lhr_min="${vx_hr_start}" \ - lhr_max="${vx_hr_end}" \ - lhr_intvl="${vx_intvl}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_lhrs_list="VX_LEADHR_LIST" +set -x +VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" ) || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" 
+ # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 382bd71ac8..b4e279218b 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -175,15 +174,17 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set_leadhrs \ - yyyymmddhh_init="${CDATE}" \ - lhr_min="${vx_hr_start}" \ - lhr_max="${vx_hr_end}" \ - lhr_intvl="${vx_intvl}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_lhrs_list="VX_LEADHR_LIST" +set -x +VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" ) || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index e93387ed0a..046f4fea67 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -160,11 +160,12 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do # create. sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp" + + fp=$( python3 $USHdir/run_eval_metplus_timestr_tmpl.py \ + --init_time="${yyyymmdd_task}00" \ + --fhr="${lhr}" \ + --fn_template="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}") || \ +print_err_msg_exit "Call to run_eval_metplus_timestr_tmpl.py failed with return code: $?" if [[ -f "${fp}" ]]; then print_info_msg " diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 590ceb43ef..23b14ce154 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_leadhrs.sh # #----------------------------------------------------------------------- # @@ -212,12 +211,14 @@ fi # #----------------------------------------------------------------------- # +set -x vx_intvl="$((10#${ACCUM_HH}))" -set_leadhrs_no_missing \ - lhr_min="${vx_intvl}" \ - lhr_max="${FCST_LEN_HRS}" \ - lhr_intvl="${vx_intvl}" \ - outvarname_lhrs_list_no_missing="VX_LEADHR_LIST" +VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --lhr_min="${vx_intvl}" \ + --lhr_max="${FCST_LEN_HRS}" \ + --lhr_intvl="${vx_intvl}" \ + --skip_check_files ) || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" 
# #----------------------------------------------------------------------- # @@ -250,15 +251,16 @@ for hr_end in ${subintvl_end_hrs[@]}; do Checking for the presence of files that will contribute to the ${vx_intvl}-hour accumulation ending at lead hour ${hr_end} (relative to ${CDATE})... " - set_leadhrs \ - yyyymmddhh_init="${CDATE}" \ - lhr_min="${hr_start}" \ - lhr_max="${hr_end}" \ - lhr_intvl="${subintvl}" \ - base_dir="${base_dir}" \ - fn_template="${fn_template}" \ - num_missing_files_max="${num_missing_files_max}" \ - outvarname_lhrs_list="tmp" + python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${hr_start}" \ + --lhr_max="${hr_end}" \ + --lhr_intvl="${subintvl}" \ + --base_dir="${base_dir}" \ + --fn_template="${fn_template}" \ + --num_missing_files_max="${num_missing_files_max}" \ + --time_lag="${time_lag%.*}" || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" done print_info_msg " diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index f983d3452e..992fac88a3 100755 --- a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -159,13 +159,7 @@ def run_we2e_tests(homedir, args) -> None: # test-specific options, then write resulting complete config.yaml starttime = datetime.now() starttime_string = starttime.strftime("%Y%m%d%H%M%S") - test_fn = os.path.basename(test) - # Set the test name to all characters between the initial "config." and - # the final ".yaml" in the file name. This will allow any characters to - # be used as part of the test name, in particular a ".". - prefix = 'config.' - suffix = '.yaml' - test_name = test_fn[test_fn.find(prefix)+len(prefix):test_fn.rfind(suffix)] + test_name = os.path.basename(test).split('.')[1] logging.debug(f"For test {test_name}, constructing config.yaml") test_cfg = load_config_file(test) diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.singlecyc.init_00z_fcstlen_36hr.winter_wx.SRW.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_07hr_inits_vary_fcstlen_09hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_00z_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml similarity 
index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_21z_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_96hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_48hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_24hr_inits_12z_fcstlen_03hr.nssl-mpas.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml diff --git a/tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml similarity index 100% rename from tests/WE2E/test_configs/verification/config.get_obs_hpss.do_vx_det.multicyc.cycintvl_11hr_inits_vary_fcstlen_03hr.ncep-hrrr.yaml rename to tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh deleted file mode 100644 index a4421958ee..0000000000 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ /dev/null @@ -1,438 +0,0 @@ -# -#----------------------------------------------------------------------- -# -# This function evaluates a METplus time-string template, i.e. a string -# (e.g. a file name template) containing one or more METplus time- -# formatting strings. -# -#----------------------------------------------------------------------- -# -function eval_METplus_timestr_tmpl() { -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. -# -#----------------------------------------------------------------------- -# - { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). 
-# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. -# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Specify the set of valid argument names for this script/function. Then -# process the arguments provided to this script/function (which should -# consist of a set of name-value pairs of the form arg1="value1", etc). -# -#----------------------------------------------------------------------- -# - local valid_args=( \ - "init_time" \ - "fhr" \ - "METplus_timestr_tmpl" \ - "outvarname_evaluated_timestr" \ - ) - process_args valid_args "$@" -# -#----------------------------------------------------------------------- -# -# For debugging purposes, print out values of arguments passed to this -# script. Note that these will be printed out only if VERBOSE is set to -# TRUE. -# -#----------------------------------------------------------------------- -# -# print_input_args "valid_args" -# -#----------------------------------------------------------------------- -# -# Declare local variables. -# -#----------------------------------------------------------------------- -# - local crnt_timefmt \ - crnt_timefmt_esc \ - evaluated_timestr \ - regex_search_tmpl \ - the_time \ - tmpl_remainder -# -#----------------------------------------------------------------------- -# -# Loop over all METplus time-formatting strings in the given METplus -# template and evaluate each using the given initial time (init_time) and -# forecast hour (fhr). -# -# Note that the while-loop below is over all METplus time-formatting -# strings of the form {...} in the template METplus_timestr_tmpl; it -# continues until all such time-formatting strings have been evaluated -# to actual times. -# -#----------------------------------------------------------------------- -# -# Regular expression used by the sed utility below to pick out the next -# METplus time-formatting string in the given METplus time-string template. -# - regex_search_tmpl="(.*)(\{.*\})(.*)" -# -# Initialize while-loop variables. -# - evaluated_timestr="${METplus_timestr_tmpl}" - - crnt_timefmt=$( printf "%s" "${METplus_timestr_tmpl}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) - tmpl_remainder=$( printf "%s" "${METplus_timestr_tmpl}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) - - while [ ! -z "${crnt_timefmt}" ]; do - - eval_single_METplus_timefmt \ - init_time="${init_time}" \ - fhr="${fhr}" \ - METplus_timefmt="${crnt_timefmt}" \ - outvarname_evaluated_timefmt="the_time" -# -# Replace the next METplus time string in evaluated_timestr with an actual -# time. -# -# Note that when using sed, we need to escape various characters (question -# mark, closing and opening curly braces, etc) in the METplus template in -# order for the sed command below to work properly. -# - crnt_timefmt_esc=$( echo "${crnt_timefmt}" | \ - $SED -r -e "s/\?/\\\?/g" -e "s/\{/\\\{/g" -e "s/\}/\\\}/g" ) - evaluated_timestr=$( echo "${evaluated_timestr}" | \ - $SED -n -r "s|(.*)(${crnt_timefmt_esc})(.*)|\1${the_time}\3|p" ) -# -# Set up values for the next iteration of the while-loop. 
-# - crnt_timefmt=$( printf "%s" "${tmpl_remainder}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) - tmpl_remainder=$( printf "%s" "${tmpl_remainder}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) - - done -# -#----------------------------------------------------------------------- -# -# Set output variables. -# -#----------------------------------------------------------------------- -# - if [ ! -z "${outvarname_evaluated_timestr}" ]; then - printf -v ${outvarname_evaluated_timestr} "%s" "${evaluated_timestr}" - fi -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/function. -# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} - -# -#----------------------------------------------------------------------- -# -# This function uses the specified initial forecast time and forecast -# hour to evaluate a single METplus time-formatting string and return -# the corresponding time. -# -#----------------------------------------------------------------------- -# -function eval_single_METplus_timefmt() { -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. -# -#----------------------------------------------------------------------- -# - { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). -# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. -# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Specify the set of valid argument names for this script/function. Then -# process the arguments provided to this script/function (which should -# consist of a set of name-value pairs of the form arg1="value1", etc). -# -#----------------------------------------------------------------------- -# - local valid_args=( \ - "init_time" \ - "fhr" \ - "METplus_timefmt" \ - "outvarname_evaluated_timefmt" \ - ) - process_args valid_args "$@" -# -#----------------------------------------------------------------------- -# -# For debugging purposes, print out values of arguments passed to this -# script. Note that these will be printed out only if VERBOSE is set to -# TRUE. -# -#----------------------------------------------------------------------- -# -# print_input_args "valid_args" -# -#----------------------------------------------------------------------- -# -# Declare local variables. 
-# -#----------------------------------------------------------------------- -# - local evaluated_timefmt \ - fmt \ - hh_init \ - init_time_str \ - lead_hrs \ - len \ - METplus_time_codes \ - METplus_time_shift \ - METplus_time_type \ - mn_init \ - regex_search \ - ss_init \ - valid_time_str \ - yyyymmdd_init -# -#----------------------------------------------------------------------- -# -# Run checks on input arguments. -# -#----------------------------------------------------------------------- -# - if [ -z "${METplus_timefmt}" ]; then - print_err_msg_exit "\ -The specified METplus time-formatting string (METplus_timefmt) cannot be -empty: - METplus_timefmt = \"${METplus_timefmt}\"" - fi - - len=${#init_time} - if [[ ${init_time} =~ ^[0-9]+$ ]]; then - if [ "$len" -ne 10 ] && [ "$len" -ne 12 ] && [ "$len" -ne 14 ]; then - print_err_msg_exit "\ -The specified initial time (init_time) must contain 10, 12, or 14 digits -but instead contains $len: - init_time = \"${init_time}\"" - fi - else - print_err_msg_exit "\ -The specified initial time (init_time) must consist of digits only and -cannot be empty: - init_time = \"${init_time}\"" - fi - - if ! [[ $fhr =~ ^[0-9]+$ ]]; then - print_err_msg_exit "\ -The specified forecast hour (fhr) must consist of digits only and cannot -be empty: - fhr = \"${fhr}\"" - fi -# -#----------------------------------------------------------------------- -# -# Set strings for the initial and valid times that can be passed to the -# "date" utility for evaluation. -# -#----------------------------------------------------------------------- -# - yyyymmdd_init=${init_time:0:8} - hh_init=${init_time:8:2} - - mn_init="00" - if [ "$len" -gt "10" ]; then - mn_init=${init_time:10:2} - fi - - ss_init="00" - if [ "$len" -gt "12" ]; then - ss_init=${init_time:12:2} - fi - - init_time_str=$( printf "%s" "${yyyymmdd_init} + ${hh_init} hours + ${mn_init} minutes + ${ss_init} seconds" ) - valid_time_str=$( printf "%s" "${init_time_str} + ${fhr} hours" ) -# -#----------------------------------------------------------------------- -# -# Parse the input METplus time string template. -# -#----------------------------------------------------------------------- -# - regex_search="^\{(init|valid|lead)(\?)(fmt=)([^\?]*)(\?)?(shift=)?([^\?]*)?\}" - METplus_time_type=$( \ - printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\1/p" ) - METplus_time_codes=$( \ - printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\4/p" ) - METplus_time_shift=$( \ - printf "%s" "${METplus_timefmt}" | $SED -n -r -e "s/${regex_search}/\7/p" ) -# -#----------------------------------------------------------------------- -# -# Get strings for the time format and time shift that can be passed to -# the "date" utility or the "printf" command. -# -#----------------------------------------------------------------------- -# - case "${METplus_time_codes}" in - "%Y%m%d%H"|"%Y%m%d"|"%H%M%S") - fmt="${METplus_time_codes}" - ;; - "%H") -# -# The "%H" format needs to be treated differently depending on if it's -# formatting a "lead" time type or another (e.g. "init" or "vald") because -# for "lead", the printf function is used below (which doesn't understand -# the "%H" format) whereas for the others, the date utility is used (which -# does understand "%H"). -# - if [ "${METplus_time_type}" = "lead" ]; then - fmt="%02.0f" - else - fmt="${METplus_time_codes}" - fi - ;; - "%HHH") -# -# Print format assumes that the argument to printf (i.e. the number to -# print out) may be a float. 
If we instead assume an integer and use -# "%03d" as the format, the printf function below will fail if the argument -# happens to be a float. The "%03.0f" format will work for both a float -# and an integer argument (and will truncate the float and print out a -# 3-digit integer). -# - fmt="%03.0f" - ;; - *) - print_err_msg_exit "\ -Unsupported METplus time codes: - METplus_time_codes = \"${METplus_time_codes}\" -METplus time-formatting string passed to this function is: - METplus_timefmt = \"${METplus_timefmt}\"" - ;; - esac -# -# Calculate the time shift as an integer in units of seconds. -# - time_shift_str=$(( $(printf "%.0f" "${METplus_time_shift}") + 0 ))" seconds" -# -#----------------------------------------------------------------------- -# -# Set the formatted time string. -# -#----------------------------------------------------------------------- -# - case "${METplus_time_type}" in - "init") - evaluated_timefmt=$( ${DATE_UTIL} --date="${init_time_str} + ${time_shift_str}" +"${fmt}" ) - ;; - "valid") - evaluated_timefmt=$( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"${fmt}" ) - ;; - "lead") - lead_secs=$(( $( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"%s" ) \ - - $( ${DATE_UTIL} --date="${init_time_str}" +"%s" ) )) - lead_hrs=$( bc -l <<< "${lead_secs}/${SECS_PER_HOUR}" ) -# -# Check to make sure lead_hrs is an integer. -# - lead_hrs_trunc=$( bc <<< "${lead_secs}/${SECS_PER_HOUR}" ) - lead_hrs_rem=$( bc -l <<< "${lead_hrs} - ${lead_hrs_trunc}" ) - if [ "${lead_hrs_rem}" != "0" ]; then - print_err_msg_exit "\ -The lead in hours (lead_hrs) must be an integer but isn't: - lead_hrs = ${lead_hrs} -The lead in seconds (lead_secs) is: - lead_secs = ${lead_secs} -The remainder (lead_hrs_rem) after dividing the lead_secs by SECS_PER_HOUR -= ${SECS_PER_HOUR} is: - lead_hrs_rem = ${lead_hrs_rem}" - fi -# -# Get the lead in the proper format. -# - evaluated_timefmt=$( printf "${fmt}" "${lead_hrs}" ) - ;; - *) - print_err_msg_exit "\ -Unsupported METplus time type: - METplus_time_type = \"${METplus_time_type}\" -METplus time-formatting string passed to this function is: - METplus_timefmt = \"${METplus_timefmt}\"" - ;; - esac - - if [ -z "${evaluated_timefmt}" ]; then - print_err_msg_exit "\ -The specified METplus time-formatting string (METplus_timefmt) could not -be evaluated for the given initial time (init_time) and forecast hour -(fhr): - METplus_timefmt = \"${METplus_timefmt}\" - init_time = \"${init_time}\" - fhr = \"${fhr}\"" - fi -# -#----------------------------------------------------------------------- -# -# Set output variables. -# -#----------------------------------------------------------------------- -# - if [ ! -z "${outvarname_evaluated_timefmt}" ]; then - printf -v ${outvarname_evaluated_timefmt} "%s" "${evaluated_timefmt}" - fi -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/function. 
-# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} diff --git a/ush/get_obs.py b/ush/get_obs.py index 666c6f1298..9681eb8c69 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -15,7 +15,14 @@ from python_utils import ( load_yaml_config, ) - +from mrms_pull_topofhour import mrms_pull_topofhour +try: + sys.path.append(os.environ['METPLUS_ROOT']) +except: + print("\nERROR ERROR ERROR\n") + print("Environment variable METPLUS_ROOT must be set to use this script\n") + raise +from metplus.util import string_template_substitution as sts def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): """ @@ -482,20 +489,14 @@ def get_obs(config, obtype, yyyymmdd_task): for fg, fp_proc_templ in zip(field_groups_in_obs, fp_proc_templates): all_fp_proc_dict[fg] = [] for yyyymmddhh in obs_retrieve_times_crnt_day: - # Set the lead hour, i.e. the number of hours from the beginning of the + # Set the lead time, a timedelta object from the beginning of the # day at which the file is valid. - lhr = int((yyyymmddhh - yyyymmdd_task)/dt.timedelta(hours=1)) - # Call a bash script to evaluate the template for the full path to the - # file containing METplus timestrings at the current time. This should - # be upgraded to a python script at some point. - cmd = '; '.join(['export USHdir=' + ushdir, - 'export yyyymmdd_task=' + yyyymmdd_task_str, - 'export lhr=' + str(lhr), - 'export METplus_timestr_tmpl=' + fp_proc_templ, - os.path.join(ushdir, 'run_eval_METplus_timestr_tmpl.sh')]) - result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - fp_proc = result.stdout.strip() - all_fp_proc_dict[fg].append(fp_proc) + leadtime = yyyymmddhh - yyyymmdd_task + # Call METplus subroutine to evaluate the template for the full path to + # the file containing METplus timestrings at the current time. + fn = sts.do_string_sub(tmpl=fp_proc_templ,init=yyyymmdd_task,valid=yyyymmddhh, + lead=leadtime.total_seconds()) + all_fp_proc_dict[fg].append(fn) # Check whether any obs files already exist on disk in their processed # (i.e. final) locations. If so, adjust the starting archive hour. In @@ -804,25 +805,21 @@ def get_obs(config, obtype, yyyymmdd_task): # those that are nearest in time to the current hour. Unzip these in a # temporary subdirectory under the raw base directory. # - # Note that the script we call to do this (mrms_pull_topofhour.py) assumes + # Note that the function we call to do this (mrms_pull_topofhour) assumes # a certain file naming convention. That convention must match the names # of the files that the retrieve_data.py script called above ends up # retrieving. The list of possible templates for these names is given # in parm/data_locations.yml, but which of those is actually used is not # known until retrieve_data.py completes. Thus, that information needs - # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.py. + # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour. # For now, we hard-code the file name here. 
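# For illustration, a minimal sketch (made-up template and dates, assuming
# the METPLUS_ROOT import set up above) of how the do_string_sub() routine
# adopted in this patch resolves a METplus filename template:
import datetime as dt
from metplus.util import string_template_substitution as sts

init = dt.datetime(2024, 5, 21, 0)
valid = init + dt.timedelta(hours=6)
fn = sts.do_string_sub(tmpl="ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2",
                       init=init, valid=valid,
                       lead=(valid - init).total_seconds())
# fn -> "ccpa.t06z.01h.hrap.conus.gb2"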
if obtype == 'MRMS': yyyymmddhh_str = dt.datetime.strftime(yyyymmddhh, '%Y%m%d%H') - cmd = ' '.join(['python3', \ - '-u', os.path.join(ushdir, 'mrms_pull_topofhour.py'), \ - '--valid_time', yyyymmddhh_str, \ - '--source', basedir_raw, \ - '--outdir', os.path.join(basedir_raw, 'topofhour'), \ - '--product', fields_in_filenames[i], \ - '--no-add_vdate_subdir']) - result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - rc = result.returncode + mrms_pull_topofhour(valid_time=yyyymmddhh_str, + source=basedir_raw, + outdir=os.path.join(basedir_raw, 'topofhour'), + product=fields_in_filenames[i], + add_vdate_subdir=False) # The raw file name needs to be the same as what the retrieve_data.py # script called above ends up retrieving. The list of possible templates diff --git a/ush/mrms_pull_topofhour.py b/ush/mrms_pull_topofhour.py index cad54e74dc..58d24aeff1 100644 --- a/ush/mrms_pull_topofhour.py +++ b/ush/mrms_pull_topofhour.py @@ -6,7 +6,7 @@ import shutil import gzip -def main(): +def mrms_pull_topofhour(valid_time, outdir, source, product, level=None, add_vdate_subdir=True, debug=False): """Identifies the MRMS file closest to the valid time of the forecast. METplus is configured to look for a MRMS composite reflectivity file for the valid time of the forecast being verified; since MRMS composite @@ -22,61 +22,45 @@ def main(): time of the forecast """ - #Parse input arguments - parser = argparse.ArgumentParser() - parser.add_argument('-v', '--valid_time', type=str, required=True, - help='Valid time (in string format YYYYMMDDHH) to find MRMS data for') - parser.add_argument('-o', '--outdir', type=str, required=True, - help='Destination directory for extracted MRMS data; data will be placed in `dest/YYYYMMDD`') - parser.add_argument('-s', '--source', type=str, required=True, - help='Source directory where zipped MRMS data is found') - parser.add_argument('-p', '--product', type=str, required=True, choices=['MergedReflectivityQCComposite', 'EchoTop'], - help='Name of MRMS product') - parser.add_argument('-l', '--level', type=str, help='MRMS product level', - choices=['_00.50_','_18_00.50_']) - parser.add_argument('--add_vdate_subdir', default=True, required=False, action=argparse.BooleanOptionalAction, - help='Flag to add valid-date subdirectory to source and destination directories') - parser.add_argument('-d', '--debug', action='store_true', help='Add additional debug output') - args = parser.parse_args() # Level is determined by MRMS product; set if not provided - if args.level is None: - if args.product == "MergedReflectivityQCComposite": - args.level = "_00.50_" - elif args.product == "EchoTop": - args.level = "_18_00.50_" + if level is None: + if product == "MergedReflectivityQCComposite": + level = "_00.50_" + elif product == "EchoTop": + level = "_18_00.50_" else: raise Exception("This should never have happened") # Copy and unzip MRMS files that are closest to top of hour # Done every hour on a 20-minute lag - YYYY = int(args.valid_time[0:4]) - MM = int(args.valid_time[4:6]) - DD = int(args.valid_time[6:8]) - HH = int(args.valid_time[8:19]) + YYYY = int(valid_time[0:4]) + MM = int(valid_time[4:6]) + DD = int(valid_time[6:8]) + HH = int(valid_time[8:19]) valid = datetime.datetime(YYYY, MM, DD, HH, 0, 0) valid_str = valid.strftime("%Y%m%d") - print(f"Pulling MRMS product {args.product} for valid time: {args.valid_time}") + print(f"Pulling MRMS product {product} for valid time: {valid_time}") # Set up working directory valid_str_or_empty = '' - if args.add_vdate_subdir: 
+ if add_vdate_subdir: valid_str_or_empty = valid_str - dest_dir = os.path.join(args.outdir, valid_str_or_empty) + dest_dir = os.path.join(outdir, valid_str_or_empty) if not os.path.exists(dest_dir): os.makedirs(dest_dir) # Sort list of files for each MRMS product - if args.debug: + if debug: print(f"Valid date: {valid_str}") - search_path = os.path.join(args.source, valid_str_or_empty, args.product + "*.gz") + search_path = os.path.join(source, valid_str_or_empty, product + "*.gz") file_list = [f for f in glob.glob(search_path)] - if args.debug: + if debug: print(f"Files found: \n{file_list}") time_list = [file_list[x][-24:-9] for x in range(len(file_list))] int_list = [ @@ -96,12 +80,12 @@ def main(): # Check to make sure closest file is within +/- 15 mins of top of the hour difference = abs(closest_timestamp - valid) if difference.total_seconds() <= 900: - filename1 = f"{args.product}{args.level}{closest_timestamp.strftime('%Y%m%d-%H%M%S')}.grib2.gz" - filename2 = f"{args.product}{args.level}{valid.strftime('%Y%m%d-%H')}0000.grib2" - origfile = os.path.join(args.source, valid_str_or_empty, filename1) + filename1 = f"{product}{level}{closest_timestamp.strftime('%Y%m%d-%H%M%S')}.grib2.gz" + filename2 = f"{product}{level}{valid.strftime('%Y%m%d-%H')}0000.grib2" + origfile = os.path.join(source, valid_str_or_empty, filename1) target = os.path.join(dest_dir, filename2) - if args.debug: + if debug: print(f"Unzipping file {origfile} to {target}") @@ -113,4 +97,23 @@ def main(): raise FileNotFoundError(f"Did not find a valid file within 15 minutes of {valid}") if __name__ == "__main__": - main() + #Parse input arguments + parser = argparse.ArgumentParser() + parser.add_argument('-v', '--valid_time', type=str, required=True, + help='Valid time (in string format YYYYMMDDHH) to find MRMS data for') + parser.add_argument('-o', '--outdir', type=str, required=True, + help='Destination directory for extracted MRMS data; data will be placed in `dest/YYYYMMDD`') + parser.add_argument('-s', '--source', type=str, required=True, + help='Source directory where zipped MRMS data is found') + parser.add_argument('-p', '--product', type=str, required=True, choices=['MergedReflectivityQCComposite', 'EchoTop'], + help='Name of MRMS product') + parser.add_argument('-l', '--level', type=str, help='MRMS product level', + choices=['_00.50_','_18_00.50_']) + parser.add_argument('--add_vdate_subdir', default=True, required=False, action=argparse.BooleanOptionalAction, + help='Flag to add valid-date subdirectory to source and destination directories') + parser.add_argument('-d', '--debug', action='store_true', help='Add additional debug output') + args = parser.parse_args() + + #Consistency checks + + mrms_pull_topofhour(**vars(args)) diff --git a/ush/run_eval_METplus_timestr_tmpl.sh b/ush/run_eval_METplus_timestr_tmpl.sh deleted file mode 100755 index f5438be2f4..0000000000 --- a/ush/run_eval_METplus_timestr_tmpl.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash -# -#----------------------------------------------------------------------- -# -# This script is simply a wrapper to the eval_METplus_timestr_tmpl bash -# function. It is needed in order to enable the function to be called -# from a python script. -# -#----------------------------------------------------------------------- -# -set -u -. 
$USHdir/source_util_funcs.sh -eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${METplus_timestr_tmpl}" \ - outvarname_evaluated_timestr="fp_proc" -echo "${fp_proc}" diff --git a/ush/run_eval_metplus_timestr_tmpl.py b/ush/run_eval_metplus_timestr_tmpl.py new file mode 100644 index 0000000000..259531ea8d --- /dev/null +++ b/ush/run_eval_metplus_timestr_tmpl.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 +import argparse +import os +import sys +from datetime import datetime, timedelta +try: + sys.path.append(os.environ['METPLUS_ROOT']) +except: + print("\nERROR ERROR ERROR\n") + print("Environment variable METPLUS_ROOT must be set to use this script\n") + raise +from metplus.util import string_template_substitution as sts + +def eval_tmpl(init_time, fhr, time_lag, fn_template, verbose=False): + """ + Calls native METplus routine for evaluating filename templates + + Args: + init_time (str): Date string for initial time in YYYYMMDD[mmss] format, where minutes and + seconds are optional. + fhr (int): Forecast hour (number of hours since init_time) + time_lag (int): Hours of time lag for a time-lagged ensemble member + fn_template (str): The METplus filename template for finding the files + verbose (bool): By default this script only outputs the list of forecast hours + Returns: + str: The fully resolved filename based on the input parameters + """ + + if len(init_time) == 10: + initdate=datetime.strptime(init_time, '%Y%m%d%H') + elif len(init_time) == 12: + initdate=datetime.strptime(init_time, '%Y%m%d%H%M') + elif len(init_time) == 14: + initdate=datetime.strptime(init_time, '%Y%m%d%H%M%S') + else: + raise ValueError(f"Invalid {init_time=}; must be 10, 12, or 14 characters in length") + + validdate=initdate + timedelta(hours=fhr) + leadsec=fhr*3600 + # Evaluate the METplus timestring template for the current lead hour + if verbose: + print("Resolving METplus template for:") + print(f"{fn_template=}\ninit={initdate}\nvalid={validdate}\nlead={leadsec}\n{time_lag=}\n") + # Return the full path with templates resolved + return sts.do_string_sub(tmpl=fn_template,init=initdate,valid=validdate, + lead=leadsec,time_lag=time_lag) + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description="Print a list of forecast hours in bash-readable comma-separated format such that there is a corresponding file (can be observations or forecast files) for each list entry.", + ) + parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true") + parser.add_argument("-i", "--init_time", help="Initial date in YYYYMMDDHH[mmss] format", type=str, default='') + parser.add_argument("-f", "--fhr", help="Forecast hour", type=int, required=True) + parser.add_argument("-tl", "--time_lag", help="Hours of time lag for a time-lagged ensemble member", type=int, default=0) + parser.add_argument("-ft", "--fn_template", help="Template for file names to search; see ??? 
for details on template settings", type=str, default='') + + args = parser.parse_args() + + filename = eval_tmpl(**vars(args)) + # If called from command line, we want to print the resolved filename + print(filename) diff --git a/ush/set_leadhrs.py b/ush/set_leadhrs.py new file mode 100644 index 0000000000..64d483f652 --- /dev/null +++ b/ush/set_leadhrs.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +import argparse +import os +from run_eval_metplus_timestr_tmpl import eval_tmpl + +def set_leadhrs(date_init, lhr_min, lhr_max, lhr_intvl, base_dir, time_lag, fn_template, num_missing_files_max, + skip_check_files=False, verbose=False): + """ + Creates a list of lead hours based on the provided range and interval, + checks for the existence of corresponding files, and returns a list + of lead hours for which files exist. If too many files are missing, it fails with an exception. + + Args: + date_init (str): Date string for initial time in YYYYMMDD[mmss] format, where + minutes and seconds are optional. + lhr_min (int): Minimum lead hour to check + lhr_max (int): Maximum lead hour to check + lhr_intvl (int): Interval between lead hours + base_dir (str): Base directory for forecast/observation file + time_lag (int): Hours of time lag for a time-lagged ensemble member + fn_template (str): The METplus filename template for finding the files + verbose (bool): By default this script only outputs the list of forecast hours + (for easier parsing from bash contexts). Set the verbose flag + to True for additional debugging output. + num_missing_files_max (int): If more files than this value are not found, raise exception + skip_check_files (bool): If true, return the list of forecast hours, skipping the file check + Returns: + A list of forecast hours where files were found + """ + + # Step 1: Generate lead hours without filtering for missing files + lhrs_list = list(range(lhr_min, lhr_max + 1, lhr_intvl)) + if verbose: + print(f"Initial set of lead hours (relative to {date_init}): {lhrs_list}") + + if skip_check_files: + return lhrs_list + + # Step 2: Loop through lead hours and check for corresponding file existence + final_list = [] + num_missing_files = 0 + for lhr in lhrs_list: + + # Evaluate the METplus timestring template for the current lead hour + fn = eval_tmpl(date_init, lhr, time_lag, fn_template, verbose=False) + + # Get the full path and check if the file exists + fp = os.path.join(base_dir, fn) + if os.path.isfile(fp): + if verbose: + print(f"Found file for lead hour {lhr} (relative to {date_init}): {fp}") + final_list.append(lhr) + else: + num_missing_files += 1 + + if verbose: + print(f"File for lead hour {lhr} (relative to {date_init}) is MISSING: {fp}") + + if verbose: + print(f"Final set of lead hours relative to {date_init}: {final_list}") + + # Step 3: Check if the number of missing files exceeds the maximum allowed + if num_missing_files > num_missing_files_max: + raise Exception(f"Number of missing files ({num_missing_files}) exceeds maximum allowed ({num_missing_files_max}).") + + return final_list + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description="Print a list of forecast hours in bash-readable comma-separated format such that there is a corresponding file (can be observations or forecast files) for each list entry.", + ) + parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true") + parser.add_argument("-d", "--date_init", help="Initial date in YYYYMMDDHH[mmss] format", type=str, default='') + parser.add_argument("-min", 
"--lhr_min", help="Minimum lead hour to check", type=int, required=True) + parser.add_argument("-max", "--lhr_max", help="Maximum lead hour to check", type=int, required=True) + parser.add_argument("-int", "--lhr_intvl", help="Interval between lead hours", type=int, required=True) + parser.add_argument("-tl", "--time_lag", help="Hours of time lag for a time-lagged ensemble member", type=int, default=0) + parser.add_argument("-bd", "--base_dir", help="Base directory for forecast/observation file", type=str, default='') + parser.add_argument("-ft", "--fn_template", help="Template for file names to search; see ??? for details on template settings", type=str, default='') + parser.add_argument("-n", "--num_missing_files_max", type=int, default=5, + help="Number of missing files to tolerate; if more files than this number can not be found, raise an exception") + parser.add_argument("-s", "--skip_check_files", action="store_true", + help="Flag to skip file check and just return the list of lead hours") + + args = parser.parse_args() + + #Consistency checks + if not args.skip_check_files and not args.date_init: + raise argparse.ArgumentTypeError('--date_init must be specified unless --skip_check_files is specified') + + leadhr_list = set_leadhrs(**vars(args)) + # If called from command line, we want to print a bash-parsable list + print(', '.join(str(x) for x in leadhr_list)) diff --git a/ush/set_leadhrs.sh b/ush/set_leadhrs.sh deleted file mode 100644 index aa3b4b338f..0000000000 --- a/ush/set_leadhrs.sh +++ /dev/null @@ -1,334 +0,0 @@ -# -#----------------------------------------------------------------------- -# -# This file defines functions used to generate sets of lead hours for -# which verification will be performed. -# -#----------------------------------------------------------------------- -# - -function set_leadhrs_no_missing() { -# -#----------------------------------------------------------------------- -# -# This function sets the lead hours (relative to some unspecified initial/ -# reference time) for which verification will be performed under the -# assumption that the data file (which may be a forecast output file or -# an observation file) for each hour is available (i.e. it assumes that -# there are no missing files). -# -#----------------------------------------------------------------------- -# - -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. -# -#----------------------------------------------------------------------- -# - { save_shell_opts; set -u +x; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). -# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. -# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Specify the set of valid argument names for this script/function. 
Then -# process the arguments provided to this script/function (which should -# consist of a set of name-value pairs of the form arg1="value1", etc). -# -#----------------------------------------------------------------------- -# - local valid_args=( \ - "lhr_min" \ - "lhr_max" \ - "lhr_intvl" \ - "outvarname_lhrs_list_no_missing" \ - ) - process_args valid_args "$@" -# -#----------------------------------------------------------------------- -# -# For debugging purposes, print out values of arguments passed to this -# script. Note that these will be printed out only if VERBOSE is set to -# TRUE. -# -#----------------------------------------------------------------------- -# - print_input_args valid_args -# -#----------------------------------------------------------------------- -# -# Declare local variables. -# -#----------------------------------------------------------------------- -# - local lhrs_array \ - lhrs_list -# -#----------------------------------------------------------------------- -# -# Create the array of lead hours. -# -#----------------------------------------------------------------------- -# - lhrs_array=($( seq ${lhr_min} ${lhr_intvl} ${lhr_max} )) - - # Express the array of lead hours as a (scalar) string containing a comma - # (and space) separated list of the elements of lhrs_array. - lhrs_list=$( printf "%s, " "${lhrs_array[@]}" ) - lhrs_list=$( echo "${lhrs_list}" | $SED "s/, $//g" ) -# -#----------------------------------------------------------------------- -# -# Set output variables. -# -#----------------------------------------------------------------------- -# - if [ ! -z "${outvarname_lhrs_list_no_missing}" ]; then - printf -v ${outvarname_lhrs_list_no_missing} "%s" "${lhrs_list}" - fi -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/function. -# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} - - -# -#----------------------------------------------------------------------- -# -# This function generates a list of lead hours (relative to an initial or -# reference time yyyymmddhh_init) such that for each such hour, there -# exists a corresponding data file with a name of the form specified by -# the template fn_template. Depending on fn_template, this file may -# contain forecast or observation data. -# -#----------------------------------------------------------------------- -# -function set_leadhrs() { -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. -# -#----------------------------------------------------------------------- -# - { save_shell_opts; set -u +x; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). -# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. 
-# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Specify the set of valid argument names for this script/function. Then -# process the arguments provided to this script/function (which should -# consist of a set of name-value pairs of the form arg1="value1", etc). -# -#----------------------------------------------------------------------- -# - local valid_args=( \ - "yyyymmddhh_init" \ - "lhr_min" \ - "lhr_max" \ - "lhr_intvl" \ - "base_dir" \ - "fn_template" \ - "num_missing_files_max" \ - "outvarname_lhrs_list" \ - ) - process_args valid_args "$@" -# -#----------------------------------------------------------------------- -# -# For debugging purposes, print out values of arguments passed to this -# script. Note that these will be printed out only if VERBOSE is set to -# TRUE. -# -#----------------------------------------------------------------------- -# - print_input_args valid_args -# -#----------------------------------------------------------------------- -# -# Declare local variables. -# -#----------------------------------------------------------------------- -# - local crnt_tmpl \ - crnt_tmpl_esc \ - fn \ - fp \ - i \ - lhr \ - lhrs_array \ - lhrs_list \ - num_hrs \ - num_missing_files \ - remainder \ - skip_this_hour -# -#----------------------------------------------------------------------- -# -# For the specified field, generate the set of lead hours at which -# verification will be performed under the assumption that for each such -# hour, the corresponding or observation file exists. Thus, this set is -# an initial guess for the lead hours at which vx will be performed. -# -#----------------------------------------------------------------------- -# - set_leadhrs_no_missing \ - lhr_min="${lhr_min}" \ - lhr_max="${lhr_max}" \ - lhr_intvl="${lhr_intvl}" \ - outvarname_lhrs_list_no_missing="lhrs_list_no_missing" - - # For convenience, save the scalar variable lhrs_list_no_missing to a - # bash array. - lhrs_array=($( printf "%s" "${lhrs_list_no_missing}" | $SED "s/,//g" )) - - print_info_msg "$VERBOSE" "\ -Initial (i.e. before filtering for missing files) set of lead hours -(relative to ${yyyymmddhh_init}) is: - lhrs_array = ( $( printf "\"%s\" " "${lhrs_array[@]}" )) -" -# -#----------------------------------------------------------------------- -# -# Loop through the array of lead hours generated above and construct the -# variable lhrs_list that will be scalar (string) containing a comma- -# separated list of hours for which corresponding forecast or observation -# files have been confirmed to exist. Also, use the variable -# num_missing_files to keep track of the number of files that are missing. -# -#----------------------------------------------------------------------- -# - lhrs_list="" - num_missing_files="0" - num_hrs=${#lhrs_array[@]} - for (( i=0; i<${num_hrs}; i++ )); do - - lhr="${lhrs_array[$i]}" - skip_this_hour="FALSE" -# -# Evaluate the METplus file name template containing METplus timestrings -# for the specified yyyymmddhh_init and current hour (lhr) to obtain the -# name of the current file (including possibly a relative directory). -# - eval_METplus_timestr_tmpl \ - init_time="${yyyymmddhh_init}" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${fn_template}" \ - outvarname_evaluated_timestr="fn" -# -# Get the full path to the file and check if it exists. 
-# - fp="${base_dir}/${fn}" - if [ -f "${fp}" ]; then - print_info_msg "\ -Found file (fp) for lead hour ${lhr} (relative to ${yyyymmddhh_init}): - fp = \"${fp}\" -" - else - skip_this_hour="TRUE" - num_missing_files=$(( ${num_missing_files} + 1 )) - print_info_msg "\ -The file (fp) for lead hour ${lhr} (relative to ${yyyymmddhh_init}) is MISSING: - fp = \"${fp}\" -Excluding this hour from the list of lead hours to return. -" - break - fi - - if [[ ! $(boolify "${skip_this_hour}") == "TRUE" ]]; then - lhrs_list="${lhrs_list},${lhr}" - fi - - done -# -# Remove leading comma from lhrs_list. -# - lhrs_list=$( echo "${lhrs_list}" | $SED "s/^,//g" ) - print_info_msg "$VERBOSE" "\ -Final (i.e. after filtering for missing files) set of lead hours relative -to ${yyyymmddhh_init} (saved in a scalar string variable) is: - lhrs_list = \"${lhrs_list}\" -" -# -#----------------------------------------------------------------------- -# -# If the number of missing files is greater than the maximum allowed -# (specified by num_missing_files_max), print out an error message and -# exit. -# -#----------------------------------------------------------------------- -# - if [ "${num_missing_files}" -gt "${num_missing_files_max}" ]; then - print_err_msg_exit "\ -The number of missing files (num_missing_files) is greater than the -maximum allowed number (num_missing_files_max): - num_missing_files = ${num_missing_files} - num_missing_files_max = ${num_missing_files_max}" - fi -# -#----------------------------------------------------------------------- -# -# Set output variables. -# -#----------------------------------------------------------------------- -# - if [ ! -z "${outvarname_lhrs_list}" ]; then - printf -v ${outvarname_lhrs_list} "%s" "${lhrs_list}" - fi -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/function. -# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} diff --git a/ush/source_util_funcs.sh b/ush/source_util_funcs.sh index 9feceaf68e..266975e97d 100644 --- a/ush/source_util_funcs.sh +++ b/ush/source_util_funcs.sh @@ -214,16 +214,6 @@ function source_util_funcs() { # #----------------------------------------------------------------------- # -# Source the file containing the function that evaluates a METplus time -# string template. -# -#----------------------------------------------------------------------- -# - . ${bashutils_dir}/eval_METplus_timestr_tmpl.sh - -# -#----------------------------------------------------------------------- -# # Source the file that sources YAML files as if they were bash # #----------------------------------------------------------------------- From f3d21bdbc438f2fd2aba67539a12291dea84767f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 28 Oct 2024 12:24:22 -0600 Subject: [PATCH 172/260] Minor modifications to Mike's PR changes. 
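This patch tidies the calls to the new Python vx utilities: consistent
indentation of the error traps, removal of leftover "set -x" debugging,
and use of the renamed eval_metplus_timestr_tmpl.py script.  For
reference, a minimal sketch of the in-Python equivalent of the
set_leadhrs.py invocations below (the date, path, and template values
are placeholders, not taken from an actual experiment):

    from set_leadhrs import set_leadhrs

    lead_hrs = set_leadhrs(
        date_init="2024052100",      # CDATE (example value)
        lhr_min=1, lhr_max=6, lhr_intvl=1,
        base_dir="/path/to/obs",     # e.g. OBS_INPUT_DIR (placeholder)
        time_lag=0,
        fn_template="{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2",
        num_missing_files_max=2,
    )
    print(", ".join(str(x) for x in lead_hrs))   # bash-parsable list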
--- ...exregional_run_met_genensprod_or_ensemblestat.sh | 6 ++---- .../exregional_run_met_gridstat_or_pointstat_vx.sh | 3 +-- ...onal_run_met_gridstat_or_pointstat_vx_ensmean.sh | 3 +-- ...onal_run_met_gridstat_or_pointstat_vx_ensprob.sh | 3 +-- scripts/exregional_run_met_pb2nc_obs.sh | 13 ++++--------- scripts/exregional_run_met_pcpcombine.sh | 5 ++--- ush/eval_metplus_timestr_tmpl.py | 12 ++++++------ ush/set_leadhrs.py | 4 ++-- 8 files changed, 19 insertions(+), 30 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 2ff346442a..934ba63283 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -231,14 +231,13 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set -x if [ "${MetplusToolName}" = "GenEnsProd" ]; then VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --lhr_min="${vx_hr_start}" \ --lhr_max="${vx_hr_end}" \ --lhr_intvl="${vx_intvl}" \ --skip_check_files ) || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" elif [ "${MetplusToolName}" = "EnsembleStat" ]; then VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ @@ -250,9 +249,8 @@ elif [ "${MetplusToolName}" = "EnsembleStat" ]; then --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ --time_lag="${time_lag%.*}" ) || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" fi -echo "VX_LEADHR_LIST=$VX_LEADHR_LIST" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 1f4b65a7c9..6200b0ba7e 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -232,7 +232,6 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set -x VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --date_init="${CDATE}" \ --lhr_min="${vx_hr_start}" \ @@ -242,7 +241,7 @@ VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ --time_lag="${time_lag%.*}") || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 6c6be42a52..0bfcff36d6 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -175,7 +175,6 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set -x VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --date_init="${CDATE}" \ --lhr_min="${vx_hr_start}" \ @@ -184,7 +183,7 @@ VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --base_dir="${OBS_INPUT_DIR}" \ --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" ) || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" 
# #----------------------------------------------------------------------- diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 3fe23d7510..0e8d44578c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -174,7 +174,6 @@ case "$OBTYPE" in esac vx_hr_end="${FCST_LEN_HRS}" -set -x VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --date_init="${CDATE}" \ --lhr_min="${vx_hr_start}" \ @@ -183,7 +182,7 @@ VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --base_dir="${OBS_INPUT_DIR}" \ --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" ) || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" # #----------------------------------------------------------------------- diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index afe91f14a1..3e6631cd1d 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -160,17 +160,12 @@ for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do # create. sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) - eval_METplus_timestr_tmpl \ - init_time="${yyyymmdd_task}00" \ - fhr="${lhr}" \ - METplus_timestr_tmpl="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}" \ - outvarname_evaluated_timestr="fp" - fp=$( python3 $USHdir/run_eval_metplus_timestr_tmpl.py \ + fp=$( python3 $USHdir/eval_metplus_timestr_tmpl.py \ --init_time="${yyyymmdd_task}00" \ - --fhr="${lhr}" \ - --fn_template="${OBS_DIR}/${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE}") || \ -print_err_msg_exit "Call to run_eval_metplus_timestr_tmpl.py failed with return code: $?" + --lhr="${lhr}" \ + --fn_template="${OBS_DIR}/${OBS_NDAS_FN_TEMPLATES[1]}") || \ + print_err_msg_exit "Call to eval_metplus_timestr_tmpl.py failed with return code: $?" if [[ -f "${fp}" ]]; then print_info_msg " diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 9a1eb33bc8..c60ac30e36 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -211,14 +211,13 @@ fi # #----------------------------------------------------------------------- # -set -x vx_intvl="$((10#${ACCUM_HH}))" VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ --lhr_min="${vx_intvl}" \ --lhr_max="${FCST_LEN_HRS}" \ --lhr_intvl="${vx_intvl}" \ --skip_check_files ) || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" # #----------------------------------------------------------------------- # @@ -260,7 +259,7 @@ accumulation ending at lead hour ${hr_end} (relative to ${CDATE})... --fn_template="${fn_template}" \ --num_missing_files_max="${num_missing_files_max}" \ --time_lag="${time_lag%.*}" || \ -print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" 
done print_info_msg " diff --git a/ush/eval_metplus_timestr_tmpl.py b/ush/eval_metplus_timestr_tmpl.py index 259531ea8d..edbe0e7012 100644 --- a/ush/eval_metplus_timestr_tmpl.py +++ b/ush/eval_metplus_timestr_tmpl.py @@ -11,14 +11,14 @@ raise from metplus.util import string_template_substitution as sts -def eval_tmpl(init_time, fhr, time_lag, fn_template, verbose=False): +def eval_metplus_timestr_tmpl(init_time, lhr, time_lag, fn_template, verbose=False): """ Calls native METplus routine for evaluating filename templates Args: init_time (str): Date string for initial time in YYYYMMDD[mmss] format, where minutes and seconds are optional. - fhr (int): Forecast hour (number of hours since init_time) + lhr (int): Lead hour (number of hours since init_time) time_lag (int): Hours of time lag for a time-lagged ensemble member fn_template (str): The METplus filename template for finding the files verbose (bool): By default this script only outputs the list of forecast hours @@ -35,8 +35,8 @@ def eval_tmpl(init_time, fhr, time_lag, fn_template, verbose=False): else: raise ValueError(f"Invalid {init_time=}; must be 10, 12, or 14 characters in length") - validdate=initdate + timedelta(hours=fhr) - leadsec=fhr*3600 + validdate=initdate + timedelta(hours=lhr) + leadsec=lhr*3600 # Evaluate the METplus timestring template for the current lead hour if verbose: print("Resolving METplus template for:") @@ -52,12 +52,12 @@ def eval_tmpl(init_time, fhr, time_lag, fn_template, verbose=False): ) parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true") parser.add_argument("-i", "--init_time", help="Initial date in YYYYMMDDHH[mmss] format", type=str, default='') - parser.add_argument("-f", "--fhr", help="Forecast hour", type=int, required=True) + parser.add_argument("-f", "--lhr", help="Forecast hour", type=int, required=True) parser.add_argument("-tl", "--time_lag", help="Hours of time lag for a time-lagged ensemble member", type=int, default=0) parser.add_argument("-ft", "--fn_template", help="Template for file names to search; see ??? for details on template settings", type=str, default='') args = parser.parse_args() - filename = eval_tmpl(**vars(args)) + filename = eval_metplus_timestr_tmpl(**vars(args)) # If called from command line, we want to print the resolved filename print(filename) diff --git a/ush/set_leadhrs.py b/ush/set_leadhrs.py index 64d483f652..3256297af2 100644 --- a/ush/set_leadhrs.py +++ b/ush/set_leadhrs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import argparse import os -from run_eval_metplus_timestr_tmpl import eval_tmpl +from eval_metplus_timestr_tmpl import eval_metplus_timestr_tmpl def set_leadhrs(date_init, lhr_min, lhr_max, lhr_intvl, base_dir, time_lag, fn_template, num_missing_files_max, skip_check_files=False, verbose=False): @@ -42,7 +42,7 @@ def set_leadhrs(date_init, lhr_min, lhr_max, lhr_intvl, base_dir, time_lag, fn_t for lhr in lhrs_list: # Evaluate the METplus timestring template for the current lead hour - fn = eval_tmpl(date_init, lhr, time_lag, fn_template, verbose=False) + fn = eval_metplus_timestr_tmpl(date_init, lhr, time_lag, fn_template, verbose=False) # Get the full path and check if the file exists fp = os.path.join(base_dir, fn) From 879fd98d92b746c9d4b790eed7c176d15a665c81 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 28 Oct 2024 13:43:48 -0600 Subject: [PATCH 173/260] Bug fixes to the merge. 
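Two merge artifacts are corrected below: get_obs.py referred to the
template variable as fp_proc_templ where the merged code defines
fp_proc_tmpl, and the MRMS product argument still used the old
fields_in_filenames name instead of mrms_fields_in_obs_filenames.  For
reference, a sketch of the corrected mrms_pull_topofhour() call (the
directory and time values here are placeholders):

    from mrms_pull_topofhour import mrms_pull_topofhour

    mrms_pull_topofhour(
        valid_time="2024052106",                  # YYYYMMDDHH (example)
        source="/path/to/raw/mrms",               # placeholder for basedir_raw
        outdir="/path/to/raw/mrms/topofhour",
        product="MergedReflectivityQCComposite",  # one of the supported products
        add_vdate_subdir=False,
    )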
--- ush/get_obs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ush/get_obs.py b/ush/get_obs.py index c6f8732be3..361426c16c 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -469,7 +469,7 @@ def get_obs(config, obtype, yyyymmdd_task): leadtime = yyyymmddhh - yyyymmdd_task # Call METplus subroutine to evaluate the template for the full path to # the file containing METplus timestrings at the current time. - fn = sts.do_string_sub(tmpl=fp_proc_templ,init=yyyymmdd_task,valid=yyyymmddhh, + fn = sts.do_string_sub(tmpl=fp_proc_tmpl,init=yyyymmdd_task,valid=yyyymmddhh, lead=leadtime.total_seconds()) all_fp_proc_dict[fg].append(fn) @@ -796,7 +796,7 @@ def get_obs(config, obtype, yyyymmdd_task): mrms_pull_topofhour(valid_time=yyyymmddhh_str, source=basedir_raw, outdir=os.path.join(basedir_raw, 'topofhour'), - product=fields_in_filenames[i], + product=mrms_fields_in_obs_filenames[i], add_vdate_subdir=False) # The raw file name needs to be the same as what the retrieve_data.py From 476eb15be578f92de7fe4bc1e314406da88ff7a3 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 29 Oct 2024 10:41:03 -0600 Subject: [PATCH 174/260] First shot at modifications to enable variable forecast output interval in the verification. --- ush/set_cycle_and_obs_timeinfo.py | 295 +++++++++++++++++++++--------- 1 file changed, 211 insertions(+), 84 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 2130ad99ea..36c20e126c 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -624,97 +624,224 @@ def get_obs_retrieve_times_by_day( # Get list of field groups to be verified. vx_field_groups = vx_config['VX_FIELD_GROUPS'] - # Define dictionary containing information about all field groups that - # can possibly be verified. This information includes their temporal - # characteristics (cumulative vs. instantaneous) and the mapping between - # the observation type and the field group. - vx_field_info = {'cumul': [{'obtype': 'CCPA', 'field_groups': ['APCP']}, - {'obtype': 'NOHRSC', 'field_groups': ['ASNOW']}], - 'inst': [{'obtype': 'MRMS', 'field_groups': ['REFC', 'RETOP']}, - {'obtype': 'NDAS', 'field_groups': ['ADPSFC', 'ADPUPA']}] - } - - # Keep only those items in the dictionary vx_field_info defined above that - # have field groups that appear in the list of field groups to verify. - for obs_time_type, obtypes_to_field_groups_dict_list in vx_field_info.copy().items(): - for obtypes_to_field_groups_dict in obtypes_to_field_groups_dict_list.copy(): - obtype = obtypes_to_field_groups_dict['obtype'] - field_groups = obtypes_to_field_groups_dict['field_groups'] - field_groups = [fg for fg in field_groups if fg in vx_field_groups] - obtypes_to_field_groups_dict['field_groups'] = field_groups - if not field_groups: obtypes_to_field_groups_dict_list.remove(obtypes_to_field_groups_dict) - if not obtypes_to_field_groups_dict_list: vx_field_info.pop(obs_time_type) - - # Create dictionary containing the temporal characteristics as keys and - # a string list of obs types to verify as the values. - obs_time_type_to_obtypes_dict = dict() - for obs_time_type, obtypes_to_field_groups_dict_list in vx_field_info.items(): - obtype_list = [a_dict['obtype'] for a_dict in obtypes_to_field_groups_dict_list] - obs_time_type_to_obtypes_dict[obs_time_type] = obtype_list - - # Initialize the return variable. - obs_retrieve_times_by_day = dict() - - # Define timedelta object representing a single day. 
+    # Define a list of dictionaries containing information about all the obs
+    # types that can possibly be used for verification in the SRW App.  Each
+    # dictionary in the list contains the name of the obs type, the temporal
+    # nature of that obs type (i.e. whether the obs type contains cumulative
+    # or instantaneous fields), and a list of the field groups that the obs
+    # type may be used to verify.
+    all_obs_info \
+    = [{'obtype': 'CCPA',   'time_type': 'cumul', 'field_groups': ['APCP']},
+       {'obtype': 'NOHRSC', 'time_type': 'cumul', 'field_groups': ['ASNOW']},
+       {'obtype': 'MRMS',   'time_type': 'inst',  'field_groups': ['REFC', 'RETOP']},
+       {'obtype': 'NDAS',   'time_type': 'inst',  'field_groups': ['ADPSFC', 'ADPUPA']}
+      ]
+
+    # Create a new list that has the same form as the list of dictionaries
+    # defined above but contains only those obs types that have at least one
+    # field group that appears in the list of field groups to verify.  Note
+    # that for those obs types that are retained in the list, the field groups
+    # that will not be verified are discarded.
+    obs_info = []
+    for obs_dict in all_obs_info.copy():
+        obtype = obs_dict['obtype']
+        field_groups = obs_dict['field_groups']
+        field_groups = [field for field in field_groups if field in vx_field_groups]
+        obs_dict = obs_dict.copy()
+        obs_dict['field_groups'] = field_groups
+        if field_groups: obs_info.append(obs_dict)
+
+    # For convenience, define timedelta object representing a single day.
     one_day = timedelta(days=1)
 
+    # Generate a dictionary (of dictionaries) that, for each obs type to be
+    # used in the vx and for each day for which there is forecast output,
+    # will contain the times at which verification will be performed, i.e.
+    # the times at which the forecast output will be compared to observations.
+    # We refer to these times as the vx comparison times.
+    vx_compare_times_by_day = dict()
+    for obs_dict in obs_info:
+
+        obtype = obs_dict['obtype']
+        obs_time_type = obs_dict['time_type']
 
         fcst_output_times_all_cycles_crnt_ttype = fcst_output_times_all_cycles[obs_time_type]
         obs_days_all_cycles_crnt_ttype = obs_days_all_cycles[obs_time_type]
 
+        vx_compare_times_by_day[obtype] = dict()
+
+        # Get the availability interval for the current observation type from the
+        # verification configuration dictionary.  Then make sure it divides evenly
+        # into 24.
+        config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"])
+        obs_avail_intvl_hrs = vx_config[config_var_name]
+        remainder = 24 % obs_avail_intvl_hrs
+        if remainder != 0:
+            msg = dedent(f"""
+                The obs availability interval for obs of type {obtype} must divide evenly
+                into 24 but doesn't:
+                  obs_avail_intvl_hrs = {obs_avail_intvl_hrs}
+                  24 % obs_avail_intvl_hrs = {remainder}
+                """)
+            logging.error(msg)
+            raise Exception(msg)
+        obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs)
+        num_obs_avail_times_per_day = int(24/obs_avail_intvl_hrs)
+
+        # Loop over all obs days over all cycles (for the current obs type).  For
+        # each such day, get the list of forecast output times and the list of obs
+        # availability times.  Finally, set the times (on that day) that verification
Finally, set the times (on that day) that verification
+ # will be performed to the intersection of these two lists.
+ for obs_day in obs_days_all_cycles_crnt_ttype:
+
+ next_day = obs_day + one_day
+ if obs_time_type == "cumul":
+ fcst_output_times_crnt_day \
+ = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day < time <= next_day]
+ elif obs_time_type == "inst":
+ fcst_output_times_crnt_day \
+ = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day <= time < next_day]
+ fcst_output_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in fcst_output_times_crnt_day]
+
+ if obs_time_type == "cumul":
+ obs_avail_times_crnt_day \
+ = [obs_day + (i+1)*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)]
+ elif obs_time_type == "inst":
+ obs_avail_times_crnt_day \
+ = [obs_day + i*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)]
+ obs_avail_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in obs_avail_times_crnt_day]
+
+ vx_compare_times_crnt_day = list(set(fcst_output_times_crnt_day) & set(obs_avail_times_crnt_day))
+ vx_compare_times_crnt_day.sort()
+
+ obs_day_str = datetime.strftime(obs_day, "%Y%m%d")
+ vx_compare_times_by_day[obtype][obs_day_str] = vx_compare_times_crnt_day
+
+ # For each obs type to be used in the vx and for each day for which there
+ # is forecast output, calculate the times at which obs need to be retrieved.
+ # For instantaneous fields, the obs retrieval times are the same as the
+ # times at which vx will be performed. For cumulative fields, each field
+ # value needs to be constructed by adding values from previous times. For
+ # example, if we're verifying 6-hourly precipitation and the obs availability
+ # interval for precip obs (CCPA) is 1 hour, then the 6-hourly values must
+ # be built by adding the 1-hour values. Thus, this requires obs at every
+ # hour, not just every 6 hours.
+ #
+ # First, initialize the dictionary (of dictionaries) that will contain the
+ # obs retrieval times (for all obs types and each day for which there is
+ # forecast output), and set the values for instantaneous obs to the vx
+ # comparison times calculated above.
+ obs_retrieve_times_by_day = dict()
+ for obs_dict in obs_info:
+ obtype = obs_dict['obtype']
+ obs_time_type = obs_dict['time_type']
+ if obs_time_type == 'inst':
+ obs_retrieve_times_by_day[obtype] = vx_compare_times_by_day[obtype]
+
+ # Next, calculate the obs retrieval times for cumulative fields. We want
+ # these times grouped into days because the get_obs workflow tasks that
+ # will use this information are day-based (i.e. each task will get obs
+ # for a single day). However, it is easier to first calculate these
+ # times as a single group over all cycles. We do this next.
+ obs_retrieve_times_all_cycles = dict()
+ for obs_dict in obs_info:
+
+ obtype = obs_dict['obtype']
+ obs_time_type = obs_dict['time_type']
+ field_groups = obs_dict['field_groups']
+
+ # Consider only cumulative fields.
+ if obs_time_type != 'cumul':
+ continue
- # Get the availability interval for the current observation type from the
- # verification configuration dictionary. Then make sure it divides evenly
- # into 24.
- config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"])
- obs_avail_intvl_hrs = vx_config[config_var_name]
- remainder = 24 % obs_avail_intvl_hrs
- if remainder != 0:
- msg = dedent(f"""
- The obs availability interval for obs of type {obtype} must divide evenly
- into 24 but doesn't:
- {obs_avail_intvl_hrs = }
- 24 % obs_avail_intvl_hrs = {remainder}"
- """)
- raise ValueError(msg)
- obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs)
- num_obs_avail_times_per_day = int(24/obs_avail_intvl_hrs)
-
- # Loop over all obs days over all cycles (for the current obs type). For
- # each such day, get the list forecast output times and the list of obs
- # availability times. Finally, set the times (on that day) that obs need
- # to be retrieved to the intersection of these two lists.
- for obs_day in obs_days_all_cycles_crnt_ttype:
-
- next_day = obs_day + one_day
- if obs_time_type == "cumul":
- fcst_output_times_crnt_day \
- = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day < time <= next_day]
- elif obs_time_type == "inst":
- fcst_output_times_crnt_day \
- = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day <= time < next_day]
- fcst_output_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in fcst_output_times_crnt_day]
-
- if obs_time_type == "cumul":
- obs_avail_times_crnt_day \
- = [obs_day + (i+1)*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)]
- elif obs_time_type == "inst":
- obs_avail_times_crnt_day \
- = [obs_day + i*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)]
- obs_avail_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in obs_avail_times_crnt_day]
-
- obs_retrieve_times_crnt_day = list(set(fcst_output_times_crnt_day) & set(obs_avail_times_crnt_day))
- obs_retrieve_times_crnt_day.sort()
-
- obs_day_str = datetime.strftime(obs_day, "%Y%m%d")
- obs_retrieve_times_by_day[obtype][obs_day_str] = obs_retrieve_times_crnt_day
+ # Initialize the set that will contain the obs retrieval times over all
+ # cycles.
+ obs_retrieve_times_all_cycles[obtype] = set()
+
+ # Get the availability interval for the current observation type from the
+ # verification configuration dictionary.
+ config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"])
+ obs_avail_intvl_hrs = vx_config[config_var_name]
+ obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs)
+
+ # Consider all field groups to be verified for the current obs type.
+ for fg in field_groups:
+
+ # Get the list of accumulation intervals for the current cumulative obs
+ # type and field group combination.
+ accum_intvls_array_name = "".join(["VX_", fg, "_ACCUMS_HRS"])
+ accum_intvls_hrs = vx_config[accum_intvls_array_name]
+
+ for cycle_start_time in cycle_start_times:
+
+ # Loop through the accumulation intervals for this obs type and field
+ # group combination.
+ for accum_intvl_hrs in accum_intvls_hrs:
+ accum_intvl = timedelta(hours=accum_intvl_hrs)
+ # Get the number of accumulation intervals that fits in the duration of
+ # the forecast. Note that the accumulation interval doesn't necessarily
+ # have to evenly divide the forecast duration; we simply drop any fractional
+ # accumulation intervals by rounding down to the nearest integer.
+ num_accum_intvls_in_fcst = int(fcst_len/accum_intvl)
+ # Calculate the times at which the current cumulative obs field will be
+ # compared to the forecast field(s) in the corresponding cumulative field
+ # group (for the current accumulation interval).
+ vx_compare_times_crnt_cycl = [cycle_start_time + (i+1)*accum_intvl
+ for i in range(0,num_accum_intvls_in_fcst)]
+ # For each such comparison time, get the times at which obs are needed
+ # to form that accumulation. For example, if the current accumulation
+ # interval is 6 hours and the obs are available every hour, then the
+ # times at which obs are needed will be the comparison time as well as
+ # the five hours preceding it. Then put all such times over all vx
+ # comparison times within all cycles into a single array of times (which
+ # is stored in the dictionary obs_retrieve_times_all_cycles).
+ for vx_compare_time in vx_compare_times_crnt_cycl:
+ remainder = accum_intvl_hrs % obs_avail_intvl_hrs
+ if remainder != 0:
+ msg = dedent(f"""
+ The obs availability interval for obs of type {obtype} must divide evenly
+ into the current accumulation interval (accum_intvl) but doesn't:
+ accum_intvl_hrs = {accum_intvl_hrs}
+ obs_avail_intvl_hrs = {obs_avail_intvl_hrs}
+ accum_intvl_hrs % obs_avail_intvl_hrs = {remainder}"
+ """)
+ logging.error(msg)
+ raise Exception(msg)
+ num_obs_avail_times_in_accum_intvl = int(accum_intvl/obs_avail_intvl)
+ obs_retrieve_times_crnt_accum_intvl \
+ = [vx_compare_time - i*obs_avail_intvl \
+ for i in range(0,num_obs_avail_times_in_accum_intvl)]
+ obs_retrieve_times_all_cycles[obtype] \
+ = obs_retrieve_times_all_cycles[obtype] | set(obs_retrieve_times_crnt_accum_intvl)
+
+ # Convert the final set of obs retrieval times for the current obs type
+ # to a sorted list. Note that the sorted() function will convert a set
+ # to a sorted list (a set itself cannot be sorted).
+ obs_retrieve_times_all_cycles[obtype] = sorted(obs_retrieve_times_all_cycles[obtype])
+
+ # Now that the obs retrieval times for cumulative fields have been obtained
+ # (grouped by cycle start date), regroup them by day and save the results
+ # in obs_retrieve_times_by_day.
+ for obs_dict in obs_info:
+
+ obtype = obs_dict['obtype']
+ obs_time_type = obs_dict['time_type']
+
+ # Consider only cumulative obs/fields.
+ if obs_time_type != 'cumul':
+ continue
+
+ # Initialize variables before looping over obs days.
+ obs_retrieve_times_by_day[obtype] = dict()
+ obs_days_all_cycles_crnt_ttype = obs_days_all_cycles[obs_time_type]
+ obs_retrieve_times_all_cycles_crnt_obtype = obs_retrieve_times_all_cycles[obtype]
+
+ for obs_day in obs_days_all_cycles_crnt_ttype:
+ next_day = obs_day + one_day
+ obs_retrieve_times_crnt_day \
+ = [time for time in obs_retrieve_times_all_cycles_crnt_obtype if obs_day < time <= next_day]
+ obs_retrieve_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in obs_retrieve_times_crnt_day]
+ obs_day_str = datetime.strftime(obs_day, "%Y%m%d")
+ obs_retrieve_times_by_day[obtype][obs_day_str] = obs_retrieve_times_crnt_day

 return obs_retrieve_times_by_day

From fcc7b1b26ed0d854ef79ad074e8295f3f003ef46 Mon Sep 17 00:00:00 2001
From: gerard ketefian
Date: Tue, 29 Oct 2024 12:57:46 -0600
Subject: [PATCH 175/260] Introduce flag that specifies whether or not vx tasks are being run in the workflow; run checks on and/or adjustments to vx parameters only if this flag is true.
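For reference, the gating logic this commit adds to load_config_for_setup() reduces to the following minimal sketch (standalone Python; "taskgroups" stands for the resolved string of workflow taskgroup file names, and all surrounding config plumbing is elided):

    # Names of the workflow taskgroup files that contain vx (meta)tasks.
    vx_taskgroup_fns = ['verify_pre.yaml', 'verify_det.yaml', 'verify_ens.yaml']

    def workflow_runs_vx(taskgroups: str) -> bool:
        # True if any vx taskgroup file name appears in the taskgroups string;
        # mirrors the do_vx flag computed in the diff below.
        return any(fn in taskgroups for fn in vx_taskgroup_fns)

    assert workflow_runs_vx('["parm/wflow/prep.yaml", "parm/wflow/verify_det.yaml"]')
    assert not workflow_runs_vx('["parm/wflow/coldstart.yaml"]')

All vx-specific consistency checks and derived vx variables are then computed only when this flag is true, so experiments without vx taskgroups skip them entirely.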
--- ush/setup.py | 434 +++++++++++++++++++++++++++------------------ 1 file changed, 229 insertions(+), 205 deletions(-)

diff --git a/ush/setup.py b/ush/setup.py
index a4bcab3b32..01db240362 100644
--- a/ush/setup.py
+++ b/ush/setup.py
@@ -61,7 +61,9 @@ def load_config_for_setup(ushdir, default_config, user_config):
 ``config.yaml``)
 Returns:
- None
+ cfg_d (dict): Experiment configuration dictionary based on default,
+ machine, and user config files
+ do_vx (bool): Flag specifying whether workflow will run vx tasks
 Raises:
 FileNotFoundError: If the user-provided configuration file or the machine file does not
@@ -170,11 +172,13 @@ def load_config_for_setup(ushdir, default_config, user_config):
 if taskgroups:
 cfg_wflow['rocoto']['tasks']['taskgroups'] = taskgroups
+ # Save string specifying final workflow taskgroups for use later on.
+ taskgroups = cfg_wflow['rocoto']['tasks']['taskgroups']
+
 # Extend yaml here on just the rocoto section to include the
 # appropriate groups of tasks
 extend_yaml(cfg_wflow)
-
 # Put the entries expanded under taskgroups in tasks
 rocoto_tasks = cfg_wflow["rocoto"]["tasks"]
 cfg_wflow["rocoto"]["tasks"] = yaml.load(rocoto_tasks.pop("taskgroups"),Loader=yaml.SafeLoader)
@@ -244,40 +248,51 @@ def _add_jobname(tasks):
 #
 # -----------------------------------------------------------------------
 #
- # Ensure that the configuration parameters associated with cumulative
- # fields (e.g. APCP) in the verification section of the experiment
- # dicitonary are temporally consistent, e.g. that accumulation intervals
- # are less than or equal to the forecast length. Update the verification
- # section of the dictionary to remove inconsistencies.
+ # If the workflow includes at least one verification task, ensure that
+ # the configuration parameters associated with cumulative fields (e.g.
+ # APCP) in the verification section of the experiment dictionary are
+ # temporally consistent, e.g. that accumulation intervals are less than
+ # or equal to the forecast length. Update the verification section of
+ # the dictionary to remove inconsistencies.
 #
 # -----------------------------------------------------------------------
 #
+ # List containing the names of all workflow config files for vx (whether
+ # or not they're included in the workflow).
+ vx_taskgroup_fns = ['verify_pre.yaml', 'verify_det.yaml', 'verify_ens.yaml']
+ # Flag that specifies whether the workflow will be running any vx tasks.
+ do_vx = any([fn for fn in vx_taskgroup_fns if fn in taskgroups])
+
+ # Initialize variable containing the vx configuration. This may be
+ # modified within the if-statement below.
 vx_config = cfg_d["verification"]
- workflow_config = cfg_d["workflow"]
- date_first_cycl = workflow_config.get("DATE_FIRST_CYCL")
- date_last_cycl = workflow_config.get("DATE_LAST_CYCL")
- incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ"))
- fcst_len_hrs = workflow_config.get("FCST_LEN_HRS")
- vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS")
+ if do_vx:
+ workflow_config = cfg_d["workflow"]
- # Convert various times and time intervals from integers or strings to
- # datetime or timedelta objects.
- date_first_cycl_dt = datetime.datetime.strptime(date_first_cycl, "%Y%m%d%H") - date_last_cycl_dt = datetime.datetime.strptime(date_last_cycl, "%Y%m%d%H") - cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) - fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) - vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) + date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") + date_last_cycl = workflow_config.get("DATE_LAST_CYCL") + incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) + fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") + vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") - # Generate a list containing the starting times of the cycles. - cycle_start_times \ - = set_cycle_dates(date_first_cycl_dt, date_last_cycl_dt, cycl_intvl_dt, - return_type='datetime') + # Convert various times and time intervals from integers or strings to + # datetime or timedelta objects. + date_first_cycl_dt = datetime.datetime.strptime(date_first_cycl, "%Y%m%d%H") + date_last_cycl_dt = datetime.datetime.strptime(date_last_cycl, "%Y%m%d%H") + cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) + fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) + vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) - # Call function that runs the consistency checks on the vx parameters. - vx_config, fcst_obs_matched_times_all_cycles_cumul \ - = check_temporal_consistency_cumul_fields( - vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) + # Generate a list containing the starting times of the cycles. + cycle_start_times \ + = set_cycle_dates(date_first_cycl_dt, date_last_cycl_dt, cycl_intvl_dt, + return_type='datetime') + + # Call function that runs the consistency checks on the vx parameters. + vx_config, fcst_obs_matched_times_all_cycles_cumul \ + = check_temporal_consistency_cumul_fields( + vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) cfg_d['verification'] = vx_config @@ -326,7 +341,7 @@ def _add_jobname(tasks): ) ) - return cfg_d + return cfg_d, do_vx def set_srw_paths(ushdir, expt_config): @@ -445,7 +460,7 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): # user config files. default_config_fp = os.path.join(USHdir, "config_defaults.yaml") user_config_fp = os.path.join(USHdir, user_config_fn) - expt_config = load_config_for_setup(USHdir, default_config_fp, user_config_fp) + expt_config, do_vx = load_config_for_setup(USHdir, default_config_fp, user_config_fp) # Set up some paths relative to the SRW clone expt_config["user"].update(set_srw_paths(USHdir, expt_config)) @@ -609,192 +624,201 @@ def _remove_tag(tasks, tag): post_meta = rocoto_tasks.get("metatask_run_ens_post", {}) post_meta.pop("metatask_run_sub_hourly_post", None) post_meta.pop("metatask_sub_hourly_last_hour_post", None) - # - # ----------------------------------------------------------------------- - # - # Set some variables needed for running checks on and creating new - # (derived) configuration variables for the verification. 
- # - # ----------------------------------------------------------------------- - # - vx_config = expt_config["verification"] + date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") date_last_cycl = workflow_config.get("DATE_LAST_CYCL") incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) - fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") - vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") - - # To enable arithmetic with dates and times, convert various time - # intervals from integer to datetime.timedelta objects. cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) - fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) - vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) - # - # ----------------------------------------------------------------------- - # - # Generate a list containing the starting times of the cycles. This will - # be needed in checking that the hours-of-day of the forecast output match - # those of the observations. - # - # ----------------------------------------------------------------------- - # - cycle_start_times \ - = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, - return_type='datetime') - # - # ----------------------------------------------------------------------- - # - # Generate a list of forecast output times and a list of obs days (i.e. - # days on which observations are needed to perform verification because - # there is forecast output on those days) over all cycles, both for - # instantaneous fields (e.g. T2m, REFC, RETOP) and for cumulative ones - # (e.g. APCP). Then add these lists to the dictionary containing workflow - # configuration variables. These will be needed in generating the ROCOTO - # XML. # # ----------------------------------------------------------------------- # - fcst_output_times_all_cycles, obs_days_all_cycles, \ - = set_fcst_output_times_and_obs_days_all_cycles( - cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) - - workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst'] - workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul'] - # - # ----------------------------------------------------------------------- - # - # Generate lists of ROCOTO cycledef strings corresonding to the obs days - # for instantaneous fields and those for cumulative ones. Then save the - # lists of cycledefs in the dictionary containing values needed to - # construct the ROCOTO XML. - # - # ----------------------------------------------------------------------- - # - cycledefs_obs_days_inst = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['inst']) - cycledefs_obs_days_cumul = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['cumul']) - - rocoto_config['cycledefs']['cycledefs_obs_days_inst'] = cycledefs_obs_days_inst - rocoto_config['cycledefs']['cycledefs_obs_days_cumul'] = cycledefs_obs_days_cumul - # - # ----------------------------------------------------------------------- - # - # Generate dictionary of dictionaries that, for each combination of obs - # type needed and obs day, contains a string list of the times at which - # that type of observation is needed on that day. The elements of each - # list are formatted as 'YYYYMMDDHH'. This information is used by the - # day-based get_obs tasks in the workflow to get obs only at those times - # at which they are needed (as opposed to for the whole day). 
- # - # ----------------------------------------------------------------------- - # - vx_config = expt_config["verification"] - obs_retrieve_times_by_day \ - = get_obs_retrieve_times_by_day( - vx_config, cycle_start_times, fcst_len_dt, - fcst_output_times_all_cycles, obs_days_all_cycles) - - for obtype, obs_days_dict in obs_retrieve_times_by_day.items(): - for obs_day, obs_retrieve_times in obs_days_dict.items(): - array_name = '_'.join(["OBS_RETRIEVE_TIMES", obtype, obs_day]) - vx_config[array_name] = obs_retrieve_times - expt_config["verification"] = vx_config - # - # ----------------------------------------------------------------------- - # - # Remove all verification (meta)tasks for which no fields are specified. - # - # ----------------------------------------------------------------------- - # - vx_field_groups_all_by_obtype = {} - vx_metatasks_all_by_obtype = {} - - vx_field_groups_all_by_obtype["CCPA"] = ["APCP"] - vx_metatasks_all_by_obtype["CCPA"] \ - = ["task_get_obs_ccpa", - "metatask_PcpCombine_obs_APCP_all_accums_CCPA", - "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", - "metatask_GridStat_CCPA_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_CCPA", - "metatask_GridStat_CCPA_ensmeanprob_all_accums"] - - vx_field_groups_all_by_obtype["NOHRSC"] = ["ASNOW"] - vx_metatasks_all_by_obtype["NOHRSC"] \ - = ["task_get_obs_nohrsc", - "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", - "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", - "metatask_GridStat_NOHRSC_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_NOHRSC", - "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] - - vx_field_groups_all_by_obtype["MRMS"] = ["REFC", "RETOP"] - vx_metatasks_all_by_obtype["MRMS"] \ - = ["task_get_obs_mrms", - "metatask_GridStat_MRMS_all_mems", - "metatask_GenEnsProd_EnsembleStat_MRMS", - "metatask_GridStat_MRMS_ensprob"] - - vx_field_groups_all_by_obtype["NDAS"] = ["ADPSFC", "ADPUPA"] - vx_metatasks_all_by_obtype["NDAS"] \ - = ["task_get_obs_ndas", - "task_run_MET_Pb2nc_obs_NDAS", - "metatask_PointStat_NDAS_all_mems", - "metatask_GenEnsProd_EnsembleStat_NDAS", - "metatask_PointStat_NDAS_ensmeanprob"] - - # If there are no field groups specified for verification, remove those - # tasks that are common to all observation types. - vx_field_groups = vx_config["VX_FIELD_GROUPS"] - if not vx_field_groups: - metatask = "metatask_check_post_output_all_mems" - rocoto_config['tasks'].pop(metatask) - - # If for a given obs type none of its field groups are specified for - # verification, remove all vx metatasks for that obs type. - for obtype in vx_field_groups_all_by_obtype: - #vx_field_groups_crnt_obtype = [field for field in vx_fields if field in vx_fields_all[obtype]] - vx_field_groups_crnt_obtype = list(set(vx_field_groups) & set(vx_field_groups_all_by_obtype[obtype])) - if not vx_field_groups_crnt_obtype: - for metatask in vx_metatasks_all_by_obtype[obtype]: - if metatask in rocoto_config['tasks']: - logging.info(dedent( - f""" - Removing verification (meta)task - "{metatask}" - from workflow since no fields belonging to observation type "{obtype}" - are specified for verification.""" - )) - rocoto_config['tasks'].pop(metatask) + # If running vx tasks, check and possibly reset values in expt_config + # and rocoto_config. 
# # ----------------------------------------------------------------------- # - # If there are at least some field groups to verify, then make sure that - # the base directories in which retrieved obs files will be placed are - # distinct for the different obs types. - # - # ----------------------------------------------------------------------- - # - if vx_field_groups: - obtypes_all = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] - obs_basedir_var_names = [f'{obtype}_OBS_DIR' for obtype in obtypes_all] - obs_basedirs_dict = {key: vx_config[key] for key in obs_basedir_var_names} - obs_basedirs_orig = list(obs_basedirs_dict.values()) - obs_basedirs_uniq = list(set(obs_basedirs_orig)) - if len(obs_basedirs_orig) != len(obs_basedirs_uniq): - msg1 = dedent(f""" - The base directories for the obs files must be distinct, but at least two - are identical:""") - msg2 = '' - for obs_basedir_var_name, obs_dir in obs_basedirs_dict.items(): - msg2 = msg2 + dedent(f""" - {obs_basedir_var_name} = {obs_dir}""") - msg3 = dedent(f""" - Modify these in the SRW App's user configuration file to make them distinct - and rerun. - """) - msg = msg1 + ' '.join(msg2.splitlines(True)) + msg3 - logging.error(msg) - raise ValueError(msg) + if do_vx: + # + # ----------------------------------------------------------------------- + # + # Set some variables needed for running checks on and creating new + # (derived) configuration variables for the verification. + # + # ----------------------------------------------------------------------- + # + vx_config = expt_config["verification"] + + fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") + vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") + + # To enable arithmetic with dates and times, convert various time + # intervals from integer to datetime.timedelta objects. + fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) + vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) + # + # ----------------------------------------------------------------------- + # + # Generate a list containing the starting times of the cycles. This will + # be needed in checking that the hours-of-day of the forecast output match + # those of the observations. + # + # ----------------------------------------------------------------------- + # + cycle_start_times \ + = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, + return_type='datetime') + # + # ----------------------------------------------------------------------- + # + # Generate a list of forecast output times and a list of obs days (i.e. + # days on which observations are needed to perform verification because + # there is forecast output on those days) over all cycles, both for + # instantaneous fields (e.g. T2m, REFC, RETOP) and for cumulative ones + # (e.g. APCP). Then add these lists to the dictionary containing workflow + # configuration variables. These will be needed in generating the ROCOTO + # XML. 
+ #
+ # -----------------------------------------------------------------------
+ #
+ fcst_output_times_all_cycles, obs_days_all_cycles, \
+ = set_fcst_output_times_and_obs_days_all_cycles(
+ cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt)
+
+ workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst']
+ workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul']
+ #
+ # -----------------------------------------------------------------------
+ #
+ # Generate lists of ROCOTO cycledef strings corresponding to the obs days
+ # for instantaneous fields and those for cumulative ones. Then save the
+ # lists of cycledefs in the dictionary containing values needed to
+ # construct the ROCOTO XML.
+ #
+ # -----------------------------------------------------------------------
+ #
+ cycledefs_obs_days_inst = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['inst'])
+ cycledefs_obs_days_cumul = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['cumul'])
+
+ rocoto_config['cycledefs']['cycledefs_obs_days_inst'] = cycledefs_obs_days_inst
+ rocoto_config['cycledefs']['cycledefs_obs_days_cumul'] = cycledefs_obs_days_cumul
+ #
+ # -----------------------------------------------------------------------
+ #
+ # Generate dictionary of dictionaries that, for each combination of obs
+ # type needed and obs day, contains a string list of the times at which
+ # that type of observation is needed on that day. The elements of each
+ # list are formatted as 'YYYYMMDDHH'. This information is used by the
+ # day-based get_obs tasks in the workflow to get obs only at those times
+ # at which they are needed (as opposed to for the whole day).
+ #
+ # -----------------------------------------------------------------------
+ #
+ obs_retrieve_times_by_day \
+ = get_obs_retrieve_times_by_day(
+ vx_config, cycle_start_times, fcst_len_dt,
+ fcst_output_times_all_cycles, obs_days_all_cycles)
+
+ for obtype, obs_days_dict in obs_retrieve_times_by_day.items():
+ for obs_day, obs_retrieve_times in obs_days_dict.items():
+ array_name = '_'.join(["OBS_RETRIEVE_TIMES", obtype, obs_day])
+ vx_config[array_name] = obs_retrieve_times
+ expt_config["verification"] = vx_config
+ #
+ # -----------------------------------------------------------------------
+ #
+ # Remove all verification (meta)tasks for which no fields are specified.
+ # + # ----------------------------------------------------------------------- + # + vx_field_groups_all_by_obtype = {} + vx_metatasks_all_by_obtype = {} + + vx_field_groups_all_by_obtype["CCPA"] = ["APCP"] + vx_metatasks_all_by_obtype["CCPA"] \ + = ["task_get_obs_ccpa", + "metatask_PcpCombine_obs_APCP_all_accums_CCPA", + "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", + "metatask_GridStat_CCPA_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_CCPA", + "metatask_GridStat_CCPA_ensmeanprob_all_accums"] + + vx_field_groups_all_by_obtype["NOHRSC"] = ["ASNOW"] + vx_metatasks_all_by_obtype["NOHRSC"] \ + = ["task_get_obs_nohrsc", + "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", + "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", + "metatask_GridStat_NOHRSC_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_NOHRSC", + "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] + + vx_field_groups_all_by_obtype["MRMS"] = ["REFC", "RETOP"] + vx_metatasks_all_by_obtype["MRMS"] \ + = ["task_get_obs_mrms", + "metatask_GridStat_MRMS_all_mems", + "metatask_GenEnsProd_EnsembleStat_MRMS", + "metatask_GridStat_MRMS_ensprob"] + + vx_field_groups_all_by_obtype["NDAS"] = ["ADPSFC", "ADPUPA"] + vx_metatasks_all_by_obtype["NDAS"] \ + = ["task_get_obs_ndas", + "task_run_MET_Pb2nc_obs_NDAS", + "metatask_PointStat_NDAS_all_mems", + "metatask_GenEnsProd_EnsembleStat_NDAS", + "metatask_PointStat_NDAS_ensmeanprob"] + + # If there are no field groups specified for verification, remove those + # tasks that are common to all observation types. + vx_field_groups = vx_config["VX_FIELD_GROUPS"] + if not vx_field_groups: + metatask = "metatask_check_post_output_all_mems" + rocoto_config['tasks'].pop(metatask) + + # If for a given obs type none of its field groups are specified for + # verification, remove all vx metatasks for that obs type. + for obtype in vx_field_groups_all_by_obtype: + vx_field_groups_crnt_obtype = list(set(vx_field_groups) & set(vx_field_groups_all_by_obtype[obtype])) + if not vx_field_groups_crnt_obtype: + for metatask in vx_metatasks_all_by_obtype[obtype]: + if metatask in rocoto_config['tasks']: + logging.info(dedent( + f""" + Removing verification (meta)task + "{metatask}" + from workflow since no fields belonging to observation type "{obtype}" + are specified for verification.""" + )) + rocoto_config['tasks'].pop(metatask) + # + # ----------------------------------------------------------------------- + # + # If there are at least some field groups to verify, then make sure that + # the base directories in which retrieved obs files will be placed are + # distinct for the different obs types. + # + # ----------------------------------------------------------------------- + # + if vx_field_groups: + obtypes_all = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] + obs_basedir_var_names = [f'{obtype}_OBS_DIR' for obtype in obtypes_all] + obs_basedirs_dict = {key: vx_config[key] for key in obs_basedir_var_names} + obs_basedirs_orig = list(obs_basedirs_dict.values()) + obs_basedirs_uniq = list(set(obs_basedirs_orig)) + if len(obs_basedirs_orig) != len(obs_basedirs_uniq): + msg1 = dedent(f""" + The base directories for the obs files must be distinct, but at least two + are identical:""") + msg2 = '' + for obs_basedir_var_name, obs_dir in obs_basedirs_dict.items(): + msg2 = msg2 + dedent(f""" + {obs_basedir_var_name} = {obs_dir}""") + msg3 = dedent(f""" + Modify these in the SRW App's user configuration file to make them distinct + and rerun. 
+ """) + msg = msg1 + ' '.join(msg2.splitlines(True)) + msg3 + logging.error(msg) + raise ValueError(msg) # # ----------------------------------------------------------------------- # From 2213e93074298944e2cbc7830a42cd367974d36b Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Tue, 29 Oct 2024 14:44:40 -0600 Subject: [PATCH 176/260] Add WE2E tests for AI/ML models (FourCastNet, GraphCast, and PanguWeather) and GFS that all use a forecast output interval other than 1 hour (all use 6 hours) and use GDAS (instead of NDAS) as the obs in the verification. --- ...cst_custom-vx-config_aiml-fourcastnet.yaml | 63 ++++++++++++++++++ ...-fcst_custom-vx-config_aiml-graphcast.yaml | 63 ++++++++++++++++++ ...st_custom-vx-config_aiml-panguweather.yaml | 63 ++++++++++++++++++ ...vx-det_long-fcst_custom-vx-config_gfs.yaml | 66 +++++++++++++++++++ .../vx_config_det.obs_gdas.model_aiml.yaml | 54 +++++++++++++++ .../vx_config_det.obs_gdas.model_gfs.yaml | 54 +++++++++++++++ 6 files changed, 363 insertions(+) create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml create mode 100644 tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml create mode 100644 tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml new file mode 100644 index 0000000000..47116561ed --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to test deterministic verification of the + FourCastNet (fcnv2) global AI model. Note that this test uses a custom + verification configuration file (as opposed to the default one in the + SRW) because the AI model output often does not include many of the + fields that exist in physics-based models. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + DATE_FIRST_CYCL: '2024073000' + DATE_LAST_CYCL: '2024073000' + FCST_LEN_HRS: 240 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'global' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. 
+ CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_ai_tests" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS (if it's necessary to fetch obs files). + REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + # + VX_FCST_MODEL_NAME: 'fcnv2' + VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' + FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' + FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' + # + VX_FCST_OUTPUT_INTVL_HRS: 6 + VX_FIELD_GROUPS: [ "ADPSFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml new file mode 100644 index 0000000000..a9067a9114 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to test deterministic verification of the + GraphCast (gc) global AI model. Note that this test uses a custom + verification configuration file (as opposed to the default one in the + SRW) because the AI model output often does not include many of the + fields that exist in physics-based models. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + DATE_FIRST_CYCL: '2024073000' + DATE_LAST_CYCL: '2024073000' + FCST_LEN_HRS: 240 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'global' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_ai_tests" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + # + VX_FCST_MODEL_NAME: 'gc' + VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' + FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' + FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' + # + VX_FCST_OUTPUT_INTVL_HRS: 6 + VX_FIELD_GROUPS: [ "ADPSFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml new file mode 100644 index 0000000000..aa3e3a834d --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to test deterministic verification of the + Pangu-Weather (pw) global AI model. Note that this test uses a custom + verification configuration file (as opposed to the default one in the + SRW) because the AI model output often does not include many of the + fields that exist in physics-based models. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + DATE_FIRST_CYCL: '2024073000' + DATE_LAST_CYCL: '2024073000' + FCST_LEN_HRS: 240 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'global' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_ai_tests" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + # + VX_FCST_MODEL_NAME: 'pw' + VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' + FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' + FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' + # + VX_FCST_OUTPUT_INTVL_HRS: 6 + VX_FIELD_GROUPS: [ "ADPSFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml new file mode 100644 index 0000000000..1672bd4fc9 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -0,0 +1,66 @@ +metadata: + description: |- + SRW App configuration file to test deterministic verification of the + Global Forecast System (GFS) model in a way that is comparable to vx + for several AI models [GraphCast (gc), FourCastNet (fcnv2), and Pangu- + Weather (pw)]. The idea is for this test to serve as a baseline to + which the AI vx can be compared. Thus, this test uses a custom vx + verification configuration file (as opposed to the default one in the + SRW) because the AI model output often does not include many of the + fields that exist in physics-based models. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + DATE_FIRST_CYCL: '2024073000' + DATE_LAST_CYCL: '2024073000' + FCST_LEN_HRS: 240 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'global' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "/scratch2/BMC/fv3lam/Gerard.Ketefian/obs_for_ai_tests" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + # + VX_FCST_MODEL_NAME: 'gfs' + VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml' + VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' + FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}/atmos' + FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' + # + VX_FCST_OUTPUT_INTVL_HRS: 6 + VX_FIELD_GROUPS: [ "ADPSFC" ] diff --git a/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml b/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml new file mode 100644 index 0000000000..11bcb2e568 --- /dev/null +++ b/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml @@ -0,0 +1,54 @@ +# +# This configuration file specifies the field groups, fields, levels, +# and thresholds to use for DETERMINISTIC verification. The format is +# as follows: +# +# FIELD_GROUP1: +# FIELD1: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# FIELD2: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# ... +# +# FIELD_GROUP2: +# FIELD1: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# FIELD2: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# ... +# +# ... +# +# If the threshold list for a given combination of field group, field, +# and level is set to the empty string ([]), then all values of that +# field will be included in the verification. +# +# Both the keys that represent field groups, fields, and levels and the +# strings in the list of thresholds may contain the separator string "%%" +# that separates the value of the quantity for the forecast from that for +# the observations. For example, if a field is set to +# +# RETOP%%EchoTop18 +# +# it means the name of the field in the forecast data is RETOP while its +# name in the observations is EchoTop18. +# +ADPSFC: + TMP: + Z2: [] + UGRD: + Z10: ['ge2.572'] + VGRD: + Z10: ['ge2.572'] + WIND: + Z10: ['ge2.572', 'ge2.572&<5.144', 'ge5.144', 'ge10.288', 'ge15.433'] + PRES%%PRMSL: + Z0: [] diff --git a/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml b/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml new file mode 100644 index 0000000000..9b8e25ff59 --- /dev/null +++ b/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml @@ -0,0 +1,54 @@ +# +# This configuration file specifies the field groups, fields, levels, +# and thresholds to use for DETERMINISTIC verification. The format is +# as follows: +# +# FIELD_GROUP1: +# FIELD1: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# FIELD2: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# ... +# +# FIELD_GROUP2: +# FIELD1: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# FIELD2: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# ... +# +# ... +# +# If the threshold list for a given combination of field group, field, +# and level is set to the empty string ([]), then all values of that +# field will be included in the verification. 
+# +# Both the keys that represent field groups, fields, and levels and the +# strings in the list of thresholds may contain the separator string "%%" +# that separates the value of the quantity for the forecast from that for +# the observations. For example, if a field is set to +# +# RETOP%%EchoTop18 +# +# it means the name of the field in the forecast data is RETOP while its +# name in the observations is EchoTop18. +# +ADPSFC: + TMP: + Z2: [] + UGRD: + Z10: ['ge2.572'] + VGRD: + Z10: ['ge2.572'] + WIND: + Z10: ['ge2.572', 'ge2.572&<5.144', 'ge5.144', 'ge10.288', 'ge15.433'] + PRMSL: + Z0: [] From 51972efaa52307f406e78bae30530f7e88b6f4aa Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 30 Oct 2024 10:01:38 -0600 Subject: [PATCH 177/260] Move vx configuration files, both the defaults and the custom ones used for some WE2E tests, to parm/metplus/vx_configs. --- doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst | 2 +- .../vx_configs/vx_config_det.obs_gdas.model_aiml.yaml | 0 .../vx_configs/vx_config_det.obs_gdas.model_gfs.yaml | 0 parm/metplus/{ => vx_configs}/vx_config_det.yaml | 0 parm/metplus/{ => vx_configs}/vx_config_ens.yaml | 0 ....vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml | 2 +- ...ig.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml | 2 +- ...vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml | 2 +- .../config.vx-det_long-fcst_custom-vx-config_gfs.yaml | 2 +- ush/config_defaults.yaml | 6 +++--- 10 files changed, 8 insertions(+), 8 deletions(-) rename {tests/WE2E => parm/metplus}/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml (100%) rename {tests/WE2E => parm/metplus}/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml (100%) rename parm/metplus/{ => vx_configs}/vx_config_det.yaml (100%) rename parm/metplus/{ => vx_configs}/vx_config_ens.yaml (100%) diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index df9a0dfa22..8689fdda8e 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -1634,7 +1634,7 @@ General VX Parameters then ``VX_ASNOW_ACCUMS_HRS`` must contain at least one element. Otherwise, ``VX_ASNOW_ACCUMS_HRS`` will be ignored. Valid values: ``6`` | ``12`` | ``18`` | ``24`` -``VX_CONFIG_[DET|ENS]_FN``: (Default: ``vx_config_[det|ens].yaml``) +``VX_CONFIG_[DET|ENS]_FN``: (Default: ``vx_configs/vx_config_[det|ens].yaml``) Names of configuration files for deterministic and ensemble verification that specify the field groups, field names, levels, and (if applicable) thresholds for which to run verification. 
These are relative to the diff --git a/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml similarity index 100% rename from tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml rename to parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml diff --git a/tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml similarity index 100% rename from tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml rename to parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml diff --git a/parm/metplus/vx_config_det.yaml b/parm/metplus/vx_configs/vx_config_det.yaml similarity index 100% rename from parm/metplus/vx_config_det.yaml rename to parm/metplus/vx_configs/vx_config_det.yaml diff --git a/parm/metplus/vx_config_ens.yaml b/parm/metplus/vx_configs/vx_config_ens.yaml similarity index 100% rename from parm/metplus/vx_config_ens.yaml rename to parm/metplus/vx_configs/vx_config_ens.yaml diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml index 47116561ed..b21755f3cf 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -53,7 +53,7 @@ verification: OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'fcnv2' - VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml index a9067a9114..6c330f9ba5 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -53,7 +53,7 @@ verification: OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'gc' - VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml index aa3e3a834d..a354793981 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml +++ 
b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -53,7 +53,7 @@ verification: OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'pw' - VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml index 1672bd4fc9..b7ded54bb9 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -56,7 +56,7 @@ verification: OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'gfs' - VX_CONFIG_DET_FN: '../../tests/WE2E/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_gfs.yaml' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_ai_tests' FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}/atmos' FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}' diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 3957c3c0db..fb1ff66142 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2377,10 +2377,10 @@ verification: # thresholds for which to run verification. These are relative to the # directory METPLUS_CONF in which the METplus config templates are # located. They may include leading relative paths before the file - # names, e.g. ``some_dir/another_dir/vx_config_det.yaml``. + # names, e.g. "some_dir/another_dir/vx_config_det.yaml". # - VX_CONFIG_DET_FN: 'vx_config_det.yaml' - VX_CONFIG_ENS_FN: 'vx_config_ens.yaml' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.yaml' + VX_CONFIG_ENS_FN: 'vx_configs/vx_config_ens.yaml' # # VX_OUTPUT_BASEDIR: # Template for base (i.e. top-level) directory in which METplus will place From 36b647fe76a9a822d254c5b79b46fc4f140a70a8 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Wed, 30 Oct 2024 10:49:26 -0600 Subject: [PATCH 178/260] Add the 12 new WE2E vx tests to the various test suites. 
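The machine suite files edited below are plain lists of WE2E test names, one per line; each name is expected to match a config.<test_name>.yaml file under tests/WE2E/test_configs/. A quick sanity check of a suite against the available test configs could look like the following (a hypothetical helper sketched for illustration only, not part of this patch):

    # Hypothetical helper: report suite entries that have no matching
    # config.<test_name>.yaml file under the test configs directory.
    from pathlib import Path

    def missing_tests(suite_file, test_configs_dir):
        names = [ln.strip() for ln in Path(suite_file).read_text().splitlines()
                 if ln.strip()]
        have = {p.name[len('config.'):-len('.yaml')]
                for p in Path(test_configs_dir).rglob('config.*.yaml')}
        return [n for n in names if n not in have]

With the four tests added to each coverage file here, missing_tests() should return an empty list for each suite.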
--- tests/WE2E/machine_suites/comprehensive | 12 ++++++++++++ tests/WE2E/machine_suites/coverage.hera.gnu.com | 4 ++++ tests/WE2E/machine_suites/coverage.hera.intel.nco | 4 ++++ tests/WE2E/machine_suites/coverage.jet | 4 ++++ 4 files changed, 24 insertions(+) diff --git a/tests/WE2E/machine_suites/comprehensive b/tests/WE2E/machine_suites/comprehensive index 8397e5d0c0..8c42aa4599 100644 --- a/tests/WE2E/machine_suites/comprehensive +++ b/tests/WE2E/machine_suites/comprehensive @@ -75,3 +75,15 @@ MET_verification_only_vx pregen_grid_orog_sfc_climo specify_EXTRN_MDL_SYSBASEDIR_ICS_LBCS specify_template_filenames +vx-det_long-fcst_custom-vx-config_aiml-fourcastnet +vx-det_long-fcst_custom-vx-config_aiml-graphcast +vx-det_long-fcst_custom-vx-config_aiml-panguweather +vx-det_long-fcst_custom-vx-config_gfs +vx-det_long-fcst_winter-wx_SRW-staged +vx-det_multicyc_fcst-overlap_ncep-hrrr +vx-det_multicyc_first-obs-00z_ncep-hrrr +vx-det_multicyc_last-obs-00z_ncep-hrrr +vx-det_multicyc_long-fcst-no-overlap_nssl-mpas +vx-det_multicyc_long-fcst-overlap_nssl-mpas +vx-det_multicyc_no-00z-obs_nssl-mpas +vx-det_multicyc_no-fcst-overlap_ncep-hrrr diff --git a/tests/WE2E/machine_suites/coverage.hera.gnu.com b/tests/WE2E/machine_suites/coverage.hera.gnu.com index c2018a6e78..09dadbaedd 100644 --- a/tests/WE2E/machine_suites/coverage.hera.gnu.com +++ b/tests/WE2E/machine_suites/coverage.hera.gnu.com @@ -9,3 +9,7 @@ MET_verification_only_vx MET_ensemble_verification_only_vx_time_lag 2019_halloween_storm 2020_jan_cold_blast +vx-det_long-fcst_custom-vx-config_aiml-fourcastnet +vx-det_long-fcst_custom-vx-config_gfs +vx-det_long-fcst_winter-wx_SRW-staged +vx-det_multicyc_fcst-overlap_ncep-hrrr diff --git a/tests/WE2E/machine_suites/coverage.hera.intel.nco b/tests/WE2E/machine_suites/coverage.hera.intel.nco index d5ab0d6fe8..cf8b92b59f 100644 --- a/tests/WE2E/machine_suites/coverage.hera.intel.nco +++ b/tests/WE2E/machine_suites/coverage.hera.intel.nco @@ -10,3 +10,7 @@ grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2 grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16 grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_HRRR pregen_grid_orog_sfc_climo +vx-det_long-fcst_custom-vx-config_aiml-graphcast +vx-det_multicyc_long-fcst-no-overlap_nssl-mpas +vx-det_multicyc_first-obs-00z_ncep-hrrr +vx-det_multicyc_no-00z-obs_nssl-mpas diff --git a/tests/WE2E/machine_suites/coverage.jet b/tests/WE2E/machine_suites/coverage.jet index 53308090b1..5078e127ef 100644 --- a/tests/WE2E/machine_suites/coverage.jet +++ b/tests/WE2E/machine_suites/coverage.jet @@ -9,3 +9,7 @@ grid_RRFS_AK_3km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16_plot grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2 grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_RRFS_v1beta +vx-det_long-fcst_custom-vx-config_aiml-panguweather +vx-det_multicyc_long-fcst-overlap_nssl-mpas +vx-det_multicyc_last-obs-00z_ncep-hrrr +vx-det_multicyc_no-fcst-overlap_ncep-hrrr From a9c3216ed8c96588682e1fff2460ab019a831b2c Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." 
Date: Fri, 1 Nov 2024 22:45:58 +0000 Subject: [PATCH 179/260] Post-merge fix for NDAS archive names --- parm/data_locations.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/parm/data_locations.yml b/parm/data_locations.yml index f3160c6bba..ca3d15d6ad 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -362,11 +362,15 @@ NDAS_obs: - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} + - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} + - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} archive_file_names: - "com2_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "gpfs_dell1_nco_ops_com_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "com_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - - "com_obsproc_v1.*_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" + - "com_obsproc_v1.0_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" + - "com_obsproc_v1.1_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" + - "com_obsproc_v1.2_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" file_names: obs: - "./nam.t{hh}z.prepbufr.tm*.nr" @@ -387,7 +391,7 @@ NOHRSC_obs: archive_internal_dir: - ./wgrbbul/nohrsc_snowfall/ -AERONET: +AERONET_obs: hpss: protocol: htar archive_format: tar @@ -409,7 +413,7 @@ obs: - "print_web_data_v3?year={yyyy}&month={mm}&day={dd}&AOD15=1&AVG=10" -AIRNOW: +AIRNOW_obs: hpss: protocol: htar archive_format: tar
From f426302d29fd1552cd597a15e16f90305466d178 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 2 Nov 2024 13:32:52 -0600 Subject: [PATCH 180/260] Rename vx tasks, metatasks, and variables and update documentation. Details below.
* Since the vx tasks under the deterministic and ensemble vx metatasks are grouped by verification field group, rename the metatasks (in verify_det.yaml and verify_ens.yaml and elsewhere as necessary) so that they refer to the vx field groups they apply to instead of the obs types. For example:
  * Change "metatask_GridStat_NOHRSC_all_accums_all_mems" to "metatask_GridStat_ASNOW_all_accums_all_mems".
  * Change "metatask_GenEnsProd_EnsembleStat_CCPA" to "metatask_GenEnsProd_EnsembleStat_APCP_all_accums" (add the "_all_accums" at the end to be consistent with the way other metatask names for cumulative field groups are named).
  * Change "metatask_GridStat_MRMS_ensprob" to "metatask_GridStat_ensprob_REFC_RETOP".
* Replace the "ADPSFC" and "ADPUPA" field groups with "SFC" and "UPA", respectively, since the "ADP" part is specific to NDAS obs, and we want the verification field groups to have general names that are not connected to the obs type.
* Move "_obs" and "_fcst" substrings in the names of several pre-vx tasks/metatasks (in verify_pre.yaml) towards the end for clarity/consistency.
* For clarity, change the environment variable "VAR" in the vx tasks (where it is set to the name of the field group to be verified) to "FIELD_GROUP". Make corresponding necessary changes elsewhere (e.g. in the ex-scripts for the vx tasks).
* Expand jinja2 code in verify_pre.yaml from a one-liner to multiple lines to make it easier to understand.
* Make corresponding modifications in documentation.
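The metatask variables renamed in this patch are ordinary Jinja2 expressions, so their expansion can be previewed outside the workflow. Below is a minimal sketch (not part of the patch; it assumes the ``jinja2`` Python package and uses a dict literal as a stand-in for the experiment's ``verification`` configuration section) that renders the ``FIELD_GROUP`` template used for the REFC/RETOP metatasks in ``parm/wflow/verify_det.yaml``:

.. code-block:: python

   from jinja2 import Template

   # Template string copied from the FIELD_GROUP metatask variable
   # introduced in parm/wflow/verify_det.yaml (REFC/RETOP case).
   tmpl = Template(
       '{% for var in verification.VX_FIELD_GROUPS %}'
       '{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}'
       '{% endfor %}'
   )
   # Stand-in for the experiment's verification config section.
   cfg = {"VX_FIELD_GROUPS": ["APCP", "REFC", "RETOP", "SFC", "UPA"]}
   print(tmpl.render(verification=cfg))  # -> 'REFC RETOP '

Rocoto then expands the metatask once per whitespace-separated value of ``#FIELD_GROUP#``, yielding one task for REFC and one for RETOP.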
--- .../BuildingRunningTesting/RunSRW.rst | 366 +++++++++++++----- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 15 +- parm/wflow/verify_det.yaml | 40 +- parm/wflow/verify_ens.yaml | 74 ++-- parm/wflow/verify_pre.yaml | 41 +- scripts/exregional_check_post_output.sh | 1 - ...onal_run_met_genensprod_or_ensemblestat.sh | 8 +- ...gional_run_met_gridstat_or_pointstat_vx.sh | 10 +- ...un_met_gridstat_or_pointstat_vx_ensmean.sh | 10 +- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 9 +- scripts/exregional_run_met_pb2nc_obs.sh | 4 +- scripts/exregional_run_met_pcpcombine.sh | 6 +- ...cst_custom-vx-config_aiml-fourcastnet.yaml | 2 +- ...-fcst_custom-vx-config_aiml-graphcast.yaml | 2 +- ...st_custom-vx-config_aiml-panguweather.yaml | 2 +- ...vx-det_long-fcst_custom-vx-config_gfs.yaml | 2 +- ush/config_defaults.yaml | 15 +- ush/set_vx_params.sh | 81 ++-- ush/setup.py | 36 +- ush/valid_param_vals.yaml | 2 +- 20 files changed, 449 insertions(+), 277 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 0eb10e1519..de4e5e54d8 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -203,15 +203,6 @@ The user must set the specifics of their experiment configuration in a ``config. * - ACCOUNT - "" - "an_account" - * - CCPA_OBS_DIR - - "{{ workflow.EXPTDIR }}/obs_data/ccpa/proc" - - "" - * - MRMS_OBS_DIR - - "{{ workflow.EXPTDIR }}/obs_data/mrms/proc" - - "" - * - NDAS_OBS_DIR - - "{{ workflow.EXPTDIR }}/obs_data/ndas/proc" - - "" * - USE_CRON_TO_RELAUNCH - false - false @@ -269,9 +260,6 @@ The user must set the specifics of their experiment configuration in a ``config. * - NUM_ENS_MEMBERS - 1 - 2 - * - VX_FCST_MODEL_NAME - - '{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}' - - FV3_GFS_v16_CONUS_25km .. _GeneralConfig: @@ -639,7 +627,7 @@ The output files (in ``.png`` format) will be located in the ``postprd`` directo Configure METplus Verification Suite (Optional) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Users who want to use the METplus verification suite to evaluate their forecasts need to add additional information to their machine file (``ush/machine/.yaml``) or their ``config.yaml`` file. Other users may skip to the next step (:numref:`Section %s: Generate the SRW App Workflow `). +Users who want to use the METplus verification suite to evaluate their forecasts need to add additional information to their machine file (``ush/machine/.yaml``) [what would need to change in the machine file?] or their ``config.yaml`` file. Other users may skip to the next step (:numref:`Section %s: Generate the SRW App Workflow `). .. note:: If METplus users update their METplus installation, they must update the module load statements in ``ufs-srweather-app/modulefiles/tasks//run_vx.local`` to correspond to their system's updated installation: @@ -658,7 +646,12 @@ To use METplus verification, MET and METplus modules need to be installed. To t tasks: taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' -:numref:`Table %s ` indicates which functions each ``verify_*.yaml`` file configures. Users must add ``verify_pre.yaml`` anytime they want to run verification (VX); it runs preprocessing tasks that are necessary for both deterministic and ensemble VX. 
Then users can add ``verify_det.yaml`` for deterministic VX or ``verify_ens.yaml`` for ensemble VX (or both). Note that ensemble VX requires the user to be running an ensemble forecast or to stage ensemble forecast files in an appropriate location. +:numref:`Table %s ` indicates which verification capabilities/workflow tasks each ``verify_*.yaml`` file enables. +Users must add ``verify_pre.yaml`` anytime they want to run verification (VX); it runs preprocessing tasks that are necessary +for both deterministic and ensemble VX, including retrieval of obs files from various data stores (e.g. NOAA's HPSS) if those +files do not already exist on disk at the locations specified by some of the parameters in the ``verification:`` section of +``config_defaults.yaml`` and/or ``config.yaml`` (see ?? for details). +Then users can add ``verify_det.yaml`` for deterministic VX or ``verify_ens.yaml`` for ensemble VX (or both). Note that ensemble VX requires the user to be running an ensemble forecast or to stage ensemble forecast files in an appropriate location. .. _VX-yamls: @@ -669,11 +662,11 @@ To use METplus verification, MET and METplus modules need to be installed. To t * - File - Description * - verify_pre.yaml - - Contains (meta)tasks that are prerequisites for both deterministic and ensemble verification (vx) + - Enables (meta)tasks that are prerequisites for both deterministic and ensemble verification (vx) * - verify_det.yaml - - Perform deterministic vx + - Enables (meta)tasks that perform deterministic vx on a single forecast or on each member of an ensemble forecast * - verify_ens.yaml - - Perform ensemble vx (must set ``DO_ENSEMBLE: true`` in ``config.yaml``) + - Enables (meta)tasks that perform ensemble vx on an ensemble of forecasts as a whole (must set ``DO_ENSEMBLE: true`` in ``config.yaml``) The ``verify_*.yaml`` files include the definitions of several common verification tasks by default. Individual verification tasks appear in :numref:`Table %s `. The tasks in the ``verify_*.yaml`` files are independent of each other, so users may want to turn some off depending on the needs of their experiment. To turn off a task, simply include its entry from ``verify_*.yaml`` as an empty YAML entry in ``config.yaml``. For example, to turn off PointStat tasks: @@ -688,21 +681,51 @@ The ``verify_*.yaml`` files include the definitions of several common verificati More information about configuring the ``rocoto:`` section can be found in :numref:`Section %s `. -If users have access to NOAA :term:`HPSS` but have not pre-staged the data, the default ``verify_pre.yaml`` taskgroup will activate the tasks, and the workflow will attempt to download the appropriate data from NOAA HPSS. In this case, the ``*_OBS_DIR`` paths must be set to the location where users want the downloaded data to reside. +If users have access to NOAA :term:`HPSS` but have not pre-staged the obs data, the default ``verify_pre.yaml`` +taskgroup will activate a set of ``get_obs_...`` workflow tasks that will attempt to retrieve the required +files from a data store such as NOAA HPSS. In this case, the variables ``*_OBS_DIR`` in ``config.yaml`` must +be set to the base directories under which users want the files to reside, and the variables ``OBS_*_FN_TEMPLATES[1]`` +must be set to METplus file name templates (possibly including leading subdirectories relative to ``*_OBS_DIR``) +that will be used to name the obs files. 
(Here, the ``*`` represents any one of the obs types :term:`CCPA`, +:term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS`.) -Users who do not have access to NOAA HPSS and do not have the data on their system will need to download :term:`CCPA`, :term:`MRMS`, and :term:`NDAS` data manually from collections of publicly available data, such as the ones listed `here `__. +Users who do not have access to NOAA HPSS and do not have the data on their system will need to download +:term:`CCPA`, :term:`MRMS`, and :term:`NDAS` data manually from collections of publicly available data, +such as the ones listed here [is there supposed to be a link here?]. -Users who have already staged the observation data needed for METplus (i.e., the :term:`CCPA`, :term:`MRMS`, and :term:`NDAS` data) on their system should set the path to this data in ``config.yaml``. +Users who have already staged the observation data needed for verification on their system should set +``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1]`` in ``config.yaml`` to match those staging locations and +file names For example, for a case in which all four types of obs are needed for vx, these variables +might be set as follows: .. code-block:: console - platform: - CCPA_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ccpa/proc - NOHRSC_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/nohrsc/proc - MRMS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/mrms/proc - NDAS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ndas/proc + verification: + + CCPA_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ccpa + NOHRSC_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/nohrsc + MRMS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/mrms + NDAS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ndas + + OBS_CCPA_FN_TEMPLATES: [ 'APCP', '{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2' ] + OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW', 'sfav2_CONUS_6h_{valid?fmt=%Y%m%d%H}_grid184.grb2' ] + OBS_MRMS_FN_TEMPLATES: [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2', + 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFC_UPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] + +If one of the days encompassed by the experiment was 20240429, and if one of the hours during +that day at which vx will be performed was 03, then, taking the CCPA obs type as an example, +one of the ``get_obs_ccpa_...`` tasks in the workflow will look for a CCPA file on disk +corresponding to this day and hour at + +``/path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t03z.01h.hrap.conus.gb2`` -After adding the VX tasks to the ``rocoto:`` section and the data paths to the ``platform:`` section, users can proceed to generate the experiment, which will perform VX tasks in addition to the default workflow tasks. +As described above, if this file does not exist, it will try to retrieve it from a data store +and place it at this location. + +After adding the VX tasks to the ``rocoto:`` section and the data paths to the ``verification:`` +section, users can proceed to generate the experiment, which will perform VX tasks in addition +to the default workflow tasks. .. _GenerateWorkflow: @@ -800,94 +823,231 @@ In addition to the baseline tasks described in :numref:`Table %s ` below. The column "taskgroup" indicates the taskgroup file that must be included in the user's ``config.yaml`` file under ``rocoto: tasks: taskgroups:`` (see :numref:`Section %s ` for more details). 
For each task, ``mem###`` refers to either ``mem000`` (if running a deterministic forecast) or a specific forecast member number (if running an ensemble forecast). "Metatasks" indicate task definitions that will become more than one workflow task based on different variables, number of hours, etc., as described in the Task Description column. See :numref:`Section %s ` for more details about metatasks. + +METplus verification tasks and metatasks are described in :numref:`Table %s ` below. +The ``taskgroup`` entry after the name of each task or metatask indicates the taskgroup file that must be +included in the user's ``config.yaml`` file under ``rocoto: tasks: taskgroups:`` in order for that task or +metatask to be considered for inclusion in the workflow (see :numref:`Section %s ` for more +details). Metatasks define a set of tasks in the workflow based on multiple values of one or more parameters +such as the ensemble member index, the accumulation interval (for cumulative fields such as accumulated +precipitation), and the name of the verificaiton field group (see description of ``VX_FIELD_GROUPS`` in +:numref:`Section %s `). See :numref:`Section %s ` for more details +about metatasks. .. _VXWorkflowTasksTable: -.. list-table:: Verification (VX) Workflow Tasks in the SRW App - :widths: 20 20 50 +.. list-table:: Verification (VX) Workflow Tasks and Metatasks in the SRW App + :widths: 5 95 :header-rows: 1 - * - Workflow Task - - ``taskgroup`` + * - Workflow Task (``taskgroup``) - Task Description - * - :bolditalic:`task_get_obs_ccpa` - - ``verify_pre.yaml`` - - If user has staged :term:`CCPA` data for verification, checks to ensure that data exists in the specified location (``CCPA_OBS_DIR``). If data does not exist, attempts to retrieve that data from NOAA :term:`HPSS`. - * - :bolditalic:`task_get_obs_ndas` - - ``verify_pre.yaml`` - - If user has staged :term:`NDAS` data for verification, checks to ensure that data exists in the specified location (``NDAS_OBS_DIR``). If data does not exist, attempts to retrieve that data from NOAA HPSS. - * - :bolditalic:`task_get_obs_nohrsc` - - ``verify_pre.yaml`` - - Retrieves and organizes hourly :term:`NOHRSC` data from NOAA HPSS. Can only be run if ``verify_pre.yaml`` is included in a ``tasksgroups`` list *and* user has access to NOAA :term:`HPSS` data. ``ASNOW`` should also be added to the ``VX_FIELDS`` list. - * - :bolditalic:`task_get_obs_mrms` - - ``verify_pre.yaml`` - - If user has staged :term:`MRMS` data for verification, checks to ensure that data exists in the specified location (``MRMS_OBS_DIR``). If data does not exist, attempts to retrieve that data from NOAA HPSS. - * - :bolditalic:`task_run_MET_Pb2nc_obs` - - ``verify_pre.yaml`` - - Converts files from prepbufr to NetCDF format. - * - :bolditalic:`metatask_PcpCombine_obs` - - ``verify_pre.yaml`` - - Derives 3-hr, 6-hr, and 24-hr accumulated precipitation observations from the 1-hr observation files. In log files, tasks will be named like ``MET_PcpCombine_obs_APCP##h``, where ``##h`` is 03h, 06h, or 24h. - * - :bolditalic:`metatask_check_post_output_all_mems` - - ``verify_pre.yaml`` - - Ensures that required post-processing tasks have completed and that the output exists in the correct form and location for each forecast member. In log files, tasks will be named like ``check_post_output_mem###``. 
- * - :bolditalic:`metatask_PcpCombine_fcst_APCP_all_accums_all_mems` - - ``verify_pre.yaml`` - - Derives accumulated precipitation forecast for 3-hr, 6-hr, and 24-hr windows for all forecast members based on 1-hr precipitation forecast values. In log files, tasks will be named like ``MET_PcpCombine_fcst_APCP##h_mem###``, where ``##h`` is 03h, 06h, or 24h. - * - :bolditalic:`metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems` - - ``verify_pre.yaml`` - - Derives accumulated snow forecast for 6-hr and 24-hr windows for all forecast members based on 1-hr precipitation forecast values. In log files, tasks will be named like ``MET_PcpCombine_fcst_ASNOW##h_mem###``, where ``##h`` is 06h or 24h. - * - :bolditalic:`metatask_GridStat_CCPA_all_accums_all_mems` - - ``verify_det.yaml`` - - Runs METplus grid-to-grid verification for 1-h, 3-h, 6-h, and 24-h (i.e., daily) accumulated precipitation. In log files, tasks will be named like ``run_MET_GridStat_vx_APCP##h_mem###``. - * - :bolditalic:`metatask_GridStat_NOHRSC_all_accums_all_mems` - - ``verify_det.yaml`` - - Runs METplus grid-to-grid verification for 6-h and 24-h (i.e., daily) accumulated snow. In log files, tasks will be named like ``run_MET_GridStat_vx_ASNOW##h_mem###``. - * - :bolditalic:`metatask_GridStat_MRMS_all_mems` - - ``verify_det.yaml`` - - Runs METplus grid-to-grid verification for composite reflectivity and :term:`echo top`. In log files, tasks will be named like ``run_MET_GridStat_vx_REFC_mem###`` or ``run_MET_GridStat_vx_RETOP_mem###``. - * - :bolditalic:`metatask_PointStat_NDAS_all_mems` - - ``verify_det.yaml`` - - Runs METplus grid-to-point verification for surface and upper-air variables. In log files, tasks will be named like ``run_MET_PointStat_vx_SFC_mem###`` or ``run_MET_PointStat_vx_UPA_mem###``. - * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_CCPA` :raw-html:`
<br/><br/>
` - (formerly *VX_ENSGRID_##h*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid ensemble verification for 1-h, 3-h, 6-h, and 24-h (i.e., daily) accumulated precipitation. In log files, tasks will be named like ``run_MET_EnsembleStat_vx_APCP##h`` or ``run_MET_GenEnsProd_vx_APCP##h``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_NOHRSC` - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid ensemble verification for 6-h and 24-h (i.e., daily) accumulated snow. In log files, tasks will be named like ``run_MET_EnsembleStat_vx_ASNOW##h`` or ``run_MET_GenEnsProd_vx_ASNOW##h``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_MRMS` :raw-html:`
<br/><br/>
` - (formerly *VX_ENSGRID_[REFC|RETOP]*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid ensemble verification for composite reflectivity and :term:`echo top`. In log files, tasks will be named like ``run_MET_GenEnsProd_vx_[REFC|RETOP]`` or ``run_MET_EnsembleStat_vx_[REFC|RETOP]``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GridStat_CCPA_ensmeanprob_all_accums` :raw-html:`
<br/><br/>
` - (formerly *VX_ENSGRID_MEAN_##h* and *VX_ENSGRID_PROB_##h*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid verification for (1) ensemble mean 1-h, 3-h, 6-h, and 24h (i.e., daily) accumulated precipitation and (2) 1-h, 3-h, 6-h, and 24h (i.e., daily) accumulated precipitation probabilistic output. In log files, the ensemble mean subtask will be named like ``run_MET_GridStat_vx_ensmean_APCP##h`` and the ensemble probabilistic output subtask will be named like ``run_MET_GridStat_vx_ensprob_APCP##h``, where ``##h`` is 01h, 03h, 06h, or 24h. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GridStat_NOHRSC_ensmeanprob_all_accums` - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid verification for (1) ensemble mean 6-h and 24h (i.e., daily) accumulated snow and (2) 6-h and 24h (i.e., daily) accumulated snow probabilistic output. In log files, the ensemble mean subtask will be named like ``run_MET_GridStat_vx_ensmean_ASNOW##h`` and the ensemble probabilistic output subtask will be named like ``run_MET_GridStat_vx_ensprob_ASNOW##h``, where ``##h`` is 06h or 24h. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GridStat_MRMS_ensprob` :raw-html:`
<br/><br/>
` - (formerly *VX_ENSGRID_PROB_[REFC|RETOP]*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid verification for ensemble probabilities for composite reflectivity and :term:`echo top`. In log files, tasks will be named like ``run_MET_GridStat_vx_ensprob_[REFC|RETOP]``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_NDAS` :raw-html:`
<br/><br/>
` - (formerly *VX_ENSPOINT*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-point ensemble verification for surface and upper-air variables. In log files, tasks will be named like ``run_MET_GenEnsProd_vx_[SFC|UPA]`` or ``run_MET_EnsembleStat_vx_[SFC|UPA]``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_PointStat_NDAS_ensmeanprob` :raw-html:`
<br/><br/>
` - (formerly *VX_ENSPOINT_[MEAN|PROB]*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-point verification for (1) ensemble mean surface and upper-air variables and (2) ensemble probabilities for surface and upper-air variables. In log files, tasks will be named like ``run_MET_PointStat_vx_ensmean_[SFC|UPA]`` or ``run_MET_PointStat_vx_ensprob_[SFC|UPA]``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. + + * - :bolditalic:`task_get_obs_ccpa` (``verify_pre.yaml``) + - Checks for existence of staged :term:`CCPA` obs files at locations specified by ``CCPA_OBS_DIR`` + and ``OBS_CCPA_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if ``'APCP'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_get_obs_nohrsc` (``verify_pre.yaml``) + - Checks for existence of staged :term:`NOHRSC` obs files at locations specified by ``NOHRSC_OBS_DIR`` + and ``OBS_NOHRSC_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_get_obs_mrms` (``verify_pre.yaml``) + - Checks for existence of staged :term:`MRMS` obs files at locations specified by ``MRMS_OBS_DIR`` + and ``OBS_MRMS_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if ``'REFC'`` and/or ``'RETOP'`` are included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_get_obs_ndas` (``verify_pre.yaml``) + - Checks for existence of staged :term:`NDAS` obs files at locations specified by ``NDAS_OBS_DIR`` + and ``OBS_NDAS_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if `'SFC'`` and/or ``'UPA'`` are included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_run_MET_Pb2nc_obs_NDAS` (``verify_pre.yaml``) + - Converts NDAS obs prepbufr files to NetCDF format. + + * - :bolditalic:`metatask_PcpCombine_APCP_all_accums_obs_CCPA` (``verify_pre.yaml``) + - Set of tasks that generate NetCDF files containing observed APCP for the accumulation intervals + specified in ``VX_APCP_ACCUMS_HRS``. Files for accumulation intervals larger than the one + provided in the obs are obtained by adding APCP values over multiple obs accumulation intervals, + e.g. if the obs contain 1-hour accumulations and 3-hr accumulation is specified in ``VX_APCP_ACCUMS_HRS``, + then groups of 3 successive 1-hour APCP values in the obs are added to obtain the 3-hour values. + In rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_APCP{accum_intvl}h_obs_CCPA``, + where ``{accum_intvl}`` is the accumulation interval in hours (e.g. ``01``, ``03``, ``06``, etc) + for which the task is being run. This metatask is included in the workflow only if ``'APCP'`` is + included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_PcpCombine_ASNOW_all_accums_obs_NOHRSC` (``verify_pre.yaml``) + - Set of tasks that generate NetCDF files containing observed ASNOW for the accumulation intervals + specified in ``VX_ASNOW_ACCUMS_HRS``. 
Files for accumulation intervals larger than the one + provided in the obs are obtained by adding ASNOW values over multiple obs accumulation intervals, + e.g. if the obs contain 6-hour accumulations and 24-hr accumulation is specified in ``VX_ASNOW_ACCUMS_HRS``, + then groups of 4 successive 6-hour ASNOW values in the obs are added to obtain the 24-hour values. + In rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_ASNOW{accum_intvl}h_obs_NOHRSC``, + where ``{accum_intvl}`` is the accumulation interval in hours (e.g. ``06``, ``24``, etc) for which + the task is being run. This metatask is included in the workflow only if ``'ASNOW'`` is included in + ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_check_post_output_all_mems` (``verify_pre.yaml``) + - Set of tasks that ensure that the post-processed forecast files required for verification exist in + the locations specified by ``VX_FCST_INPUT_BASEDIR``, ``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE``. + In rocoto, the tasks under this metatask are named ``check_post_output_mem{mem_indx}``, where ``{mem_indx}`` + is the index of the ensemble forecast member. This takes on the values ``001``, ``002``, ... for an + ensemble of forecasts or just ``000`` for a single deterministic forecast. This metatask is included + in the workflow if at least one other verification task or metatask is included. + + * - :bolditalic:`metatask_PcpCombine_APCP_all_accums_all_mems` (``verify_pre.yaml``) + - Set of tasks that generate NetCDF files containing forecast APCP for the accumulation intervals + specified in ``VX_APCP_ACCUMS_HRS``. Files for accumulation intervals larger than the one + provided in the forecasts are obtained by adding APCP values over multiple forecast accumulation + intervals, e.g. if the forecasts contain 1-hour accumulations and 3-hr accumulation is specified + in ``VX_APCP_ACCUMS_HRS``, then groups of 3 successive 1-hour APCP values in the forecasts are + added to obtain the 3-hour values. In rocoto, the tasks under this metatask are named + ``run_MET_PcpCombine_APCP{accum_intvl}h_fcst_mem{mem_indx}``, where ``{accum_intvl}`` and + ``{mem_indx}`` are the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) and + the ensemble forecast member index (or just ``000`` for a single deterministic forecast) for + which the task is being run. This metatask is included in the workflow only if ``'APCP'`` is + included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_PcpCombine_ASNOW_all_accums_all_mems` (``verify_pre.yaml``) + - Set of tasks that generate NetCDF files containing forecast ASNOW for the accumulation intervals + specified in ``VX_ASNOW_ACCUMS_HRS``. Files for accumulation intervals larger than the one + provided in the forecasts are obtained by adding ASNOW values over multiple forecast accumulation + intervals, e.g. if the forecasts contain 1-hour accumulations and 6-hr accumulation is specified + in ``VX_ASNOW_ACCUMS_HRS``, then groups of 6 successive 1-hour ASNOW values in the forecasts are + added to obtain 6-hour values. In rocoto, the tasks under this metatask are named + ``run_MET_PcpCombine_ASNOW{accum_intvl}h_fcst_mem{mem_indx}``, where ``{accum_intvl}`` and + ``{mem_indx}`` are the accumulation interval (in hours, e.g. ``06``, ``24``, etc) and the ensemble + forecast member index (or just ``000`` for a single deterministic forecast) for which the task is + being run. This metatask is included in the workflow only if ``'ASNOW'`` is included in + ``VX_FIELD_GROUPS``. 
+ + * - :bolditalic:`metatask_GridStat_APCP_all_accums_all_mems` (``verify_det.yaml``) + - Set of tasks that run grid-to-grid verification of accumulated precipitation (represented by the + verification field group ``APCP``) for the intervals specified in ``VX_APCP_ACCUMS_HRS``. In rocoto, + the tasks under this metatask are named ``run_MET_GridStat_vx_APCP{accum_intvl}h_mem{mem_indx}``, + where ``{accum_intvl}`` and ``{mem_indx}`` are the accumulation interval (in hours, e.g. ``01``, + ``03``, ``06``, etc) and the ensemble forecast member index (or just ``000`` for a single deterministic + forecast) for which the task is being run. This metatask is included in the workflow only if ``'APCP'`` + is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_ASNOW_all_accums_all_mems` (``verify_det.yaml``) + - Set of tasks that run grid-to-grid verification of accumulated snowfall (represented by the verification + field group ``ASNOW``) for the intervals specified in ``VX_ASNOW_ACCUMS_HRS``. In rocoto, the tasks under + this metatask are named ``run_MET_GridStat_vx_ASNOW{accum_intvl}h_mem{mem_indx}``, where ``{accum_intvl}`` + and ``{mem_indx}`` are the accumulation interval (in hours, e.g. ``06``, ``24``, etc) and the ensemble + forecast member index (or just ``000`` for a single deterministic forecast) for which the task is being + run. This metatask is included in the workflow only if ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_REFC_RETOP_all_mems` (``verify_det.yaml``) + - Set of tasks that run grid-to-grid verification of :term:`composite reflectivity` (represented by + the verification field group ``REFC``) and :term:`echo top` (represented by the verification field + group ``RETOP``). In rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_{field_group}_mem{mem_indx}``, + where ``field_group`` and ``{mem_indx}`` are the field group (in this case either ``REFC`` or ``RETOP``) + and the ensemble forecast member index (or just ``000`` for a single deterministic forecast) for which + the task is being run. The tasks for ``REFC`` are included in the workflow only if ``'REFC'`` is + included in ``VX_FIELD_GROUPS``, and the ones for ``RETOP`` are included only if ``'RETOP'`` is included + in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_PointStat_SFC_UPA_all_mems` (``verify_det.yaml``) + - Set of tasks that run grid-to-point verification of surface fields (represented by the verification field + group ``SFC``) and upper-air fields (represented by the verification field group ``UPA``). In rocoto, + the tasks under this metatask are named ``run_MET_PointStat_vx_{field_group}_mem{mem_indx}``, where + ``field_group`` and ``{mem_indx}`` are the field group (in this case either ``SFC`` or ``UPA``) and the + ensemble forecast member index (or just ``000`` for a single deterministic forecast) for which the task + is being run. The tasks for the surface fields are included in the workflow only if ``'SFC'`` is included + in ``VX_FIELD_GROUPS``, and the ones for the upper-air fields are included only if ``'UPA'`` is included + in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_APCP_all_accums` (``verify_ens.yaml``) + - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on APCP for the intervals + specified in ``VX_APCP_ACCUMS_HRS``. 
In rocoto, the tasks under this metatask that run ``GenEnsProd`` + are named ``run_MET_GenEnsProd_vx_APCP{accum_intvl}h``, and the ones that run `EnsembleStat`` are + named ``run_MET_EnsembleStat_vx_APCP{accum_intvl}h``, where ``{accum_intvl}`` is the accumulation + interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the tasks are being run. This metatask + is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'APCP'`` + is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums` (``verify_ens.yaml``) + - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on ASNOW for the intervals + specified in ``VX_ASNOW_ACCUMS_HRS``. In rocoto, the tasks under this metatask that run ``GenEnsProd`` + are named ``run_MET_GenEnsProd_vx_ASNOW{accum_intvl}h`` and the ones that run `EnsembleStat`` are + named ``run_MET_EnsembleStat_vx_ASNOW{accum_intvl}h``, where ``{accum_intvl}`` is the accumulation + interval (in hours, e.g. ``06``, ``24``, etc) for which the tasks are being run. This metatask will be + included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'ASNOW'`` + is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_REFC_RETOP` (``verify_ens.yaml``) + - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on REFC (:term:`composite + reflectivity`) and RETOP (:term:`echo top`). In rocoto, the tasks under this metatask that run + ``GenEnsProd`` are named ``run_MET_GenEnsProd_vx_{field_group}``, and the ones that run `EnsembleStat`` + are named ``run_MET_EnsembleStat_vx_{field_group}``, where ``{field_group}`` is the field group (in + this case either ``REFC`` or ``RETOP``) for which the tasks are being run. The tasks for ``REFC`` are + included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'REFC'`` + is included in ``VX_FIELD_GROUPS``, and the ones for ``RETOP`` are included only if ``DO_ENSEMBLE`` is + set to ``True`` in ``config.yaml`` and ``'RETOP'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_SFC_UPA` (``verify_ens.yaml``) + - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on surface fields (represented + by the verification field group ``SFC``) and upper-air fields (represented by the verification field group + ``UPA``). In rocoto, the tasks under this metatask that run ``GenEnsProd`` are named ``run_MET_GenEnsProd_vx_{field_group}``, + and the ones that run `EnsembleStat`` are named ``run_MET_EnsembleStat_vx_{field_group}``, where ``{field_group}`` + is the field group (in this case either ``SFC`` or ``UPA``) for which the tasks are being run. The tasks for + ``SFC`` are included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'SFC'`` + is included in ``VX_FIELD_GROUPS``, and the ones for ``UPA`` are included only if ``DO_ENSEMBLE`` is set to + ``True`` in ``config.yaml`` and ``'UPA'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_ensmeanprob_APCP_all_accums` (``verify_ens.yaml``) + - Set of tasks that run grid-to-grid verification of the ensemble mean of APCP and grid-to-grid probabilistic + verification of the ensemble of APCP forecasts as a whole. 
In rocoto, the tasks under this metatask for + ensemble mean verification are named ``run_MET_GridStat_vx_ensmean_APCP{accum_intvl}h``, and the ones for + ensemble probabilistic verification are named ``run_MET_GridStat_vx_ensprob_APCP{accum_intvl}h``, where + ``{accum_intvl}`` is the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the + tasks are being run. This metatask is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` + in ``config.yaml`` and ``'APCP'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_ensmeanprob_ASNOW_all_accums` (``verify_ens.yaml``) + - Set of tasks that run grid-to-grid verification of the ensemble mean of ASNOW and grid-to-grid probabilistic + verification of the ensemble of ASNOW forecasts as a whole. In rocoto, the tasks under this metatask for + ensemble mean verification are named ``run_MET_GridStat_vx_ensmean_ASNOW{accum_intvl}h``, and the ones for + ensemble probabilistic verification are named ``run_MET_GridStat_vx_ensprob_ASNOW{accum_intvl}h``, where + ``{accum_intvl}`` is the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the + tasks are being run. These tasks will be included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` + in ``config.yaml`` and ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_ensprob_REFC_RETOP` (``verify_ens.yaml``) + - Set of tasks that run grid-to-grid probabilistic verification of the ensemble of :term:`composite reflectivity` + (represented by the verification field group ``REFC``) and :term:`echo top` (represented by the field group + ``RETOP``). (Note that there is no grid-to-grid verification of the ensemble mean of these quantities.) + In rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_ensprob_{field_group}``, where + ``{field_group}`` is the field group (in this case either ``REFC`` or ``RETOP``) for which the task is + being run. The task for ``REFC`` is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` + in ``config.yaml`` and ``'REFC'`` is included in ``VX_FIELD_GROUPS``, and the one for ``RETOP`` is included + only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'RETOP'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_PointStat_ensmeanprob_SFC_UPA` (``verify_ens.yaml``) + - Set of tasks that run grid-to-grid verification of the ensemble mean of surface fields (represented by the + verification field group ``SFC``) and upper-air fields (represented by the verification field group ``UPA``) + as well as grid-to-grid probabilistic verification of the ensemble of the surface and upper-air field + forecasts as a whole. In rocoto, the tasks under this metatask for ensemble mean verification are named + ``run_MET_PointStat_vx_ensmean_{field_group}``, and the ones for ensemble probabilistic verification are + named ``run_MET_PointStat_vx_ensprob_{field_group}``, where ``{field_group}`` is the field group (in this + case either ``SFC`` or ``UPA``) on which the task is being run. The tasks for ``SFC`` are included in the + workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'SFC'`` is included in + ``VX_FIELD_GROUPS``, and the ones for ``UPA`` are included only if ``DO_ENSEMBLE`` is set to ``True`` in + ``config.yaml`` and ``'UPA'`` is included in ``VX_FIELD_GROUPS``. + .. _Run: @@ -1181,7 +1341,7 @@ Each task should finish with error code 0. 
For example: End exregional_get_extrn_mdl_files.sh at Wed Nov 16 18:08:19 UTC 2022 with error code 0 (time elapsed: 00:00:01) -Check the batch script output file in your experiment directory for a “SUCCESS” message near the end of the file. +Check the batch script output file in your experiment directory for a "SUCCESS" message near the end of the file. .. _RegionalWflowTasks: diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 8689fdda8e..231dc49dd5 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -1611,7 +1611,7 @@ Non-default parameters for verification tasks are set in the ``verification:`` s General VX Parameters --------------------------------- -``VX_FIELD_GROUPS``: (Default: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ]) +``VX_FIELD_GROUPS``: (Default: [ "APCP", "REFC", "RETOP", "SFC", "UPA" ]) The groups of fields (some of which may consist of only a single field) on which to run verification. @@ -1620,7 +1620,7 @@ General VX Parameters HPSS for retrospective cases before March 2020, by default ``ASNOW`` is not included ``VX_FIELD_GROUPS``, but it may be added to this list in order to include the verification tasks for ``ASNOW`` in the workflow. Valid values: - ``"APCP"`` | ``"ASNOW"`` | ``"REFC"`` | ``"RETOP"`` | ``"ADPSFC"`` | ``"ADPUPA"`` + ``"APCP"`` | ``"ASNOW"`` | ``"REFC"`` | ``"RETOP"`` | ``"SFC"`` | ``"UPA"`` ``VX_APCP_ACCUMS_HRS``: (Default: [ 1, 3, 6, 24 ]) The accumulation intervals (in hours) to include in the verification of @@ -1716,7 +1716,7 @@ VX Parameters for Observations ``OBS_NDAS_FN_TEMPLATES``: .. code-block:: console - [ 'ADPSFCandADPUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] + [ 'SFCandUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] File name templates for various obs types. These are meant to be used in METplus configuration files and thus contain METplus time formatting @@ -1843,11 +1843,12 @@ VX Parameters for Observations NOHRSC observations. These files will contain observed accumulated snowfall for various accumulaton intervals. -``OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT``: (Default: ``'${OBS_NDAS_FN_TEMPLATES[1]}.nc'``) +``OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT``: (Default: ``'${OBS_NDAS_FN_TEMPLATES[1]}.nc'``) METplus template for the names of the NetCDF files generated by the - worfklow verification tasks that call METplus's Pb2nc tool on NDAS - observations. These files will contain the observed ADPSFC or ADPUPA - fields in NetCDF format (instead of NDAS's native prepbufr format). + worfklow verification tasks that call METplus's Pb2nc tool on the + prepbufr files in NDAS observations. These files will contain the + observed surface (SFC) and upper-air (UPA) fields in NetCDF format + (instead of NDAS's native prepbufr format). 
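The ``{valid?fmt=...}`` tokens in the templates above use METplus's time-formatting syntax, in which the format specifier is applied to the valid time with strftime-style codes. As a rough illustration only (METplus performs this substitution internally with its own template engine; the helper below is a hypothetical stand-in, not METplus code):

.. code-block:: python

   import re
   from datetime import datetime

   def resolve_valid_time_template(template: str, valid: datetime) -> str:
       """Expand each {valid?fmt=...} token via strftime (illustration only)."""
       return re.sub(r"\{valid\?fmt=([^}]*)\}",
                     lambda m: valid.strftime(m.group(1)),
                     template)

   # Default Pb2nc output template resolved for a valid time of 2024042903:
   print(resolve_valid_time_template("prepbufr.ndas.{valid?fmt=%Y%m%d%H}.nc",
                                     datetime(2024, 4, 29, 3)))
   # -> prepbufr.ndas.2024042903.nc

Under the default templates, the Pb2nc output for a valid time of 2024042903 would therefore be named ``prepbufr.ndas.2024042903.nc``.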
``NUM_MISSING_OBS_FILES_MAX``: (Default: 2) For verification tasks that need observational data, this specifies diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index f416ce7974..c090ea8b0c 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -21,10 +21,10 @@ default_task_verify_det: &default_task_verify_det queue: '&QUEUE_DEFAULT;' walltime: 00:30:00 -metatask_GridStat_CCPA_all_accums_all_mems: +metatask_GridStat_APCP_all_accums_all_mems: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - metatask_GridStat_CCPA_APCP#ACCUM_HH#h_all_mems: + metatask_GridStat_APCP#ACCUM_HH#h_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' task_run_MET_GridStat_vx_APCP#ACCUM_HH#h_mem#mem#: @@ -36,7 +36,7 @@ metatask_GridStat_CCPA_all_accums_all_mems: envars: <<: *default_vars OBS_DIR: '&CCPA_OBS_DIR;' - VAR: APCP + FIELD_GROUP: 'APCP' ACCUM_HH: '#ACCUM_HH#' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'CCPA' @@ -50,15 +50,15 @@ metatask_GridStat_CCPA_all_accums_all_mems: and: taskdep_pcpcombine_obs: attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA + task: run_MET_PcpCombine_APCP#ACCUM_HH#h_obs_CCPA taskdep_pcpcombine_fcst: attrs: - task: run_MET_PcpCombine_fcst_APCP#ACCUM_HH#h_mem#mem# + task: run_MET_PcpCombine_APCP#ACCUM_HH#h_fcst_mem#mem# -metatask_GridStat_NOHRSC_all_accums_all_mems: +metatask_GridStat_ASNOW_all_accums_all_mems: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - metatask_GridStat_NOHRSC_ASNOW#ACCUM_HH#h_all_mems: + metatask_GridStat_ASNOW#ACCUM_HH#h_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' task_run_MET_GridStat_vx_ASNOW#ACCUM_HH#h_mem#mem#: @@ -70,7 +70,7 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: envars: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' - VAR: ASNOW + FIELD_GROUP: 'ASNOW' ACCUM_HH: '#ACCUM_HH#' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'NOHRSC' @@ -84,24 +84,24 @@ metatask_GridStat_NOHRSC_all_accums_all_mems: and: taskdep_pcpcombine_obs: attrs: - task: run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC + task: run_MET_PcpCombine_ASNOW#ACCUM_HH#h_obs_NOHRSC taskdep_pcpcombine_fcst: attrs: - task: run_MET_PcpCombine_fcst_ASNOW#ACCUM_HH#h_mem#mem# + task: run_MET_PcpCombine_ASNOW#ACCUM_HH#h_fcst_mem#mem# -metatask_GridStat_MRMS_all_mems: +metatask_GridStat_REFC_RETOP_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' - metatask_GridStat_MRMS_mem#mem#: + metatask_GridStat_REFC_RETOP_mem#mem#: var: - VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_#VAR#_mem#mem#: + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_GridStat_vx_#FIELD_GROUP#_mem#mem#: <<: *default_task_verify_det command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: <<: *default_vars OBS_DIR: '&MRMS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'GRIDSTAT' 
OBTYPE: 'MRMS' ENSMEM_INDX: "#mem#" @@ -136,19 +136,19 @@ metatask_GridStat_MRMS_all_mems: age: 00:00:00:30 text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' -metatask_PointStat_NDAS_all_mems: +metatask_PointStat_SFC_UPA_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' - metatask_PointStat_NDAS_mem#mem#: + metatask_PointStat_SFC_UPA_mem#mem#: var: - VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_PointStat_vx_#VAR#_mem#mem#: + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_PointStat_vx_#FIELD_GROUP#_mem#mem#: <<: *default_task_verify_det command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'POINTSTAT' OBTYPE: 'NDAS' ENSMEM_INDX: "#mem#" diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 8aed2d02b3..63d5392af1 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -21,7 +21,7 @@ default_task_verify_ens: &default_task_verify_ens queue: '&QUEUE_DEFAULT;' walltime: 01:00:00 -metatask_GenEnsProd_EnsembleStat_CCPA: +metatask_GenEnsProd_EnsembleStat_APCP_all_accums: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' task_run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h: &task_GenEnsProd_CCPA @@ -31,7 +31,7 @@ metatask_GenEnsProd_EnsembleStat_CCPA: <<: *default_vars ACCUM_HH: '#ACCUM_HH#' OBS_DIR: '&CCPA_OBS_DIR;' - VAR: APCP + FIELD_GROUP: 'APCP' METPLUSTOOLNAME: 'GENENSPROD' OBTYPE: 'CCPA' FCST_LEVEL: 'A#ACCUM_HH#' @@ -39,7 +39,7 @@ metatask_GenEnsProd_EnsembleStat_CCPA: dependency: metataskdep_pcpcombine_fcst: attrs: - metatask: PcpCombine_fcst_APCP#ACCUM_HH#h_all_mems + metatask: PcpCombine_APCP#ACCUM_HH#h_fcst_all_mems task_run_MET_EnsembleStat_vx_APCP#ACCUM_HH#h: <<: *task_GenEnsProd_CCPA envars: @@ -50,12 +50,12 @@ metatask_GenEnsProd_EnsembleStat_CCPA: and: taskdep_pcpcombine_obs_ccpa: &taskdep_pcpcombine_obs_ccpa attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA + task: run_MET_PcpCombine_APCP#ACCUM_HH#h_obs_CCPA taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h -metatask_GenEnsProd_EnsembleStat_NOHRSC: +metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' task_run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h: &task_GenEnsProd_NOHRSC @@ -65,7 +65,7 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: <<: *default_vars ACCUM_HH: '#ACCUM_HH#' OBS_DIR: '&NOHRSC_OBS_DIR;' - VAR: ASNOW + FIELD_GROUP: 'ASNOW' METPLUSTOOLNAME: 'GENENSPROD' OBTYPE: 'NOHRSC' FCST_LEVEL: 'A#ACCUM_HH#' @@ -74,7 +74,7 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: and: metataskdep_pcpcombine_fcst: attrs: - metatask: PcpCombine_fcst_ASNOW#ACCUM_HH#h_all_mems + metatask: PcpCombine_ASNOW#ACCUM_HH#h_fcst_all_mems task_run_MET_EnsembleStat_vx_ASNOW#ACCUM_HH#h: <<: *task_GenEnsProd_NOHRSC envars: @@ -85,22 +85,22 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: and: taskdep_pcpcombine_obs_nohrsc: &taskdep_pcpcombine_obs_nohrsc attrs: - task: 
run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC + task: run_MET_PcpCombine_ASNOW#ACCUM_HH#h_obs_NOHRSC taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h -metatask_GenEnsProd_EnsembleStat_MRMS: +metatask_GenEnsProd_EnsembleStat_REFC_RETOP: var: - VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GenEnsProd_vx_#VAR#: &task_GenEnsProd_MRMS + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_GenEnsProd_vx_#FIELD_GROUP#: &task_GenEnsProd_MRMS <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GENENSPROD_OR_ENSEMBLESTAT"' envars: &envars_GenEnsProd_MRMS <<: *default_vars ACCUM_HH: '01' OBS_DIR: '&MRMS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'GENENSPROD' OBTYPE: 'MRMS' FCST_LEVEL: 'L0' @@ -110,7 +110,7 @@ metatask_GenEnsProd_EnsembleStat_MRMS: metataskdep_check_post_output: &check_post_output attrs: metatask: check_post_output_all_mems - task_run_MET_EnsembleStat_vx_#VAR#: + task_run_MET_EnsembleStat_vx_#FIELD_GROUP#: <<: *task_GenEnsProd_MRMS envars: <<: *envars_GenEnsProd_MRMS @@ -140,18 +140,18 @@ metatask_GenEnsProd_EnsembleStat_MRMS: {%- endfor %}' taskdep_genensprod: attrs: - task: run_MET_GenEnsProd_vx_#VAR# + task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_GenEnsProd_EnsembleStat_NDAS: +metatask_GenEnsProd_EnsembleStat_SFC_UPA: var: - VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GenEnsProd_vx_#VAR#: &task_GenEnsProd_NDAS + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_GenEnsProd_vx_#FIELD_GROUP#: &task_GenEnsProd_NDAS <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GENENSPROD_OR_ENSEMBLESTAT"' envars: &envars_GenEnsProd_NDAS <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'GENENSPROD' OBTYPE: 'NDAS' ACCUM_HH: '01' @@ -161,7 +161,7 @@ metatask_GenEnsProd_EnsembleStat_NDAS: dependency: metataskdep_check_post_output: <<: *check_post_output - task_run_MET_EnsembleStat_vx_#VAR#: + task_run_MET_EnsembleStat_vx_#FIELD_GROUP#: <<: *task_GenEnsProd_NDAS envars: <<: *envars_GenEnsProd_NDAS @@ -190,13 +190,13 @@ metatask_GenEnsProd_EnsembleStat_NDAS: {%- endfor %}' taskdep_genensprod: attrs: - task: run_MET_GenEnsProd_vx_#VAR# + task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_GridStat_CCPA_ensmeanprob_all_accums: +metatask_GridStat_ensmeanprob_APCP_all_accums: var: stat: MEAN PROB statlc: mean prob - metatask_GridStat_CCPA_ens#statlc#_all_accums: + metatask_GridStat_ens#statlc#_APCP_all_accums: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' task_run_MET_GridStat_vx_ens#statlc#_APCP#ACCUM_HH#h: @@ -205,7 +205,7 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: envars: <<: *default_vars OBS_DIR: '&CCPA_OBS_DIR;' - VAR: APCP + FIELD_GROUP: 'APCP' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'CCPA' ACCUM_HH: '#ACCUM_HH#' @@ -219,11 +219,11 @@ metatask_GridStat_CCPA_ensmeanprob_all_accums: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h -metatask_GridStat_NOHRSC_ensmeanprob_all_accums: 
+metatask_GridStat_ensmeanprob_ASNOW_all_accums: var: stat: MEAN PROB statlc: mean prob - metatask_GridStat_NOHRSC_ens#statlc#_all_accums: + metatask_GridStat_ens#statlc#_ASNOW_all_accums: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' task_run_MET_GridStat_vx_ens#statlc#_ASNOW#ACCUM_HH#h: @@ -232,7 +232,7 @@ metatask_GridStat_NOHRSC_ensmeanprob_all_accums: envars: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' - VAR: ASNOW + FIELD_GROUP: 'ASNOW' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'NOHRSC' ACCUM_HH: '#ACCUM_HH#' @@ -246,17 +246,17 @@ metatask_GridStat_NOHRSC_ensmeanprob_all_accums: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h -metatask_GridStat_MRMS_ensprob: +metatask_GridStat_ensprob_REFC_RETOP: var: - VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_ensprob_#VAR#: + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_GridStat_vx_ensprob_#FIELD_GROUP#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENSPROB"' envars: <<: *default_vars ACCUM_HH: '01' OBS_DIR: '&MRMS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'MRMS' FCST_LEVEL: 'L0' @@ -267,22 +267,22 @@ metatask_GridStat_MRMS_ensprob: <<: *all_get_obs_mrms_complete taskdep_genensprod: attrs: - task: run_MET_GenEnsProd_vx_#VAR# + task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_PointStat_NDAS_ensmeanprob: +metatask_PointStat_ensmeanprob_SFC_UPA: var: stat: MEAN PROB statlc: mean prob - metatask_PointStat_NDAS_ens#statlc#: + metatask_PointStat_ens#statlc#_SFC_UPA: var: - VAR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_PointStat_vx_ens#statlc#_#VAR#: + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_PointStat_vx_ens#statlc#_#FIELD_GROUP#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' envars: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'POINTSTAT' OBTYPE: 'NDAS' ACCUM_HH: '01' @@ -294,4 +294,4 @@ metatask_PointStat_NDAS_ensmeanprob: <<: *all_pb2nc_obs_ndas_complete taskdep_genensprod: attrs: - task: run_MET_GenEnsProd_vx_#VAR# + task: run_MET_GenEnsProd_vx_#FIELD_GROUP# diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index d5ce7885e2..2b86772565 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -79,7 +79,7 @@ task_run_MET_Pb2nc_obs_NDAS: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' envars: <<: *default_vars - VAR: ADPSFC + FIELD_GROUP: 'SFC' ACCUM_HH: '01' FCST_OR_OBS: OBS OBTYPE: NDAS @@ -98,10 +98,10 @@ task_run_MET_Pb2nc_obs_NDAS: attrs: task: get_obs_ndas -metatask_PcpCombine_obs_APCP_all_accums_CCPA: +metatask_PcpCombine_APCP_all_accums_obs_CCPA: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_PcpCombine_obs_APCP#ACCUM_HH#h_CCPA: + 
task_run_MET_PcpCombine_APCP#ACCUM_HH#h_obs_CCPA: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -109,7 +109,7 @@ metatask_PcpCombine_obs_APCP_all_accums_CCPA: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' envars: <<: *default_vars - VAR: APCP + FIELD_GROUP: 'APCP' ACCUM_HH: '#ACCUM_HH#' FCST_OR_OBS: OBS OBTYPE: CCPA @@ -140,10 +140,10 @@ metatask_PcpCombine_obs_APCP_all_accums_CCPA: {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' -metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC: +metatask_PcpCombine_ASNOW_all_accums_obs_NOHRSC: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_PcpCombine_obs_ASNOW#ACCUM_HH#h_NOHRSC: + task_run_MET_PcpCombine_ASNOW#ACCUM_HH#h_obs_NOHRSC: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -151,7 +151,7 @@ metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' envars: <<: *default_vars - VAR: ASNOW + FIELD_GROUP: 'ASNOW' ACCUM_HH: '#ACCUM_HH#' FCST_OR_OBS: OBS OBTYPE: NOHRSC @@ -193,7 +193,6 @@ metatask_check_post_output_all_mems: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_CHECK_POST_OUTPUT"' envars: <<: *default_vars - VAR: APCP ENSMEM_INDX: '#mem#' dependency: # This "or" checks that the necessary stand-alone post tasks or forecast @@ -232,7 +231,15 @@ metatask_check_post_output_all_mems: # metatask: run_post_mem#mem#_all_fhrs taskdep: attrs: - task: '{% for h in range(0, workflow.LONG_FCST_LEN+1) %}{% if h > 0 %}{{" \n"}}{% endif %}{%- endfor -%}' + task: '{%- for h in range(0, workflow.LONG_FCST_LEN+1) %} + {%- if h > 0 %} + {{- " \n" }} + {%- endif %} + {%- endfor %}' # This "and" is to check whether post is being run inline (i.e. as part of # the weather model), and if so, to ensure that the forecast task for the # current member has completed. 
@@ -259,13 +266,13 @@ metatask_check_post_output_all_mems: taskvalid: <<: *fcst_task_exists -metatask_PcpCombine_fcst_APCP_all_accums_all_mems: +metatask_PcpCombine_APCP_all_accums_all_mems: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - metatask_PcpCombine_fcst_APCP#ACCUM_HH#h_all_mems: + metatask_PcpCombine_APCP#ACCUM_HH#h_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' - task_run_MET_PcpCombine_fcst_APCP#ACCUM_HH#h_mem#mem#: + task_run_MET_PcpCombine_APCP#ACCUM_HH#h_fcst_mem#mem#: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -273,7 +280,7 @@ metatask_PcpCombine_fcst_APCP_all_accums_all_mems: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' envars: <<: *default_vars - VAR: APCP + FIELD_GROUP: 'APCP' ACCUM_HH: '#ACCUM_HH#' FCST_OR_OBS: FCST OBTYPE: CCPA @@ -287,13 +294,13 @@ metatask_PcpCombine_fcst_APCP_all_accums_all_mems: text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' walltime: 00:30:00 -metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: +metatask_PcpCombine_ASNOW_all_accums_all_mems: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - metatask_PcpCombine_fcst_ASNOW#ACCUM_HH#h_all_mems: + metatask_PcpCombine_ASNOW#ACCUM_HH#h_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' - task_run_MET_PcpCombine_fcst_ASNOW#ACCUM_HH#h_mem#mem#: + task_run_MET_PcpCombine_ASNOW#ACCUM_HH#h_fcst_mem#mem#: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -301,7 +308,7 @@ metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' envars: <<: *default_vars - VAR: ASNOW + FIELD_GROUP: 'ASNOW' ACCUM_HH: '#ACCUM_HH#' FCST_OR_OBS: FCST OBTYPE: NOHRSC diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index 2a66a2fecf..4d5836519c 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -11,7 +11,6 @@ # CDATE # ENSMEM_INDX # GLOBAL_VAR_DEFNS_FP -# VAR # METPLUS_ROOT (used by ush/set_leadhrs.py) # # Experiment variables diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 934ba63283..d02adddf77 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -107,7 +107,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -158,7 +158,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" fi @@ -183,7 +183,7 @@ for (( i=0; i<${NUM_ENS_MEMBERS}; i++ )); do time_lag=$( bc -l <<< 
"${ENS_TIME_LAG_HRS[$i]}*${SECS_PER_HOUR}" ) - if [ "${VAR}" = "APCP" ] || [ "${VAR}" = "ASNOW" ]; then + if [ "${FIELD_GROUP}" = "APCP" ] || [ "${FIELD_GROUP}" = "ASNOW" ]; then template="${cdate_ensmem_subdir_or_null:+${cdate_ensmem_subdir_or_null}/}metprd/PcpCombine_fcst/${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" else template="${FCST_SUBDIR_TEMPLATE}/${FCST_FN_TEMPLATE}" @@ -387,7 +387,7 @@ settings="\ 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' -'input_field_group': '${VAR:-}' +'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 6200b0ba7e..2641080fed 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -69,8 +69,8 @@ Entering script: \"${scrfunc_fn}\" In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the METplus ${MetplusToolName} -tool to perform deterministic verification of the specified field (VAR) -for a single forecast. +tool to perform deterministic verification of the specified field gropup +(FIELD_GROUP) for a single forecast. ========================================================================" # #----------------------------------------------------------------------- @@ -98,7 +98,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" # ADPUPA field groups. set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH:-}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -199,7 +199,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" @@ -378,7 +378,7 @@ settings="\ 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' -'input_field_group': '${VAR:-}' +'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 0bfcff36d6..424756d72b 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -69,8 +69,8 @@ Entering script: \"${scrfunc_fn}\" In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the METplus ${MetplusToolName} -tool to perform verification of the specified field (VAR) on the ensemble -mean. +tool to perform verification of the specified field group (FIELD_GROUP) +on the ensemble mean. 
========================================================================" # #----------------------------------------------------------------------- @@ -96,7 +96,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -143,7 +143,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/metprd/GenEnsProd" fi @@ -330,7 +330,7 @@ settings="\ 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' -'input_field_group': '${VAR:-}' +'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 0e8d44578c..e6ad107e81 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -69,7 +69,8 @@ Entering script: \"${scrfunc_fn}\" In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the METplus ${MetplusToolName} -tool to perform verification of the specified field (VAR) on the ensemble +tool to perform verification of the specified field group (FIELD_GROUP) +on the ensemble frequencies/probabilities. ========================================================================" # @@ -96,7 +97,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -142,7 +143,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" fi OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_INPUT_FN_TEMPLATE} ) @@ -329,7 +330,7 @@ settings="\ 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' -'input_field_group': '${VAR:-}' +'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' # diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 3e6631cd1d..d1d055fe66 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -118,7 +118,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="ADP${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -140,7 +140,7 @@ OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_FN_TEMPLATES[1]} ) OUTPUT_BASE="${vx_output_basedir}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" -OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT} ) 
+OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs" # #----------------------------------------------------------------------- diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index c60ac30e36..23b10f6ff8 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -98,7 +98,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -199,7 +199,7 @@ elif [ "${FCST_OR_OBS}" = "OBS" ]; then OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" - fn_template=$(eval echo \${OBS_${OBTYPE}_${VAR}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) + fn_template=$(eval echo \${OBS_${OBTYPE}_${FIELD_GROUP}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" @@ -394,7 +394,7 @@ settings="\ 'output_accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' - 'input_field_group': '${VAR:-}' + 'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' " diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml index b21755f3cf..e5db0cd451 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -50,7 +50,7 @@ verification: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false # - OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'fcnv2' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml index 6c330f9ba5..5411b0a34f 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -50,7 +50,7 @@ verification: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false # - OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'gc' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml index a354793981..9a088cf468 100644 --- 
a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -50,7 +50,7 @@ verification: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false # - OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'pw' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml index b7ded54bb9..9fbca68833 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -53,7 +53,7 @@ verification: REMOVE_RAW_OBS_NDAS: false REMOVE_RAW_OBS_NOHRSC: false # - OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'gdas/{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # VX_FCST_MODEL_NAME: 'gfs' VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_gfs.yaml' diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index fb1ff66142..aa4cdf99e0 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2353,7 +2353,7 @@ verification: # included VX_FIELD_GROUPS, but it may be added to this list in order to # include the verification tasks for ASNOW in the workflow. # - VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "SFC", "UPA" ] # # VX_APCP_ACCUMS_HRS: # The accumulation intervals (in hours) to include in the verification of @@ -2546,7 +2546,7 @@ verification: {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ] OBS_MRMS_FN_TEMPLATES: [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2', 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] - OBS_NDAS_FN_TEMPLATES: [ 'ADPSFCandADPUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] # # REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]: # Flag specifying whether to remove the "raw" observation directories @@ -2580,13 +2580,14 @@ verification: OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}' # - # OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: + # OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT: # METplus template for the names of the NetCDF files generated by the - # worfklow verification tasks that call METplus's Pb2nc tool on NDAS - # observations. These files will contain the observed ADPSFC or ADPUPA - # fields in NetCDF format (instead of NDAS's native prepbufr format). + # workflow verification tasks that call METplus's Pb2nc tool on the + # prepbufr files in NDAS observations. These files will contain the + # observed surface (SFC) and upper-air (UPA) fields in NetCDF format + # (instead of NDAS's native prepbufr format).
# - OBS_NDAS_ADPSFCandADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_FN_TEMPLATES[1]}.nc' + OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_FN_TEMPLATES[1]}.nc' # # NUM_MISSING_OBS_FILES_MAX: # For verification tasks that need observational data, this specifies diff --git a/ush/set_vx_params.sh b/ush/set_vx_params.sh index 267cd6902f..e17a360c38 100644 --- a/ush/set_vx_params.sh +++ b/ush/set_vx_params.sh @@ -3,9 +3,10 @@ # # This file defines a function that sets various parameters needed when # performing verification. The way these parameters are set depends on -# the field being verified and, if the field is cumulative (e.g. -# accumulated precipitation or snowfall), the accumulation period -# (both of which are inputs to this function). +# the field group being verified and, if the field group consists of a +# set of cumulative fields (e.g. accumulated precipitation or accumulated +# snowfall), the accumulation interval (both of which are inputs to this +# function). # # As of 20220928, the verification tasks in the SRW App workflow use the # MET/METplus software (MET = Model Evaluation Tools) developed at the @@ -54,7 +55,7 @@ function set_vx_params() { # local valid_args=( \ "obtype" \ - "field" \ + "field_group" \ "accum_hh" \ "outvarname_grid_or_point" \ "outvarname_fieldname_in_obs_input" \ @@ -108,15 +109,17 @@ be a 2-digit integer: # # grid_or_point: # String that is set to either "grid" or "point" depending on whether -# the field in consideration has obs that are gridded or point-based. +# the obs type containing the field group is gridded or point-based. # # fieldname_in_obs_input: -# String used to search for the field in the input observation files -# read in by MET. +# If the field group represents a single field, this is the string used +# to search for that field in the input observation files read in by MET. +# If not, this is set to a null string. # # fieldname_in_fcst_input: -# String used to search for the field in the input forecast files read -# in by MET. +# If the field group represents a single field, this is the string used +# to search for that field in the input forecast files read in by MET. +# If not, this is set to a null string.
# # fieldname_in_MET_output: # String that will be used in naming arrays defined in MET output files @@ -140,21 +143,21 @@ be a 2-digit integer: "CCPA") _grid_or_point_="grid" - case "${field}" in + case "${field_group}" in "APCP") - fieldname_in_obs_input="${field}" - fieldname_in_fcst_input="${field}" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}${accum_hh}h" + fieldname_in_obs_input="${field_group}" + fieldname_in_fcst_input="${field_group}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}${accum_hh}h" ;; *) print_err_msg_exit "\ A method for setting verification parameters has not been specified for -this observation type (obtype) and field (field) combination: +this observation type (obtype) and field group (field_group) combination: obtype = \"${obtype}\" - field = \"${field}\"" + field_group = \"${field_group}\"" ;; esac @@ -163,21 +166,21 @@ this observation type (obtype) and field (field) combination: "NOHRSC") _grid_or_point_="grid" - case "${field}" in + case "${field_group}" in "ASNOW") - fieldname_in_obs_input="${field}" - fieldname_in_fcst_input="${field}" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}${accum_hh}h" + fieldname_in_obs_input="${field_group}" + fieldname_in_fcst_input="${field_group}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}${accum_hh}h" ;; *) print_err_msg_exit "\ A method for setting verification parameters has not been specified for -this observation type (obtype) and field (field) combination: +this observation type (obtype) and field group (field_group) combination: obtype = \"${obtype}\" - field = \"${field}\"" + field_group = \"${field_group}\"" ;; esac @@ -186,28 +189,28 @@ this observation type (obtype) and field (field) combination: "MRMS") _grid_or_point_="grid" - case "${field}" in + case "${field_group}" in "REFC") fieldname_in_obs_input="MergedReflectivityQCComposite" - fieldname_in_fcst_input="${field}" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}" + fieldname_in_fcst_input="${field_group}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}" ;; "RETOP") fieldname_in_obs_input="EchoTop18" - fieldname_in_fcst_input="${field}" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}" + fieldname_in_fcst_input="${field_group}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}" ;; *) print_err_msg_exit "\ A method for setting verification parameters has not been specified for -this observation type (obtype) and field (field) combination: +this observation type (obtype) and field group (field_group) combination: obtype = \"${obtype}\" - field = \"${field}\"" + field_group = \"${field_group}\"" ;; esac @@ -216,28 +219,28 @@ this observation type (obtype) and field (field) combination: "NDAS") _grid_or_point_="point" - case "${field}" in + case "${field_group}" in "ADPSFC") fieldname_in_obs_input="" fieldname_in_fcst_input="" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}" ;; "ADPUPA") fieldname_in_obs_input="" fieldname_in_fcst_input="" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}" ;; *) print_err_msg_exit "\ 
A method for setting verification parameters has not been specified for -this observation type (obtype) and field (field) combination: +this observation type (obtype) and field group (field_group) combination: obtype = \"${obtype}\" - field = \"${field}\"" + field_group = \"${field_group}\"" ;; esac diff --git a/ush/setup.py b/ush/setup.py index 01db240362..b9ca7df848 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -737,35 +737,35 @@ def _remove_tag(tasks, tag): vx_field_groups_all_by_obtype["CCPA"] = ["APCP"] vx_metatasks_all_by_obtype["CCPA"] \ = ["task_get_obs_ccpa", - "metatask_PcpCombine_obs_APCP_all_accums_CCPA", - "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", - "metatask_GridStat_CCPA_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_CCPA", - "metatask_GridStat_CCPA_ensmeanprob_all_accums"] + "metatask_PcpCombine_APCP_all_accums_obs_CCPA", + "metatask_PcpCombine_APCP_all_accums_all_mems", + "metatask_GridStat_APCP_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_APCP_all_accums", + "metatask_GridStat_ensmeanprob_APCP_all_accums"] vx_field_groups_all_by_obtype["NOHRSC"] = ["ASNOW"] vx_metatasks_all_by_obtype["NOHRSC"] \ = ["task_get_obs_nohrsc", - "metatask_PcpCombine_obs_ASNOW_all_accums_NOHRSC", - "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", - "metatask_GridStat_NOHRSC_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_NOHRSC", - "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] + "metatask_PcpCombine_ASNOW_all_accums_obs_NOHRSC", + "metatask_PcpCombine_ASNOW_all_accums_all_mems", + "metatask_GridStat_ASNOW_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums", + "metatask_GridStat_ensmeanprob_ASNOW_all_accums"] vx_field_groups_all_by_obtype["MRMS"] = ["REFC", "RETOP"] vx_metatasks_all_by_obtype["MRMS"] \ = ["task_get_obs_mrms", - "metatask_GridStat_MRMS_all_mems", - "metatask_GenEnsProd_EnsembleStat_MRMS", - "metatask_GridStat_MRMS_ensprob"] + "metatask_GridStat_REFC_RETOP_all_mems", + "metatask_GenEnsProd_EnsembleStat_REFC_RETOP", + "metatask_GridStat_ensprob_REFC_RETOP"] vx_field_groups_all_by_obtype["NDAS"] = ["ADPSFC", "ADPUPA"] vx_metatasks_all_by_obtype["NDAS"] \ = ["task_get_obs_ndas", "task_run_MET_Pb2nc_obs_NDAS", - "metatask_PointStat_NDAS_all_mems", - "metatask_GenEnsProd_EnsembleStat_NDAS", - "metatask_PointStat_NDAS_ensmeanprob"] + "metatask_PointStat_SFC_UPA_all_mems", + "metatask_GenEnsProd_EnsembleStat_SFC_UPA", + "metatask_PointStat_ensmeanprob_SFC_UPA"] # If there are no field groups specified for verification, remove those # tasks that are common to all observation types. 
@@ -785,8 +785,8 @@ def _remove_tag(tasks, tag): f""" Removing verification (meta)task "{metatask}" - from workflow since no fields belonging to observation type "{obtype}" - are specified for verification.""" + from workflow since no field groups from observation type "{obtype}" are + specified for verification.""" )) rocoto_config['tasks'].pop(metatask) # diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index 017404aa2e..16f0aeb9ae 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -76,6 +76,6 @@ valid_vals_DO_AQM_CHEM_LBCS: [True, False] valid_vals_DO_AQM_GEFS_LBCS: [True, False] valid_vals_DO_AQM_SAVE_AIRNOW_HIST: [True, False] valid_vals_COLDSTART: [True, False] -valid_vals_VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] +valid_vals_VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] valid_vals_VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 12, 18, 24 ] From 522c573aab91a21ff053fc0b2e1c32066c747c2f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sat, 2 Nov 2024 20:49:15 -0600 Subject: [PATCH 181/260] Changes from the feature/daily_obs_tasks_doc_mods that were accidentally left out. --- doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst | 1 + ush/config.community.yaml | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 231dc49dd5..01bc917594 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -1607,6 +1607,7 @@ Non-default parameters for verification tasks are set in the ``verification:`` s * ``mm`` refers to the 2-digit valid minutes of the hour * ``SS`` refers to the two-digit valid seconds of the hour +.. _GeneralVXParams: General VX Parameters --------------------------------- diff --git a/ush/config.community.yaml b/ush/config.community.yaml index f380bd28cc..1ce7fc0108 100644 --- a/ush/config.community.yaml +++ b/ush/config.community.yaml @@ -30,11 +30,6 @@ task_plot_allvars: global: DO_ENSEMBLE: false NUM_ENS_MEMBERS: 2 -verification: - CCPA_OBS_DIR: "" - MRMS_OBS_DIR: "" - NDAS_OBS_DIR: "" - VX_FCST_MODEL_NAME: FV3_GFS_v16_CONUS_25km rocoto: tasks: metatask_run_ensemble: From 2c8b015d5007b39039747fedd350e43dadecefea Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 02:53:15 -0700 Subject: [PATCH 182/260] Move the 4 new WE2E vx tests that were recently added to coverage.jet to the set of coverage tests for Hera since there is no data staged on Jet yet. 
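When test names move between machine suites like this, it is easy to leave a name behind in (or add it to) more than one coverage file. A minimal duplicate check, sketched in Python and assuming it is run from the repository root; the script itself is illustrative and not part of the repository:

    import collections
    import pathlib

    counts = collections.Counter()
    for suite in pathlib.Path("tests/WE2E/machine_suites").glob("coverage.*"):
        for line in suite.read_text().splitlines():
            name = line.strip()
            if name and not name.startswith("#"):
                counts[name] += 1

    # Any test listed in more than one coverage suite gets flagged here.
    dupes = sorted(t for t, n in counts.items() if n > 1)
    print("duplicated across suites:", dupes or "none")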
--- tests/WE2E/machine_suites/coverage.hera.gnu.com | 2 ++ tests/WE2E/machine_suites/coverage.hera.intel.nco | 2 ++ tests/WE2E/machine_suites/coverage.jet | 4 ---- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/WE2E/machine_suites/coverage.hera.gnu.com b/tests/WE2E/machine_suites/coverage.hera.gnu.com index 09dadbaedd..e820e6327e 100644 --- a/tests/WE2E/machine_suites/coverage.hera.gnu.com +++ b/tests/WE2E/machine_suites/coverage.hera.gnu.com @@ -10,6 +10,8 @@ MET_ensemble_verification_only_vx_time_lag 2019_halloween_storm 2020_jan_cold_blast vx-det_long-fcst_custom-vx-config_aiml-fourcastnet +vx-det_long-fcst_custom-vx-config_aiml-panguweather vx-det_long-fcst_custom-vx-config_gfs vx-det_long-fcst_winter-wx_SRW-staged vx-det_multicyc_fcst-overlap_ncep-hrrr +vx-det_multicyc_last-obs-00z_ncep-hrrr diff --git a/tests/WE2E/machine_suites/coverage.hera.intel.nco b/tests/WE2E/machine_suites/coverage.hera.intel.nco index cf8b92b59f..ba90d0f5dc 100644 --- a/tests/WE2E/machine_suites/coverage.hera.intel.nco +++ b/tests/WE2E/machine_suites/coverage.hera.intel.nco @@ -11,6 +11,8 @@ grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16 grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_HRRR pregen_grid_orog_sfc_climo vx-det_long-fcst_custom-vx-config_aiml-graphcast +vx-det_multicyc_long-fcst-overlap_nssl-mpas vx-det_multicyc_long-fcst-no-overlap_nssl-mpas vx-det_multicyc_first-obs-00z_ncep-hrrr vx-det_multicyc_no-00z-obs_nssl-mpas +vx-det_multicyc_no-fcst-overlap_ncep-hrrr diff --git a/tests/WE2E/machine_suites/coverage.jet b/tests/WE2E/machine_suites/coverage.jet index 5078e127ef..53308090b1 100644 --- a/tests/WE2E/machine_suites/coverage.jet +++ b/tests/WE2E/machine_suites/coverage.jet @@ -9,7 +9,3 @@ grid_RRFS_AK_3km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16_plot grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2 grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_RRFS_v1beta -vx-det_long-fcst_custom-vx-config_aiml-panguweather -vx-det_multicyc_long-fcst-overlap_nssl-mpas -vx-det_multicyc_last-obs-00z_ncep-hrrr -vx-det_multicyc_no-fcst-overlap_ncep-hrrr From f13988926c33ddceca456dbc9a1b2b575658f86f Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 02:59:47 -0700 Subject: [PATCH 183/260] Bug fix related to removing "ADP" from "ADPSFC" and "ADPUPA". 
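The rename has to be propagated to every WE2E test config that sets VX_FIELD_GROUPS, which is why the hunks below touch several test files. A quick way to find stragglers is to load each config and look for the old names; the sketch below assumes PyYAML is available and is run from the repository root (it is illustrative only, not part of this patch):

    import pathlib
    import yaml

    stale = {"ADPSFC", "ADPUPA"}
    for cfg in pathlib.Path("tests/WE2E/test_configs").rglob("config.*.yaml"):
        try:
            doc = yaml.safe_load(cfg.read_text()) or {}
        except yaml.YAMLError:
            continue  # some configs carry custom YAML tags; skip those here
        groups = (doc.get("verification") or {}).get("VX_FIELD_GROUPS") or []
        hits = stale.intersection(groups)
        if hits:
            print(f"{cfg}: still lists {sorted(hits)}")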
--- .../config.custom_ESGgrid_Great_Lakes_snow_8km.yaml | 2 +- .../config.MET_ensemble_verification_winter_wx.yaml | 2 +- ...nfig.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml | 2 +- ...config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml | 2 +- ...fig.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml | 2 +- .../config.vx-det_long-fcst_custom-vx-config_gfs.yaml | 2 +- .../config.vx-det_long-fcst_winter-wx_SRW-staged.yaml | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml index a55cc5f91a..ffacb0a8cb 100644 --- a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml +++ b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml @@ -60,4 +60,4 @@ verification: NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/NDAS_obs' NOHRSC_OBS_DIR: '{{ workflow.EXPTDIR }}/NOHRSC_obs' VX_FCST_MODEL_NAME: Michigan_Ontario_snow_8km - VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] + VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "SFC", "UPA", "ASNOW" ] diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml index fc6c9f56af..7f761117bb 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml @@ -31,7 +31,7 @@ global: DO_ENSEMBLE: true NUM_ENS_MEMBERS: 10 verification: - VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW', '{%- set data_intvl_hrs = "%02d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} {{- "{valid?fmt=%Y%m%d}/sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml index e5db0cd451..6e6caff5eb 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -60,4 +60,4 @@ verification: FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' # VX_FCST_OUTPUT_INTVL_HRS: 6 - VX_FIELD_GROUPS: [ "ADPSFC" ] + VX_FIELD_GROUPS: [ "SFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml index 5411b0a34f..95b63a3d0c 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -60,4 +60,4 @@ verification: FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' # VX_FCST_OUTPUT_INTVL_HRS: 6 - VX_FIELD_GROUPS: [ "ADPSFC" ] + VX_FIELD_GROUPS: [ "SFC" ] diff --git 
a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml index 9a088cf468..796042fd81 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -60,4 +60,4 @@ verification: FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' # VX_FCST_OUTPUT_INTVL_HRS: 6 - VX_FIELD_GROUPS: [ "ADPSFC" ] + VX_FIELD_GROUPS: [ "SFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml index 9fbca68833..d755752d5f 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -63,4 +63,4 @@ verification: FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' # VX_FCST_OUTPUT_INTVL_HRS: 6 - VX_FIELD_GROUPS: [ "ADPSFC" ] + VX_FIELD_GROUPS: [ "SFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml index 11eaf7b63c..87b9f44631 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml @@ -58,5 +58,5 @@ verification: REMOVE_RAW_OBS_NOHRSC: false # VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' - VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/SRW' From 3ef4e810f2ef7ad35e3a57e471b448d7b2983bd5 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 03:50:31 -0700 Subject: [PATCH 184/260] Bug fixes. 
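The first hunk below corrects a stale keyword left over from the renaming: set_vx_params checks each key=value argument it receives against its valid_args list (which now contains field_group rather than field), so a call that still passes field= fails that check at run time. A rough Python analogue of this keyword-validation pattern, for illustration only (the real check lives in the bash utilities, not in this sketch):

    def set_vx_params(**kwargs):
        # Simplified stand-in for the valid_args check in the bash helper.
        valid_args = {"obtype", "field_group", "accum_hh"}
        bad = sorted(set(kwargs) - valid_args)
        if bad:
            raise ValueError(f"unrecognized argument(s): {bad}")
        return kwargs

    set_vx_params(obtype="CCPA", field_group="APCP", accum_hh="01")  # OK
    try:
        set_vx_params(obtype="CCPA", field="APCP", accum_hh="01")    # stale keyword
    except ValueError as err:
        print(err)  # unrecognized argument(s): ['field']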
--- scripts/exregional_run_met_pcpcombine.sh | 2 +- ush/config_defaults.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 23b10f6ff8..9ff0ee5ada 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -98,7 +98,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="${FIELD_GROUP}" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index aa4cdf99e0..10bf82c0a4 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2662,7 +2662,7 @@ verification: {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} {{- ".${ensmem_name}" }} {%- endif %} - {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc" }}' + {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${FIELD_GROUP}_a${ACCUM_HH}h.nc" }}' # # VX_NDIGITS_ENSMEM_NAMES: # Number of digits to assume/use in the forecast ensemble member identifier From 1fce8276c0ba8bd183aabaa461273bdbde9c9fb1 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 03:50:52 -0700 Subject: [PATCH 185/260] Address Mike K.'s comment. --- ush/eval_metplus_timestr_tmpl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/eval_metplus_timestr_tmpl.py b/ush/eval_metplus_timestr_tmpl.py index edbe0e7012..205fee1593 100644 --- a/ush/eval_metplus_timestr_tmpl.py +++ b/ush/eval_metplus_timestr_tmpl.py @@ -52,7 +52,7 @@ def eval_metplus_timestr_tmpl(init_time, lhr, time_lag, fn_template, verbose=Fal ) parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true") parser.add_argument("-i", "--init_time", help="Initial date in YYYYMMDDHH[mmss] format", type=str, default='') - parser.add_argument("-f", "--lhr", help="Forecast hour", type=int, required=True) + parser.add_argument("-l", "--lhr", help="Lead hour", type=int, required=True) parser.add_argument("-tl", "--time_lag", help="Hours of time lag for a time-lagged ensemble member", type=int, default=0) parser.add_argument("-ft", "--fn_template", help="Template for file names to search; see ??? for details on template settings", type=str, default='') From 22cdd37d3a0ef98736d1defcb7fed8375b6917c2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 14:54:35 -0700 Subject: [PATCH 186/260] Bug fixes related to (meta)task name and variable name changes. 
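Several of these fixes are to Jinja expressions that build a metatask's FIELD_GROUP variable by filtering VX_FIELD_GROUPS. A stale filter list is an easy bug to miss because it does not raise an error; the expression just renders an empty string and the metatask silently expands to no tasks. A small sketch of the expansion, assuming the jinja2 package is available and using made-up context values:

    from jinja2 import Template

    expr = ('{% for var in verification.VX_FIELD_GROUPS %}'
            '{% if var in ["SFC", "UPA"] %}{{ "%s " % var }}{% endif %}'
            '{% endfor %}')
    ctx = {"verification":
           {"VX_FIELD_GROUPS": ["APCP", "REFC", "RETOP", "SFC", "UPA"]}}

    print(repr(Template(expr).render(ctx)))  # 'SFC UPA '

    # With the pre-rename filter list, the same context renders '',
    # so no tasks are generated and no error is reported:
    old = expr.replace('["SFC", "UPA"]', '["ADPSFC", "ADPUPA"]')
    print(repr(Template(old).render(ctx)))   # ''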
--- parm/wflow/verify_ens.yaml | 6 +++--- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 4 ++-- .../config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml | 2 +- .../config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml | 2 +- .../config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml | 2 +- ...nfig.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml | 2 +- .../config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml | 2 +- .../config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml | 2 +- .../config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml | 2 +- ush/set_cycle_and_obs_timeinfo.py | 2 +- ush/setup.py | 2 +- 11 files changed, 14 insertions(+), 14 deletions(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 63d5392af1..07444cf757 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -39,7 +39,7 @@ metatask_GenEnsProd_EnsembleStat_APCP_all_accums: dependency: metataskdep_pcpcombine_fcst: attrs: - metatask: PcpCombine_APCP#ACCUM_HH#h_fcst_all_mems + metatask: PcpCombine_APCP#ACCUM_HH#h_all_mems task_run_MET_EnsembleStat_vx_APCP#ACCUM_HH#h: <<: *task_GenEnsProd_CCPA envars: @@ -144,7 +144,7 @@ metatask_GenEnsProd_EnsembleStat_REFC_RETOP: metatask_GenEnsProd_EnsembleStat_SFC_UPA: var: - FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_GenEnsProd_vx_#FIELD_GROUP#: &task_GenEnsProd_NDAS <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GENENSPROD_OR_ENSEMBLESTAT"' @@ -275,7 +275,7 @@ metatask_PointStat_ensmeanprob_SFC_UPA: statlc: mean prob metatask_PointStat_ens#statlc#_SFC_UPA: var: - FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' task_run_MET_PointStat_vx_ens#statlc#_#FIELD_GROUP#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 2641080fed..654983c70e 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -94,8 +94,8 @@ FIELDNAME_IN_FCST_INPUT="" FIELDNAME_IN_MET_OUTPUT="" FIELDNAME_IN_MET_FILEDIR_NAMES="" -# Note that ACCUM_HH will not be defined for the REFC, RETOP, ADPSFC, and -# ADPUPA field groups. +# Note that ACCUM_HH will not be defined for the REFC, RETOP, SFC, and +# UPA field groups. 
set_vx_params \ obtype="${OBTYPE}" \ field_group="${FIELD_GROUP}" \ diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml index ced46215d0..c7caeec015 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml @@ -58,4 +58,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml index 3ce4ff5f08..834c83d4f4 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml @@ -58,4 +58,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml index a0f10d8b05..ba711145b5 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml @@ -58,4 +58,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml index 429e8e0086..b82dfb5e42 100644 --- 
a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml @@ -60,4 +60,4 @@ verification: VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml index a7af3f27c9..5cd1b35ac2 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml @@ -59,4 +59,4 @@ verification: VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml index 3264c93eca..5972bfa002 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml @@ -60,4 +60,4 @@ verification: VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml index 97e1393864..f3e18104d2 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml @@ -60,4 +60,4 @@ verification: VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: 
'/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.linked/{{- verification.VX_FCST_MODEL_NAME }}' FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${VAR}_a${ACCUM_HH}h.nc' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 36c20e126c..31f615d573 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -634,7 +634,7 @@ def get_obs_retrieve_times_by_day( = [{'obtype': 'CCPA', 'time_type': 'cumul', 'field_groups': ['APCP']}, {'obtype': 'NOHRSC', 'time_type': 'cumul', 'field_groups': ['ASNOW']}, {'obtype': 'MRMS', 'time_type': 'inst', 'field_groups': ['REFC', 'RETOP']}, - {'obtype': 'NDAS', 'time_type': 'inst', 'field_groups': ['ADPSFC', 'ADPUPA']} + {'obtype': 'NDAS', 'time_type': 'inst', 'field_groups': ['SFC', 'UPA']} ] # Create new list that has the same form as the list of dictionaries diff --git a/ush/setup.py b/ush/setup.py index b9ca7df848..e19864cee6 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -759,7 +759,7 @@ def _remove_tag(tasks, tag): "metatask_GenEnsProd_EnsembleStat_REFC_RETOP", "metatask_GridStat_ensprob_REFC_RETOP"] - vx_field_groups_all_by_obtype["NDAS"] = ["ADPSFC", "ADPUPA"] + vx_field_groups_all_by_obtype["NDAS"] = ["SFC", "UPA"] vx_metatasks_all_by_obtype["NDAS"] \ = ["task_get_obs_ndas", "task_run_MET_Pb2nc_obs_NDAS", From cc7dcdd838e3abd0775eb16a44084ae3ff3ff6d2 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 17:47:06 -0700 Subject: [PATCH 187/260] Bug fix in METplus conf templates to account for the field group names 'ADPSFC' and 'ADPUPA' being changed to 'SFC' and 'UPA'. 
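Beyond the simple string substitutions, note that templates such as GridStat_ensmean.conf key a fields-to-exclude dictionary on the incoming input_field_group, so the dictionary keys must track the group renames exactly. The mismatch is easiest to see in plain Python (in the Jinja templates the failed lookup surfaces more quietly, as an undefined value at render time, depending on how undefined variables are handled); values below are copied from the template for illustration:

    fields_to_exclude_by_field_group = {
        "APCP": [], "ASNOW": [], "REFC": [], "RETOP": [],
        "SFC": ["TCDC", "VIS", "HGT"],  # keyed "ADPSFC" before this patch
        "UPA": [],                      # keyed "ADPUPA" before this patch
    }

    input_field_group = "SFC"  # value now handed in by the workflow tasks
    print(fields_to_exclude_by_field_group[input_field_group])
    # ['TCDC', 'VIS', 'HGT']; with the pre-patch keys this same lookup
    # would instead raise KeyError: 'SFC'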
--- parm/metplus/EnsembleStat.conf | 32 ++++++++++++------------- parm/metplus/GenEnsProd.conf | 4 ++-- parm/metplus/GridStat_ensmean.conf | 8 +++---- parm/metplus/GridStat_or_PointStat.conf | 8 +++---- parm/metplus/PointStat_ensmean.conf | 8 +++---- parm/metplus/PointStat_ensprob.conf | 6 ++--- 6 files changed, 33 insertions(+), 33 deletions(-) diff --git a/parm/metplus/EnsembleStat.conf b/parm/metplus/EnsembleStat.conf index ce38b2d209..15ba1d9321 100644 --- a/parm/metplus/EnsembleStat.conf +++ b/parm/metplus/EnsembleStat.conf @@ -87,7 +87,7 @@ OBS_FILE_WINDOW_BEGIN = -300 OBS_FILE_WINDOW_END = 300 OBS_{{METPLUS_TOOL_NAME}}_WINDOW_BEGIN = 0 OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = 0 -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} OBS_WINDOW_BEGIN = -1799 OBS_WINDOW_END = 1800 OBS_{{METPLUS_TOOL_NAME}}_WINDOW_BEGIN = {OBS_WINDOW_BEGIN} @@ -105,7 +105,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} # ens.vld_thresh value in the MET config file {{METPLUS_TOOL_NAME}}_ENS_VLD_THRESH = 1.0 -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} {{METPLUS_TOOL_NAME}}_OBS_QUALITY_INC = 0, 1, 2, 3, 9, NA #{{METPLUS_TOOL_NAME}}_OBS_QUALITY_EXC = @@ -118,7 +118,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {{METPLUS_TOOL_NAME}}_MET_OBS_ERR_TABLE = {MET_BASE}/table_files/obs_error_table.txt {%- elif input_field_group in ['REFC', 'RETOP'] %} {{METPLUS_TOOL_NAME}}_MET_OBS_ERR_TABLE = -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} {{METPLUS_TOOL_NAME}}_MET_OBS_ERR_TABLE = {MET_BASE}/table_files/obs_error_table.txt {%- endif %} @@ -135,7 +135,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {%- set comment_or_null = '' %} {%- set regrid_to_grid = 'FCST' %} {%- set regrid_method = 'BUDGET' %} -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} {%- set comment_or_null = '#' %} {%- set regrid_to_grid = 'NONE' %} {%- set regrid_method = 'BILIN' %} @@ -160,8 +160,8 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {{METPLUS_TOOL_NAME}}_DUPLICATE_FLAG = NONE {{METPLUS_TOOL_NAME}}_SKIP_CONST = TRUE {{METPLUS_TOOL_NAME}}_OBS_ERROR_FLAG = FALSE -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} -{{METPLUS_TOOL_NAME}}_MESSAGE_TYPE = {{input_field_group}} +{%- elif input_field_group in ['SFC', 'UPA'] %} +{{METPLUS_TOOL_NAME}}_MESSAGE_TYPE = {{fieldname_in_met_filedir_names}} {{METPLUS_TOOL_NAME}}_DUPLICATE_FLAG = NONE {{METPLUS_TOOL_NAME}}_SKIP_CONST = FALSE {{METPLUS_TOOL_NAME}}_OBS_ERROR_FLAG = FALSE @@ -197,7 +197,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {%- set comment_or_null = '' %} {%- elif input_field_group in ['REFC', 'RETOP'] %} {%- set comment_or_null = '' %} -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} {%- set comment_or_null = '#' %} {%- endif %} {{comment_or_null}}{{METPLUS_TOOL_NAME}}_CLIMO_CDF_BINS = 1 @@ -207,7 +207,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {{METPLUS_TOOL_NAME}}_MASK_GRID = {%- elif input_field_group in ['REFC', 'RETOP'] %} {{METPLUS_TOOL_NAME}}_MASK_GRID = FULL -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} {{METPLUS_TOOL_NAME}}_MASK_GRID = {%- endif %} @@ -483,7 +483,7 @@ FCST_VAR{{ns.var_count}}_OPTIONS = convert(x) = x * 3.28084 * 0.001; ;; Convert 
{{opts_indent}}ens_phist_bin_size = 0.05; {%- endif %} - {%- elif input_field_group == 'ADPSFC' %} + {%- elif input_field_group == 'SFC' %} {%- if field_fcst == 'HGT' %} FCST_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; @@ -499,7 +499,7 @@ FCST_VAR{{ns.var_count}}_OPTIONS = interp = { type = [ { method = NEAREST; width FCST_VAR{{ns.var_count}}_OPTIONS = GRIB2_pdt = 0; ;; Derive instantaneous 10-m wind from U/V components, overriding max 10-m wind. {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_fcst == 'CAPE' %} FCST_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; @@ -608,7 +608,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = censor_thresh = lt-20; {{opts_indent}}ens_phist_bin_size = 0.05; {%- endif %} - {%- elif input_field_group == 'ADPSFC' %} + {%- elif input_field_group == 'SFC' %} {%- if field_obs in ['DPT', 'TMP', 'WIND'] %} OBS_VAR{{ns.var_count}}_OPTIONS = obs_error = { flag = TRUE; dist_type = NONE; dist_parm = []; inst_bias_scale = 1.0; inst_bias_offset = 0.0; min = NA; max = NA; } @@ -616,7 +616,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = obs_error = { flag = TRUE; dist_type = NONE; d OBS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215 {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_obs in ['DPT', 'HGT', 'TMP', 'WIND'] %} OBS_VAR{{ns.var_count}}_OPTIONS = obs_error = { flag = TRUE; dist_type = NONE; dist_parm = []; inst_bias_scale = 1.0; inst_bias_offset = 0.0; min = NA; max = NA; } @@ -649,7 +649,7 @@ OUTPUT_BASE = {{output_base}} # # Point observation input directory for {{MetplusToolName}}. # -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_DIR = {{obs_input_dir}} {%- else %} OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_DIR = @@ -657,7 +657,7 @@ OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_DIR = # # Grid observation input directory for {{MetplusToolName}}. # -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_DIR = {%- else %} OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_DIR = {{obs_input_dir}} @@ -691,7 +691,7 @@ STAGING_DIR = {{staging_dir}} # Template for point observation input to {{MetplusToolName}} relative to # OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_DIR. # -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_TEMPLATE = {{obs_input_fn_template}} {%- else %} OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_TEMPLATE = @@ -700,7 +700,7 @@ OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_TEMPLATE = # Template for gridded observation input to {{MetplusToolName}} relative to # OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_DIR. # -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_TEMPLATE = {%- else %} OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_TEMPLATE = {{obs_input_fn_template}} diff --git a/parm/metplus/GenEnsProd.conf b/parm/metplus/GenEnsProd.conf index 17005ecd1a..153eae196b 100644 --- a/parm/metplus/GenEnsProd.conf +++ b/parm/metplus/GenEnsProd.conf @@ -328,7 +328,7 @@ Set forecast field options. ENS_VAR{{ns.var_count}}_OPTIONS = convert(x) = x * 3.28084 * 0.001; ;; Convert from meters to kilofeet. 
{%- endif %} - {%- elif input_field_group == 'ADPSFC' %} + {%- elif input_field_group == 'SFC' %} {%- if field_fcst == 'HGT' %} ENS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; @@ -344,7 +344,7 @@ ENS_VAR{{ns.var_count}}_OPTIONS = interp = { type = [ { method = NEAREST; width ENS_VAR{{ns.var_count}}_OPTIONS = GRIB2_pdt = 0; ;; Derive instantaneous 10-m wind from U/V components, overriding max 10-m wind. {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_fcst == 'CAPE' %} ENS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/GridStat_ensmean.conf b/parm/metplus/GridStat_ensmean.conf index 21d23ac4eb..7c3b3b7ad9 100644 --- a/parm/metplus/GridStat_ensmean.conf +++ b/parm/metplus/GridStat_ensmean.conf @@ -174,8 +174,8 @@ following dictionary. 'ASNOW': [], 'REFC': [], 'RETOP': [], - 'ADPSFC': ['TCDC', 'VIS', 'HGT'], - 'ADPUPA': []} %} + 'SFC': ['TCDC', 'VIS', 'HGT'], + 'UPA': []} %} {%- set fields_fcst_to_exclude = fields_fcst_to_exclude_by_field_group[input_field_group] %} {#- @@ -383,7 +383,7 @@ Set forecast field options. {%- endif %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPUPA' %} + {%- if input_field_group == 'UPA' %} {%- if field_fcst == 'CAPE' %} FCST_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; @@ -462,7 +462,7 @@ Set observation field options. OBS_VAR{{ns.var_count}}_OPTIONS = convert(x) = 100.0*x; {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_obs == 'CAPE' %} OBS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/GridStat_or_PointStat.conf b/parm/metplus/GridStat_or_PointStat.conf index 865f1c8d14..155b028291 100644 --- a/parm/metplus/GridStat_or_PointStat.conf +++ b/parm/metplus/GridStat_or_PointStat.conf @@ -500,7 +500,7 @@ FCST_VAR{{ns.var_count}}_OPTIONS = set_attr_lead = "{lead?fmt=%H%M%S}"; {{opts_indent}}cnt_logic = UNION; {%- endif %} - {%- elif (input_field_group == 'ADPSFC') %} + {%- elif (input_field_group == 'SFC') %} {%- if (field_fcst in ['WIND']) %} {{opts_indent}}GRIB2_pdt = 0; ;; Derive instantaneous 10-m wind from U/V components, overriding max 10-m wind. @@ -518,7 +518,7 @@ FCST_VAR{{ns.var_count}}_OPTIONS = set_attr_lead = "{lead?fmt=%H%M%S}"; {{opts_indent}}desc = "CEILING"; {%- endif %} - {%- elif (input_field_group == 'ADPUPA') %} + {%- elif (input_field_group == 'UPA') %} {%- if (field_fcst in ['HGT']) %} {%- if (levels_fcst[0] in ['L0']) %} @@ -599,7 +599,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = convert(x) = x * 3280.84 * 0.001; {{opts_indent}}cnt_logic = UNION; {%- endif %} - {%- elif (input_field_group == 'ADPSFC') %} + {%- elif (input_field_group == 'SFC') %} {%- if (field_obs in ['WIND']) %} OBS_VAR{{ns.var_count}}_OPTIONS = GRIB2_pdt = 0; ;; Derive instantaneous 10-m wind from U/V components, overriding max 10-m wind. @@ -612,7 +612,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; {{opts_indent}}interp = { type = [ { method = NEAREST; width = 1; } ]; } {%- endif %} - {%- elif (input_field_group == 'ADPUPA') %} + {%- elif (input_field_group == 'UPA') %} {%- if (field_obs in ['CAPE', 'MLCAPE']) %} OBS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/PointStat_ensmean.conf b/parm/metplus/PointStat_ensmean.conf index 8637a7501d..fc9ccec85b 100644 --- a/parm/metplus/PointStat_ensmean.conf +++ b/parm/metplus/PointStat_ensmean.conf @@ -238,8 +238,8 @@ following dictionary. 
'ASNOW': [], 'REFC': [], 'RETOP': [], - 'ADPSFC': ['TCDC', 'VIS', 'HGT'], - 'ADPUPA': []} %} + 'SFC': ['TCDC', 'VIS', 'HGT'], + 'UPA': []} %} {%- set fields_fcst_to_exclude = fields_fcst_to_exclude_by_field_group[input_field_group] %} {#- @@ -419,7 +419,7 @@ Set forecast field options. {%- endif %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPUPA' %} + {%- if input_field_group == 'UPA' %} {%- if field_fcst == 'CAPE' %} FCST_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; @@ -481,7 +481,7 @@ Set observation field options. {%- set opts_indent_len = opts_indent_len - 1 %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPUPA' %} + {%- if input_field_group == 'UPA' %} {%- if field_obs == 'CAPE' %} OBS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/PointStat_ensprob.conf b/parm/metplus/PointStat_ensprob.conf index 885ba121be..42ac254a4b 100644 --- a/parm/metplus/PointStat_ensprob.conf +++ b/parm/metplus/PointStat_ensprob.conf @@ -355,7 +355,7 @@ Set forecast field options. {%- endif %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPSFC' %} + {%- if input_field_group == 'SFC' %} {%- if field_fcst == 'HGT' %} FCST_VAR{{ns.var_count}}_OPTIONS = desc = "CEILING"; @@ -400,7 +400,7 @@ Set observation field options. {%- set opts_indent_len = opts_indent_len - 1 %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPSFC' %} + {%- if input_field_group == 'SFC' %} {%- if field_obs == 'CEILING' %} OBS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; @@ -409,7 +409,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; OBS_VAR{{ns.var_count}}_OPTIONS = interp = { type = [ { method = NEAREST; width = 1; } ]; } {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_obs == 'CAPE' %} OBS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; From dc91a690d1f1f83e5ccb78218e7bf998bdf04c40 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 17:51:21 -0700 Subject: [PATCH 188/260] Bug fix in vx configuration files to account for the field group names 'ADPSFC' and 'ADPUPA' being changed to 'SFC' and 'UPA'. --- .../metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml | 2 +- parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml | 2 +- parm/metplus/vx_configs/vx_config_det.yaml | 4 ++-- parm/metplus/vx_configs/vx_config_ens.yaml | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml index 11bcb2e568..81425cc1a1 100644 --- a/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml +++ b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml @@ -41,7 +41,7 @@ # it means the name of the field in the forecast data is RETOP while its # name in the observations is EchoTop18. # -ADPSFC: +SFC: TMP: Z2: [] UGRD: diff --git a/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml index 9b8e25ff59..dde2dd3302 100644 --- a/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml +++ b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml @@ -41,7 +41,7 @@ # it means the name of the field in the forecast data is RETOP while its # name in the observations is EchoTop18. 
# -ADPSFC: +SFC: TMP: Z2: [] UGRD: diff --git a/parm/metplus/vx_configs/vx_config_det.yaml b/parm/metplus/vx_configs/vx_config_det.yaml index c20e192dcb..48b8aff97b 100644 --- a/parm/metplus/vx_configs/vx_config_det.yaml +++ b/parm/metplus/vx_configs/vx_config_det.yaml @@ -57,7 +57,7 @@ REFC: RETOP: RETOP%%EchoTop18: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] -ADPSFC: +SFC: TMP: Z2: [] DPT: @@ -90,7 +90,7 @@ ADPSFC: L0%%Z0: ['ge1.0%%ge164&&le166'] CICEP%%PRWE: L0%%Z0: ['ge1.0%%ge174&&le176'] -ADPUPA: +UPA: TMP: P1000: &adpupa_tmp_threshes [] diff --git a/parm/metplus/vx_configs/vx_config_ens.yaml b/parm/metplus/vx_configs/vx_config_ens.yaml index 2608490565..4eb1524648 100644 --- a/parm/metplus/vx_configs/vx_config_ens.yaml +++ b/parm/metplus/vx_configs/vx_config_ens.yaml @@ -21,7 +21,7 @@ REFC: RETOP: RETOP%%EchoTop18: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] -ADPSFC: +SFC: TMP: Z2: ['ge268', 'ge273', 'ge278', 'ge293', 'ge298', 'ge303'] DPT: @@ -34,7 +34,7 @@ ADPSFC: L0: ['lt1609', 'lt8045', 'ge8045'] HGT%%CEILING: L0: ['lt152', 'lt305', 'lt914'] -ADPUPA: +UPA: TMP: P850: ['ge288', 'ge293', 'ge298'] P700: ['ge273', 'ge278', 'ge283'] From 8eb38f240635e85366260cd913fa64b5296df850 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 17:52:18 -0700 Subject: [PATCH 189/260] Fix typo. --- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 654983c70e..84db54ea6f 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -69,7 +69,7 @@ Entering script: \"${scrfunc_fn}\" In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the METplus ${MetplusToolName} -tool to perform deterministic verification of the specified field gropup +tool to perform deterministic verification of the specified field group (FIELD_GROUP) for a single forecast. ========================================================================" # From 8caae0a49aa52d1db6a2292d11faa314ae62a04d Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 17:53:40 -0700 Subject: [PATCH 190/260] Bug fix in ex-script and an auxiliary bash script to account for the field group names 'ADPSFC' and 'ADPUPA' being changed to 'SFC' and 'UPA'. 
--- scripts/exregional_run_met_pb2nc_obs.sh | 2 +- ush/set_vx_params.sh | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index d1d055fe66..63ea5ca760 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -118,7 +118,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field_group="ADP${FIELD_GROUP}" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ diff --git a/ush/set_vx_params.sh b/ush/set_vx_params.sh index e17a360c38..993e45ac67 100644 --- a/ush/set_vx_params.sh +++ b/ush/set_vx_params.sh @@ -221,18 +221,18 @@ this observation type (obtype) and field group (field_group) combination: _grid_or_point_="point" case "${field_group}" in - "ADPSFC") + "SFC") fieldname_in_obs_input="" fieldname_in_fcst_input="" - fieldname_in_MET_output="${field_group}" - fieldname_in_MET_filedir_names="${field_group}" + fieldname_in_MET_output="ADP${field_group}" + fieldname_in_MET_filedir_names="ADP${field_group}" ;; - "ADPUPA") + "UPA") fieldname_in_obs_input="" fieldname_in_fcst_input="" - fieldname_in_MET_output="${field_group}" - fieldname_in_MET_filedir_names="${field_group}" + fieldname_in_MET_output="ADP${field_group}" + fieldname_in_MET_filedir_names="ADP${field_group}" ;; *) From f0ccb7829e9fdb6b31808d18f0229d2676942981 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 18:38:54 -0700 Subject: [PATCH 191/260] Fix typo. --- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 84db54ea6f..0531d21755 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -282,7 +282,7 @@ export LOGDIR # if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: +The list of lead hours for which to run METplus is empty: VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # From f1bd90a2da4a415b2f8634874dcd27f09463ea2a Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Sun, 3 Nov 2024 18:39:52 -0700 Subject: [PATCH 192/260] Bug fix: For ensemble vx tasks, add omitted cycle dates to the end of the names of the METplus log files. --- scripts/exregional_run_met_genensprod_or_ensemblestat.sh | 2 +- .../exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh | 4 ++-- .../exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index d02adddf77..a7ec52ad6a 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -307,7 +307,7 @@ fi # metplus_config_tmpl_bn="${MetplusToolName}" metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}" -metplus_log_bn="${metplus_config_bn}" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. 
# diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 424756d72b..5ecc588316 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -249,8 +249,8 @@ fi # First, set the base file names. # metplus_config_tmpl_bn="${MetplusToolName}_ensmean" -metplus_config_bn="${MetplusToolName}_ensmean_${FIELDNAME_IN_MET_FILEDIR_NAMES}" -metplus_log_bn="${metplus_config_bn}" +metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensmean" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index e6ad107e81..c7693fe06c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -249,8 +249,8 @@ fi # First, set the base file names. # metplus_config_tmpl_bn="${MetplusToolName}_ensprob" -metplus_config_bn="${MetplusToolName}_ensprob_${FIELDNAME_IN_MET_FILEDIR_NAMES}" -metplus_log_bn="${metplus_config_bn}" +metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensprob" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # From ddf1a552ee6a35ff602c744f6b9c43df1a85ca13 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 4 Nov 2024 04:03:32 -0700 Subject: [PATCH 193/260] Modify ensemble vx task and metatask names to match those for deterministic vx. Make corresponding changes in the docs. --- .../BuildingRunningTesting/RunSRW.rst | 26 +++++++++---------- parm/wflow/verify_ens.yaml | 22 ++++++++-------- ush/setup.py | 8 +++--- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index fb8261697f..4689b94308 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -839,11 +839,11 @@ about metatasks. .. _VXWorkflowTasksTable: -.. list-table:: Verification (VX) Workflow Tasks and Metatasks in the SRW App +.. list-table:: Default Verification (VX) Workflow Tasks and Metatasks in the SRW App :widths: 5 95 :header-rows: 1 - * - Workflow Task (``taskgroup``) + * - Workflow (Meta)Task (``taskgroup``) - Task Description * - :bolditalic:`task_get_obs_ccpa` (``verify_pre.yaml``) @@ -1004,41 +1004,41 @@ about metatasks. is included in ``VX_FIELD_GROUPS``, and the ones for ``UPA`` are included only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'UPA'`` is included in ``VX_FIELD_GROUPS``. - * - :bolditalic:`metatask_GridStat_ensmeanprob_APCP_all_accums` (``verify_ens.yaml``) + * - :bolditalic:`metatask_GridStat_APCP_all_accums_ensmeanprob` (``verify_ens.yaml``) - Set of tasks that run grid-to-grid verification of the ensemble mean of APCP and grid-to-grid probabilistic verification of the ensemble of APCP forecasts as a whole. 
In rocoto, the tasks under this metatask for - ensemble mean verification are named ``run_MET_GridStat_vx_ensmean_APCP{accum_intvl}h``, and the ones for - ensemble probabilistic verification are named ``run_MET_GridStat_vx_ensprob_APCP{accum_intvl}h``, where + ensemble mean verification are named ``run_MET_GridStat_vx_APCP{accum_intvl}h_ensmean``, and the ones for + ensemble probabilistic verification are named ``run_MET_GridStat_vx_APCP{accum_intvl}h_ensprob``, where ``{accum_intvl}`` is the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the tasks are being run. This metatask is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'APCP'`` is included in ``VX_FIELD_GROUPS``. - * - :bolditalic:`metatask_GridStat_ensmeanprob_ASNOW_all_accums` (``verify_ens.yaml``) + * - :bolditalic:`metatask_GridStat_ASNOW_all_accums_ensmeanprob` (``verify_ens.yaml``) - Set of tasks that run grid-to-grid verification of the ensemble mean of ASNOW and grid-to-grid probabilistic verification of the ensemble of ASNOW forecasts as a whole. In rocoto, the tasks under this metatask for - ensemble mean verification are named ``run_MET_GridStat_vx_ensmean_ASNOW{accum_intvl}h``, and the ones for - ensemble probabilistic verification are named ``run_MET_GridStat_vx_ensprob_ASNOW{accum_intvl}h``, where + ensemble mean verification are named ``run_MET_GridStat_vx_ASNOW{accum_intvl}h_ensmean``, and the ones for + ensemble probabilistic verification are named ``run_MET_GridStat_vx_ASNOW{accum_intvl}h_ensprob``, where ``{accum_intvl}`` is the accumulation interval (in hours, e.g. ``01``, ``03``, ``06``, etc) for which the tasks are being run. These tasks will be included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. - * - :bolditalic:`metatask_GridStat_ensprob_REFC_RETOP` (``verify_ens.yaml``) + * - :bolditalic:`metatask_GridStat_REFC_RETOP_ensprob` (``verify_ens.yaml``) - Set of tasks that run grid-to-grid probabilistic verification of the ensemble of :term:`composite reflectivity` (represented by the verification field group ``REFC``) and :term:`echo top` (represented by the field group ``RETOP``). (Note that there is no grid-to-grid verification of the ensemble mean of these quantities.) - In rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_ensprob_{field_group}``, where + In rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_{field_group}_ensprob``, where ``{field_group}`` is the field group (in this case either ``REFC`` or ``RETOP``) for which the task is being run. The task for ``REFC`` is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'REFC'`` is included in ``VX_FIELD_GROUPS``, and the one for ``RETOP`` is included only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'RETOP'`` is included in ``VX_FIELD_GROUPS``. - * - :bolditalic:`metatask_PointStat_ensmeanprob_SFC_UPA` (``verify_ens.yaml``) + * - :bolditalic:`metatask_PointStat_SFC_UPA_ensmeanprob` (``verify_ens.yaml``) - Set of tasks that run grid-to-grid verification of the ensemble mean of surface fields (represented by the verification field group ``SFC``) and upper-air fields (represented by the verification field group ``UPA``) as well as grid-to-grid probabilistic verification of the ensemble of the surface and upper-air field forecasts as a whole. 
In rocoto, the tasks under this metatask for ensemble mean verification are named - ``run_MET_PointStat_vx_ensmean_{field_group}``, and the ones for ensemble probabilistic verification are - named ``run_MET_PointStat_vx_ensprob_{field_group}``, where ``{field_group}`` is the field group (in this + ``run_MET_PointStat_vx_{field_group}_ensmean``, and the ones for ensemble probabilistic verification are + named ``run_MET_PointStat_vx_{field_group}_ensprob``, where ``{field_group}`` is the field group (in this case either ``SFC`` or ``UPA``) on which the task is being run. The tasks for ``SFC`` are included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'SFC'`` is included in ``VX_FIELD_GROUPS``, and the ones for ``UPA`` are included only if ``DO_ENSEMBLE`` is set to ``True`` in diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 07444cf757..eb13e97e1c 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -192,14 +192,14 @@ metatask_GenEnsProd_EnsembleStat_SFC_UPA: attrs: task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_GridStat_ensmeanprob_APCP_all_accums: +metatask_GridStat_APCP_all_accums_ensmeanprob: var: stat: MEAN PROB statlc: mean prob - metatask_GridStat_ens#statlc#_APCP_all_accums: + metatask_GridStat_APCP_all_accums_ens#statlc#: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_ens#statlc#_APCP#ACCUM_HH#h: + task_run_MET_GridStat_vx_APCP#ACCUM_HH#h_ens#statlc#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' envars: @@ -219,14 +219,14 @@ metatask_GridStat_ensmeanprob_APCP_all_accums: attrs: task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h -metatask_GridStat_ensmeanprob_ASNOW_all_accums: +metatask_GridStat_ASNOW_all_accums_ensmeanprob: var: stat: MEAN PROB statlc: mean prob - metatask_GridStat_ens#statlc#_ASNOW_all_accums: + metatask_GridStat_ASNOW_all_accums_ens#statlc#: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_ens#statlc#_ASNOW#ACCUM_HH#h: + task_run_MET_GridStat_vx_ASNOW#ACCUM_HH#h_ens#statlc#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' envars: @@ -246,10 +246,10 @@ metatask_GridStat_ensmeanprob_ASNOW_all_accums: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h -metatask_GridStat_ensprob_REFC_RETOP: +metatask_GridStat_REFC_RETOP_ensprob: var: FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_ensprob_#FIELD_GROUP#: + task_run_MET_GridStat_vx_#FIELD_GROUP#_ensprob: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENSPROB"' envars: @@ -269,14 +269,14 @@ metatask_GridStat_ensprob_REFC_RETOP: attrs: task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_PointStat_ensmeanprob_SFC_UPA: +metatask_PointStat_SFC_UPA_ensmeanprob: var: stat: MEAN PROB statlc: mean prob - metatask_PointStat_ens#statlc#_SFC_UPA: + metatask_PointStat_SFC_UPA_ens#statlc#: var: FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - 
task_run_MET_PointStat_vx_ens#statlc#_#FIELD_GROUP#:
+      task_run_MET_PointStat_vx_#FIELD_GROUP#_ens#statlc#:
         <<: *default_task_verify_ens
         command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"'
         envars:
diff --git a/ush/setup.py b/ush/setup.py
index e19864cee6..85e0cedca1 100644
--- a/ush/setup.py
+++ b/ush/setup.py
@@ -741,7 +741,7 @@ def _remove_tag(tasks, tag):
            "metatask_PcpCombine_APCP_all_accums_all_mems",
            "metatask_GridStat_APCP_all_accums_all_mems",
            "metatask_GenEnsProd_EnsembleStat_APCP_all_accums",
-           "metatask_GridStat_ensmeanprob_APCP_all_accums"]
+           "metatask_GridStat_APCP_all_accums_ensmeanprob"]
 
     vx_field_groups_all_by_obtype["NOHRSC"] = ["ASNOW"]
     vx_metatasks_all_by_obtype["NOHRSC"] \
@@ -750,14 +750,14 @@ def _remove_tag(tasks, tag):
            "metatask_PcpCombine_ASNOW_all_accums_all_mems",
            "metatask_GridStat_ASNOW_all_accums_all_mems",
            "metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums",
-           "metatask_GridStat_ensmeanprob_ASNOW_all_accums"]
+           "metatask_GridStat_ASNOW_all_accums_ensmeanprob"]
 
     vx_field_groups_all_by_obtype["MRMS"] = ["REFC", "RETOP"]
     vx_metatasks_all_by_obtype["MRMS"] \
         = ["task_get_obs_mrms",
            "metatask_GridStat_REFC_RETOP_all_mems",
            "metatask_GenEnsProd_EnsembleStat_REFC_RETOP",
-           "metatask_GridStat_ensprob_REFC_RETOP"]
+           "metatask_GridStat_REFC_RETOP_ensprob"]
 
     vx_field_groups_all_by_obtype["NDAS"] = ["SFC", "UPA"]
     vx_metatasks_all_by_obtype["NDAS"] \
@@ -765,7 +765,7 @@ def _remove_tag(tasks, tag):
            "task_run_MET_Pb2nc_obs_NDAS",
            "metatask_PointStat_SFC_UPA_all_mems",
            "metatask_GenEnsProd_EnsembleStat_SFC_UPA",
-           "metatask_PointStat_ensmeanprob_SFC_UPA"]
+           "metatask_PointStat_SFC_UPA_ensmeanprob"]
 
     # If there are no field groups specified for verification, remove those
     # tasks that are common to all observation types.
From 9dcec551ff42e7db51fc25ae975d555e4aab84e2 Mon Sep 17 00:00:00 2001
From: gerard ketefian 
Date: Mon, 4 Nov 2024 05:40:46 -0700
Subject: [PATCH 194/260] Minor fixes to code comments.
---
 ush/config_defaults.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml
index 10bf82c0a4..8c406040ab 100644
--- a/ush/config_defaults.yaml
+++ b/ush/config_defaults.yaml
@@ -2336,11 +2336,11 @@ global:
   PRINT_DIFF_PGR: false
 
 #----------------------------
-# verification (vx) parameters
+# Verification (VX) parameters
#-----------------------------
 verification:
   #
-  # General Verification Parameters
+  # General VX Parameters
   # -------------------------------
   #
   # VX_FIELD_GROUPS:
@@ -2397,7 +2397,7 @@ verification:
   #
   METPLUS_VERBOSITY_LEVEL: 2
   #
-  # Observation-Specific Parameters
+  # VX Parameters for Observations
   # -------------------------------
   #
   # Note:
@@ -2601,7 +2601,7 @@ verification:
   #
   NUM_MISSING_OBS_FILES_MAX: 2
   #
-  # Forecast-Specific Parameters
+  # VX Parameters for Forecasts
   # ----------------------------
   #
   # VX_FCST_MODEL_NAME:
From 81bd5b31a7c9c939937b257d7290a2814b606029 Mon Sep 17 00:00:00 2001
From: gerard ketefian 
Date: Mon, 4 Nov 2024 05:41:35 -0700
Subject: [PATCH 195/260] Bug fix for ensemble vx of ASNOW related to (meta)task name changes.
--- parm/wflow/verify_ens.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index eb13e97e1c..45cf5385be 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -74,7 +74,7 @@ metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums: and: metataskdep_pcpcombine_fcst: attrs: - metatask: PcpCombine_ASNOW#ACCUM_HH#h_fcst_all_mems + metatask: PcpCombine_ASNOW#ACCUM_HH#h_all_mems task_run_MET_EnsembleStat_vx_ASNOW#ACCUM_HH#h: <<: *task_GenEnsProd_NOHRSC envars: From 46af6f7277265796440757ba94344ce7edfe39cf Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 4 Nov 2024 08:33:34 -0700 Subject: [PATCH 196/260] Updates to docs. --- .../BuildingRunningTesting/RunSRW.rst | 109 +++++++++++++----- 1 file changed, 81 insertions(+), 28 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 4689b94308..7ee105dc61 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -641,12 +641,21 @@ To use METplus verification, MET and METplus modules need to be installed. To t tasks: taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' -:numref:`Table %s ` indicates which verification capabilities/workflow tasks each ``verify_*.yaml`` file enables. -Users must add ``verify_pre.yaml`` anytime they want to run verification (VX); it runs preprocessing tasks that are necessary -for both deterministic and ensemble VX, including retrieval of obs files from various data stores (e.g. NOAA's HPSS) if those -files do not already exist on disk at the locations specified by some of the parameters in the ``verification:`` section of -``config_defaults.yaml`` and/or ``config.yaml`` (see ?? for details). -Then users can add ``verify_det.yaml`` for deterministic VX or ``verify_ens.yaml`` for ensemble VX (or both). Note that ensemble VX requires the user to be running an ensemble forecast or to stage ensemble forecast files in an appropriate location. +:numref:`Table %s ` indicates which workflow (meta)tasks each ``verify_*.yaml`` file enables. +Users must include ``verify_pre.yaml`` anytime they want to run verification (VX) because this contains +preprocessing tasks that are necessary for both deterministic and ensemble VX, including retrieval of +obs files from various data stores (e.g. NOAA HPSS) if those files do not already exist on disk (the +files must exist at the locations specified by the variables ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1]`` +in the ``verification:`` section of ``config.yaml``; see discussion below for details). +Then users can add ``verify_det.yaml`` for deterministic VX, ``verify_ens.yaml`` for ensemble VX, +or both if they want to run ensemble VX on an ensemble forecast but also run deterministic VX on +each ensemble member. + +Note that ensemble VX requires the user to either run an ensemble forecast with the SRW App or to stage +ensemble forecast files (at the locations specified by the variables ``VX_FCST_INPUT_BASEDIR``, +``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE`` in the ``verification:`` section of ``config.yaml``). +In either case, ``DO_ENSEMBLE`` in ``config.yaml`` must be set to ``True``. + .. 
_VX-yamls: @@ -659,11 +668,16 @@ Then users can add ``verify_det.yaml`` for deterministic VX or ``verify_ens.yaml * - verify_pre.yaml - Enables (meta)tasks that are prerequisites for both deterministic and ensemble verification (vx) * - verify_det.yaml - - Enables (meta)tasks that perform deterministic vx on a single forecast or on each member of an ensemble forecast + - Enables (meta)tasks that perform deterministic VX on a single forecast or on each member of an ensemble forecast * - verify_ens.yaml - - Enables (meta)tasks that perform ensemble vx on an ensemble of forecasts as a whole (must set ``DO_ENSEMBLE: true`` in ``config.yaml``) + - Enables (meta)tasks that perform ensemble VX on an ensemble of forecasts as a whole (requires ``DO_ENSEMBLE`` + to be set to ``True`` in ``config.yaml``) -The ``verify_*.yaml`` files include the definitions of several common verification tasks by default. Individual verification tasks appear in :numref:`Table %s `. The tasks in the ``verify_*.yaml`` files are independent of each other, so users may want to turn some off depending on the needs of their experiment. To turn off a task, simply include its entry from ``verify_*.yaml`` as an empty YAML entry in ``config.yaml``. For example, to turn off PointStat tasks: +The ``verify_*.yaml`` files include by default the definitions of several common verification tasks and metatasks. +These default verification (meta)tasks are described in :numref:`Table %s `. The tasks in the +``verify_*.yaml`` files are independent of each other, so users may want to turn some off depending on the needs of +their experiment. To turn off a task, simply include its entry from ``verify_*.yaml`` as an empty YAML entry in +``config.yaml``. For example, to turn off PointStat tasks: .. code-block:: console @@ -676,22 +690,23 @@ The ``verify_*.yaml`` files include the definitions of several common verificati More information about configuring the ``rocoto:`` section can be found in :numref:`Section %s `. -If users have access to NOAA :term:`HPSS` but have not pre-staged the obs data, the default ``verify_pre.yaml`` -taskgroup will activate a set of ``get_obs_...`` workflow tasks that will attempt to retrieve the required +If users have access to NOAA :term:`HPSS` but have not pre-staged the obs data, the taskgroup in ``verify_pre.yaml`` +will by default activate a set of ``get_obs_...`` workflow tasks that will attempt to retrieve the required files from a data store such as NOAA HPSS. In this case, the variables ``*_OBS_DIR`` in ``config.yaml`` must be set to the base directories under which users want the files to reside, and the variables ``OBS_*_FN_TEMPLATES[1]`` must be set to METplus file name templates (possibly including leading subdirectories relative to ``*_OBS_DIR``) that will be used to name the obs files. (Here, the ``*`` represents any one of the obs types :term:`CCPA`, -:term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS`.) +:term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS`, and the ``[1]`` in ``OBS_*_FN_TEMPLATES[1]`` refers to the second +element of ``OBS_*_FN_TEMPLATES``; the first element should not be changed). Users who do not have access to NOAA HPSS and do not have the data on their system will need to download -:term:`CCPA`, :term:`MRMS`, and :term:`NDAS` data manually from collections of publicly available data, -such as the ones listed `here `__. +:term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` data manually from collections of publicly +available data, such as the ones listed `here `__. 
Users who have already staged the observation data needed for verification on their system (i.e., the :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` data) should set ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1]`` in ``config.yaml`` to match those staging locations and -file names For example, for a case in which all four types of obs are needed for vx, these variables +file names. For example, for a case in which all four types of obs are needed for VX, these variables might be set as follows: .. code-block:: console @@ -709,20 +724,59 @@ might be set as follows: 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] OBS_NDAS_FN_TEMPLATES: [ 'SFC_UPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] -If one of the days encompassed by the experiment was 20240429, and if one of the hours during -that day at which vx will be performed was 03, then, taking the CCPA obs type as an example, +If one of the days encompassed by the experiment is 20240429, and if one of the hours during +that day at which VX will be performed is 03, then, taking the CCPA obs type as an example, one of the ``get_obs_ccpa_...`` tasks in the workflow will look for a CCPA file on disk corresponding to this day and hour at ``/path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t03z.01h.hrap.conus.gb2`` -As described above, if this file does not exist, it will try to retrieve it from a data store -and place it at this location. +As described above, if this file does not exist, the ``get_obs`` task will try to retrieve it +from a data store and place it at this location. After adding the VX tasks to the ``rocoto:`` section and the data paths to the ``verification:`` section, users can proceed to generate the experiment, which will perform VX tasks in addition to the default workflow tasks. + +Note that inclusion of the ``verify_*.yaml`` files under the ``rocoto: tasks: taskgroups:`` section of +``config.yaml`` does not mean all the (eta)tasks in those files will necessarily be included in the workflow. +This is because the VX tasks are grouped into field groups, and only those (meta)tasks in ``verify_*.yaml`` +associated with field groups that are included in the list ``VX_FIELD_GROUPS`` in ``config.yaml`` +are included in the worklow. +Each field group represents one or more meteorologial fields that can be verified. The valid field +groups and their descriptions are given in :numref:`Table %s `. +Thus, setting + +.. code-block:: console + + VX_FIELD_GROUPS: [ 'APCP', 'REFC', 'RETOP', 'SFC', 'UPA' ] + +will run the VX (meta)tasks for all field groups except accumulated snowfall. + + +.. _VXFieldGroupDescsTable: + +.. list-table:: Valid Verification Field Groups and Descriptions + :widths: 20 50 + :header-rows: 1 + + * - Field Group + - Description + * - APCP + - Accumulated precipitation for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS`` + * - ASNOW + - Accumulated snowfall for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS`` + * - REFC + - Composite reflectivity + * - RETOP + - Echo top + * - SFC + - Surface fields + * - UPA + - Upper-air fields + + .. _GenerateWorkflow: Generate the SRW App Workflow @@ -827,15 +881,14 @@ In addition to the baseline tasks described in :numref:`Table %s ` below. 
-The ``taskgroup`` entry after the name of each task or metatask indicates the taskgroup file that must be -included in the user's ``config.yaml`` file under ``rocoto: tasks: taskgroups:`` in order for that task or -metatask to be considered for inclusion in the workflow (see :numref:`Section %s ` for more -details). Metatasks define a set of tasks in the workflow based on multiple values of one or more parameters -such as the ensemble member index, the accumulation interval (for cumulative fields such as accumulated -precipitation), and the name of the verificaiton field group (see description of ``VX_FIELD_GROUPS`` in -:numref:`Section %s `). See :numref:`Section %s ` for more details -about metatasks. +The METplus verification tasks and metatasks that are included by default in ``verify_*.yaml`` are described +in :numref:`Table %s `. The ``taskgroup`` entry after the name of each (meta)task indicates +the taskgroup file that must be included in the user's ``config.yaml`` file under ``rocoto: tasks: taskgroups:`` +in order for that (meta)task to be considered for inclusion in the workflow (see :numref:`Section %s ` +for details). As described in :numref:`Section %s `, metatasks define a set of tasks in the +workflow based on multiple values of one or more parameters such as the ensemble member index, the accumulation +interval (for cumulative fields such as accumulated precipitation), and the name of the verificaiton field group +(see description of ``VX_FIELD_GROUPS`` in :numref:`Section %s `). .. _VXWorkflowTasksTable: From 4484bc0172848b78f08b32c509c00b6ab2d1ada0 Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Mon, 4 Nov 2024 11:02:36 -0700 Subject: [PATCH 197/260] Get minor changes from the feature/daily_obs_tasks_doc_mods branch. --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +- doc/UsersGuide/Reference/Glossary.rst | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 7ee105dc61..72cf85f494 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -622,7 +622,7 @@ The output files (in ``.png`` format) will be located in the ``postprd`` directo Configure METplus Verification Suite (Optional) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Users who want to use the METplus verification suite to evaluate their forecasts need to add additional information to their machine file (``ush/machine/.yaml``) [what would need to change in the machine file?] or their ``config.yaml`` file. Other users may skip to the next step (:numref:`Section %s: Generate the SRW App Workflow `). +Users who want to use the METplus verification suite to evaluate their forecasts need to add additional information to their machine file (``ush/machine/.yaml``) or their ``config.yaml`` file. Other users may skip to the next step (:numref:`Section %s: Generate the SRW App Workflow `). .. 
note:: If METplus users update their METplus installation, they must update the module load statements in ``ufs-srweather-app/modulefiles/tasks//run_vx.local`` to correspond to their system's updated installation: diff --git a/doc/UsersGuide/Reference/Glossary.rst b/doc/UsersGuide/Reference/Glossary.rst index 7ffc569b21..7b8489f82d 100644 --- a/doc/UsersGuide/Reference/Glossary.rst +++ b/doc/UsersGuide/Reference/Glossary.rst @@ -26,7 +26,7 @@ Glossary chgres_cube The preprocessing software used to create initial and boundary condition files to - “cold start” the forecast model. It is part of :term:`UFS_UTILS`. + "cold start" the forecast model. It is part of :term:`UFS_UTILS`. CIN Convective Inhibition. @@ -87,7 +87,7 @@ Glossary Extended Schmidt Gnomonic (ESG) grid. The ESG grid uses the map projection developed by Jim Purser of NOAA :term:`EMC` (:cite:t:`Purser_2020`). ESMF - `Earth System Modeling Framework `__. The ESMF defines itself as “a suite of software tools for developing high-performance, multi-component Earth science modeling applications.” + `Earth System Modeling Framework `__. The ESMF defines itself as "a suite of software tools for developing high-performance, multi-component Earth science modeling applications." ex-scripts Scripting layer (contained in ``ufs-srweather-app/scripts/``) that should be called by a :term:`J-job ` for each workflow componentto run a specific task or sub-task in the workflow. The different scripting layers are described in detail in the :nco:`NCO Implementation Standards document ` @@ -215,7 +215,7 @@ Glossary The branch of physical geography dealing with mountains. Parameterizations - Simplified functions that approximate the effects of small-scale processes (e.g., microphysics, gravity wave drag) that cannot be explicitly resolved by a model grid’s representation of the earth. + Simplified functions that approximate the effects of small-scale processes (e.g., microphysics, gravity wave drag) that cannot be explicitly resolved by a model grid's representation of the earth. RAP `Rapid Refresh `__. The continental-scale NOAA hourly-updated assimilation/modeling system operational at :term:`NCEP`. RAP covers North America and is comprised primarily of a numerical forecast model and an analysis/assimilation system to initialize that model. RAP is complemented by the higher-resolution 3km High-Resolution Rapid Refresh (:term:`HRRR`) model. From 3ddc95c72bdce4acfd6c8dc6842a0f4203adbb4e Mon Sep 17 00:00:00 2001 From: gerard ketefian Date: Thu, 7 Nov 2024 08:41:05 -0700 Subject: [PATCH 198/260] Update documentation on how to run the verification capabilities in the SRW. --- .../BuildingRunningTesting/RunSRW.rst | 371 +++++++++++++----- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 3 + 2 files changed, 278 insertions(+), 96 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 72cf85f494..c27332ae38 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -617,15 +617,21 @@ The output files (in ``.png`` format) will be located in the ``postprd`` directo * To configure an experiment to run METplus verification tasks, see the :ref:`next section `. * Otherwise, skip to :numref:`Section %s ` to generate the workflow. + .. 
_VXConfig: Configure METplus Verification Suite (Optional) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Users who want to use the METplus verification suite to evaluate their forecasts need to add additional information to their machine file (``ush/machine/.yaml``) or their ``config.yaml`` file. Other users may skip to the next step (:numref:`Section %s: Generate the SRW App Workflow `). +Users who want to use the METplus verification (VX) suite to evaluate their forecasts or to evaluate +a staged forecast (e.g. from another forecasting system) need to add additional information to their +machine file (``ush/machine/.yaml``) or their ``config.yaml`` file. Other users may skip +to the next step (:numref:`Section %s: Generate the SRW App Workflow `). + +To use METplus verification, MET and METplus modules need to be installed on the system. .. note:: - If METplus users update their METplus installation, they must update the module load statements in ``ufs-srweather-app/modulefiles/tasks//run_vx.local`` to correspond to their system's updated installation: + If users update their METplus installation, they must also update the module load statements in ``ufs-srweather-app/modulefiles/tasks//run_vx.local`` to correspond to their system's updated installation: .. code-block:: console @@ -633,81 +639,228 @@ Users who want to use the METplus verification suite to evaluate their forecasts module load met/ module load metplus/ -To use METplus verification, MET and METplus modules need to be installed. To turn on verification tasks in the workflow, include the ``parm/wflow/verify_*.yaml`` file(s) in the ``rocoto: tasks: taskgroups:`` section of ``config.yaml``. For example: + +Background +`````````````` +Whether generated by the SRW App or another forecasting system, a forecasting experiment consists +of one or more forecast periods known as cycles. If there is one forecast per cycle, the experiment +is referred to briefly as a deterministic forecast, and if there are multiple, it is referred to as +an ensemble forecast. Verification of a deterministic forecast is known (unsurprisingly) as +deterministic VX, while verification of an ensemble forecast as a whole is known as ensemble VX. +It is also possible to consider each member of an ensemble separately and verify each such member +deterministically. + +The SRW App allows users to include in the Rocoto XML that defines the workflow various tasks that +perform deterministic and/or ensemble VX. The forecast files to be verified may be generated as part +of the SRW experiment that is performing the verification, or they may be pre-generated files that +are staged somewhere on disk. In the latter case, the forecast files may have been generated from a +previous SRW experiment, or they may have been generated from another forecasting system altogether +(see :numref:`Section %s ` for the procedure to stage forecast files). In the SRW +App, the flag ``DO_ENSEMBLE`` in the ``global:`` section of ``config.yaml`` specifies whether the +(generated or staged) forecast files to be verified constitute a deterministic or an ensemble forecast. +Setting ``DO_ENSEMBLE: False`` (the default) causes the SRW App workflow to assume that the forecast +is deterministic, while setting ``DO_ENSEMBLE: True`` causes it to assume that the forecast is ensemble. +In the latter case, the number of ensemble members must be specified via the variable ``NUM_ENS_MEMBERS``, +also found in the ``global:`` section of ``config.yaml``. 
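+
+For example, to declare that the (generated or staged) forecast to be verified is a hypothetical
+five-member ensemble, ``config.yaml`` might contain the following (the member count here is purely
+illustrative):
+
+.. code-block:: console
+
+   global:
+     DO_ENSEMBLE: true
+     NUM_ENS_MEMBERS: 5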
+
+Both deterministic and ensemble VX require observation and forecast files as well as possible preprocessing
+of those files. Thus, whenever deterministic or ensemble VX tasks are included in the workflow, preprocessing
+(meta)tasks must also be included that check for the presence of the required obs and forecast files on disk,
+retrieve obs files if necessary from a data store such as NOAA HPSS (see note below regarding forecast files),
+and preprocess both types of files as needed. We refer to these collectively as the VX preprocessing tasks.
+
+.. note::
+  Currently, the SRW App workflow does not support the ability to retrieve forecast files from data stores;
+  these must either be generated by the forecast model in the SRW App or be manually staged by the user.
+  See :numref:`Section %s <VXStageFcstFiles>` for details.
+
+
+Adding VX Tasks to the Workflow
+``````````````````````````````````
+To add verification tasks to the workflow, users must include the VX taskgroup files ``verify_pre.yaml``,
+``verify_det.yaml``, and/or ``verify_ens.yaml`` (all located in the ``parm/wflow`` directory) in the ``rocoto:
+tasks: taskgroups:`` section of ``config.yaml``. :numref:`Table %s <VX-yamls>` specifies the set of workflow
+VX (meta)tasks that each ``verify_*.yaml`` file defines. As implied above, users must add ``verify_pre.yaml``
+to ``rocoto: tasks: taskgroups:`` anytime they want to run deterministic and/or ensemble VX because this
+contains VX preprocessing tasks that are required by both VX types. Then users can add ``verify_det.yaml``
+to run deterministic VX on either a deterministic forecast or on each member of an ensemble forecast, they
+can add ``verify_ens.yaml`` to run ensemble VX on an ensemble forecast, or they can add both if they want to
+run both deterministic and ensemble VX on an ensemble forecast (where the deterministic VX is performed on
+each member of the ensemble).
 
 .. _VX-yamls:
 
 .. list-table:: Verification YAML Task Groupings
    :widths: 20 50
    :header-rows: 1
 
-   * - File
-     - Description
-   * - verify_pre.yaml
-     - Enables (meta)tasks that are prerequisites for both deterministic and ensemble verification (vx)
-   * - verify_det.yaml
-     - Enables (meta)tasks that perform deterministic VX on a single forecast or on each member of an ensemble forecast
-   * - verify_ens.yaml
-     - Enables (meta)tasks that perform ensemble VX on an ensemble of forecasts as a whole (requires ``DO_ENSEMBLE``
-       to be set to ``True`` in ``config.yaml``)
+   * - Taskgroup File
+     - Taskgroup Description
+   * - ``verify_pre.yaml``
+     - Defines (meta)tasks that run the VX preprocessing tasks that are prerequisites for both deterministic
+       and ensemble VX.
+   * - ``verify_det.yaml``
+     - Defines (meta)tasks that perform deterministic VX on a single forecast or on each member of an ensemble
+       forecast (the latter requires ``DO_ENSEMBLE`` and ``NUM_ENS_MEMBERS`` in ``config.yaml`` to be set to
+       ``True`` and the number of ensemble members, respectively).
+   * - ``verify_ens.yaml``
+     - Defines (meta)tasks that perform ensemble VX on an ensemble of forecasts as a whole (requires ``DO_ENSEMBLE``
+       and ``NUM_ENS_MEMBERS`` in ``config.yaml`` to be set to ``True`` and the number of ensemble members,
+       respectively).
+
+For example, to enable deterministic VX, ``rocoto: tasks: taskgroups:`` may be set as follows:
 
 .. code-block:: console
 
     rocoto:
       tasks:
-        taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}'
+        taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml",
+                         "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}'
+
+This setting can apply to either a deterministic or an ensemble forecast. In the latter case, it will
+cause deterministic VX to be performed on each member of the ensemble (but not on the ensemble as a
+whole).
Note that with this setting, the weather model will be run as part of the workflow to generate
+forecast output because ``prep.yaml``, ``coldstart.yaml``, and ``post.yaml`` are also included in
+``rocoto: tasks: taskgroups:``. Whether these forecasts are deterministic or ensemble depends on
+whether ``DO_ENSEMBLE`` in ``config.yaml`` is set to ``False`` or ``True``, respectively (and, if
+``True``, ``NUM_ENS_MEMBERS`` must be set to the number of ensemble members). Similarly, to enable
+ensemble VX for an ensemble forecast as well as deterministic VX for each member of that ensemble,
+``rocoto: tasks: taskgroups:`` may be set as follows:
+
+.. code-block:: console
+
+    rocoto:
+      tasks:
+        taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml",
+                         "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml", "parm/wflow/verify_ens.yaml"]|include }}'
+
+If deterministic VX of each ensemble member is not desired, ``verify_det.yaml`` must be left out of the
+above. Note that, as in the previous example, this setting of ``rocoto: tasks: taskgroups:`` will cause
+the workflow to run the weather model to generate forecast output because ``prep.yaml``, ``coldstart.yaml``,
+and ``post.yaml`` are again included, but in this case, ``DO_ENSEMBLE`` **must be** set to ``True`` (and
+``NUM_ENS_MEMBERS`` set appropriately) in ``config.yaml`` because inclusion of ``verify_ens.yaml`` requires
+that the forecast be an ensemble one.
+
+If users want to manually stage the forecast files instead of generating them with the SRW App's native weather
+model (see :numref:`Section %s <VXStageFcstFiles>` for the procedure), they must exclude ``prep.yaml``,
+``coldstart.yaml``, and ``post.yaml`` from the examples above. Also, regardless of whether the forecast
+files are generated by the SRW App or staged manually by the user, if the forecast to be verified is an
+ensemble one, in the ``global:`` section of ``config.yaml`` users must set ``DO_ENSEMBLE`` to ``True``
+and ``NUM_ENS_MEMBERS`` to the number of ensemble members. This tells the workflow to look for multiple
+forecasts for each cycle instead of just one (as well as the number of such forecasts).
+More information about configuring the ``rocoto:`` section can be found in :numref:`Section %s `.
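+
+As a sketch of the staged-forecast case just described, a configuration that runs only the VX preprocessing
+and ensemble VX (meta)tasks against manually staged ensemble forecast files might set:
+
+.. code-block:: console
+
+    rocoto:
+      tasks:
+        taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_ens.yaml"]|include }}'
+
+with ``DO_ENSEMBLE`` set to ``True`` and ``NUM_ENS_MEMBERS`` set appropriately in the ``global:`` section,
+as described above.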
list-table:: Verification YAML Task Groupings - :widths: 20 50 +VX Taskgroup Organization and VX Field Groups +````````````````````````````````````````````````` +The VX (meta)tasks in the ``verify_*.yaml`` taskgroup files are described in detail in :numref:`Table %s +`. They are organized as follows. + +The (meta)tasks in ``verify_pre.yaml`` each +operate on a single obs type (except for ``metatask_check_post_output_all_mems``, which operates on the +forecast(s) and checks for the presence of all necessary forecast files), while the ones in ``verify_det.yaml`` +and ``verify_ens.yaml`` operate on one or more verification field groups. A verification field group +represents one or more meteorologial fields that are operated on (e.g. verified) together in a single +call to one of the METplus tools (such as GridStat, PointStat, GenEnsProd, and EnsembleStat), and each +field group has associated with it an obs type against which those forecast fields are verified. The +set of valid VX field groups, the obs types they are associated with, and a brief description of the +fields they include are given in :numref:`Table %s `. + +.. _VXFieldGroupDescsTable: + +.. list-table:: Valid Verification Field Groups and Descriptions + :widths: 20 20 60 :header-rows: 1 - * - File - - Description - * - verify_pre.yaml - - Enables (meta)tasks that are prerequisites for both deterministic and ensemble verification (vx) - * - verify_det.yaml - - Enables (meta)tasks that perform deterministic VX on a single forecast or on each member of an ensemble forecast - * - verify_ens.yaml - - Enables (meta)tasks that perform ensemble VX on an ensemble of forecasts as a whole (requires ``DO_ENSEMBLE`` - to be set to ``True`` in ``config.yaml``) + * - VX Field Group + - Associated Obs Type + - Fields Included in Group + * - APCP + - CCPA + - Accumulated precipitation for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS`` + * - ASNOW + - NOHRSC + - Accumulated snowfall for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS`` + * - REFC + - MRMS + - Composite reflectivity + * - RETOP + - MRMS + - Echo top + * - SFC + - NDAS + - Various surface and near-surface fields (e.g. at the surface, 2 m, 10 m, etc) + * - UPA + - NDAS + - Various upper-air fields (e.g. at 800 mb, 500 mb, etc) + +The list ``VX_FIELD_GROUPS`` in the ``verification:`` section of ``config.yaml`` specifies the VX field +groups for which to run verification. Thus, inclusion of a ``verify_*.yaml`` taskgroup file under the +``rocoto: tasks: taskgroups:`` section of ``config.yaml`` does not mean that all the (meta)tasks in that +file will necessarily be included in the workflow. This is because, in order to avoid unwanted computation, +only those (meta)tasks in ``verify_det.yaml`` and/or ``verify_ens.yaml`` that operate on field groups +included in ``VX_FIELD_GROUPS`` will appear in the Rocoto XML, and only those (meta)tasks in ``verify_pre.yaml`` +that operate on obs types associated with one of the field groups in ``VX_FIELD_GROUPS`` will appear in +the Rocoto XML. Thus, for example, setting -The ``verify_*.yaml`` files include by default the definitions of several common verification tasks and metatasks. -These default verification (meta)tasks are described in :numref:`Table %s `. The tasks in the -``verify_*.yaml`` files are independent of each other, so users may want to turn some off depending on the needs of -their experiment. To turn off a task, simply include its entry from ``verify_*.yaml`` as an empty YAML entry in -``config.yaml``. 
For example, to turn off PointStat tasks: +.. code-block:: console + + VX_FIELD_GROUPS: [ 'APCP', 'REFC', 'RETOP', 'SFC', 'UPA' ] + +in ``config.yaml`` and including all three taskgroups ``verify_*.yaml`` in ``rocoto: tasks: taskgroups:`` +will add to the Rocoto XML the VX (meta)tasks for all valid field groups except those for accumulated +snowfall (``'ASNOW'``) and its associated obs type (:term:`NOHRSC`). In other words, all the (meta)tasks +in :numref:`Table %s `. will be included in the Rocoto XML except for those +associated with the :term:`NOHRSC` obs type and the ``'ASNOW'`` field group. Users might want to set +``VX_FIELD_GROUPS`` in this way for example because the forecast experiment they are verifying is for a +summer period for which ``ASNOW`` is not relevant. + + +Staging Observation Files +`````````````````````````````````` +The taskgroup in ``verify_pre.yaml`` defines a set of workflow tasks named ``get_obs_*`` --- where the ``*`` +represents any one of the supported obs types :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS` --- +that will first check for the existence of the obs files required for VX at the locations on disk specified +by the variables ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` in the ``verification:`` section of +``config.yaml``. The ``*_OBS_DIR`` are the base directories in which the obs files are or should be +staged, and the ``OBS_*_FN_TEMPLATES[1,3,...]`` are the file name templates (with METplus time strings +used for templating; see example below). The ``[1,3,...]`` in ``OBS_*_FN_TEMPLATES[1,3,...]`` refer +to the second, fourth, etc elements of ``OBS_*_FN_TEMPLATES`` and correspond to the various sets of files +that the obs type contains. (The first, third, etc elements, i.e. ``OBS_*_FN_TEMPLATES[0,2,...]``, +indicate the VX field groups for which the respective sets of obs files are used to verify and should +not be changed.) Note that ``OBS_*_FN_TEMPLATES[1,3,...]`` may include leading subdirectories and are +relative to the obs type's ``*_OBS_DIR``. Thus, the templates for the full paths to the obs files are +given by .. code-block:: console - rocoto: - tasks: - taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' - metatask_vx_ens_member: - metatask_PointStat_mem#mem#: + {*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]} +If the obs files exist at the locations specified by these variables, then the ``get_obs_*`` tasks will +succeed and the workflow will move on to subsequent tasks. If one or more obs files do not exist, the +``get_obs_*`` tasks will attempt to retrieve the required files from a data store such as NOAA HPSS and +place them in the locations specified by ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]``. Assuming +that attempt is successful, the workflow will move on to subsequent tasks. Thus: + + * Users who have the obs files already available (staged) on their system only need to set ``*_OBS_DIR`` + and ``OBS_*_FN_TEMPLATES[1,3,...]`` in ``config.yaml`` to match those staging locations and file names. + + * Users who do not have the obs files available on their systems and do not have access to NOAA HPSS + need to download :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` files manually + from collections of publicly available data such as the ones listed `here `__. 
+ Then, as above, they must set ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` to match those + staging locations and file names. + + * Users who have access to a data store that hosts the necessary files (e.g. NOAA HPSS) do not need to + manually stage the obs data because the ``get_obs_*`` tasks will retrieve the necessary obs and place + them in the locations specified by ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]``. The default + values of these variables are such that the files are placed under the experiment directory, but + users may change these if they want the retrieved files to be placed elsewhere. -More information about configuring the ``rocoto:`` section can be found in :numref:`Section %s `. -If users have access to NOAA :term:`HPSS` but have not pre-staged the obs data, the taskgroup in ``verify_pre.yaml`` -will by default activate a set of ``get_obs_...`` workflow tasks that will attempt to retrieve the required -files from a data store such as NOAA HPSS. In this case, the variables ``*_OBS_DIR`` in ``config.yaml`` must -be set to the base directories under which users want the files to reside, and the variables ``OBS_*_FN_TEMPLATES[1]`` -must be set to METplus file name templates (possibly including leading subdirectories relative to ``*_OBS_DIR``) -that will be used to name the obs files. (Here, the ``*`` represents any one of the obs types :term:`CCPA`, -:term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS`, and the ``[1]`` in ``OBS_*_FN_TEMPLATES[1]`` refers to the second -element of ``OBS_*_FN_TEMPLATES``; the first element should not be changed). - -Users who do not have access to NOAA HPSS and do not have the data on their system will need to download -:term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` data manually from collections of publicly -available data, such as the ones listed `here `__. - -Users who have already staged the observation data needed for verification on their system (i.e., the -:term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` data) should set -``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1]`` in ``config.yaml`` to match those staging locations and -file names. For example, for a case in which all four types of obs are needed for VX, these variables -might be set as follows: +As an example, consider a case in which all four types of obs are needed for verification. Then ``*_OBS_DIR`` +and ``OBS_*_FN_TEMPLATES`` might be set as follows: .. code-block:: console @@ -724,57 +877,83 @@ might be set as follows: 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] OBS_NDAS_FN_TEMPLATES: [ 'SFC_UPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] -If one of the days encompassed by the experiment is 20240429, and if one of the hours during -that day at which VX will be performed is 03, then, taking the CCPA obs type as an example, -one of the ``get_obs_ccpa_...`` tasks in the workflow will look for a CCPA file on disk -corresponding to this day and hour at +Now further consider the CCPA obs type. If one of the days encompassed by the forecast(s) is 20240429, +then the ``get_obs_ccpa`` task associated with this day will check for the existence of the set of obs +files given by -``/path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t03z.01h.hrap.conus.gb2`` +``/path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t{HH}z.01h.hrap.conus.gb2`` -As described above, if this file does not exist, the ``get_obs`` task will try to retrieve it -from a data store and place it at this location. 
+where ``{HH}`` takes on all hours of this day at which the verification requires CCPA obs. For example, +if performing (deterministic or ensemble) VX on 1-hour APCP for a 3-hour forecast that starts at 06z, +``{HH}`` will take on the values 07, 08, and 09. Then the files that ``get_obs_ccpa`` will look for +are: + +.. code-block:: console -After adding the VX tasks to the ``rocoto:`` section and the data paths to the ``verification:`` -section, users can proceed to generate the experiment, which will perform VX tasks in addition -to the default workflow tasks. + /path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t07z.01h.hrap.conus.gb2 + /path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t08z.01h.hrap.conus.gb2 + /path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t09z.01h.hrap.conus.gb2 +If all these exist, ``get_obs_ccpa`` will simply confirm their existence and will not need to retrieve +any files. If not, it will try to retrieve the files from a data store such as NOAA HPSS and place them +at the above locations. -Note that inclusion of the ``verify_*.yaml`` files under the ``rocoto: tasks: taskgroups:`` section of -``config.yaml`` does not mean all the (eta)tasks in those files will necessarily be included in the workflow. -This is because the VX tasks are grouped into field groups, and only those (meta)tasks in ``verify_*.yaml`` -associated with field groups that are included in the list ``VX_FIELD_GROUPS`` in ``config.yaml`` -are included in the worklow. -Each field group represents one or more meteorologial fields that can be verified. The valid field -groups and their descriptions are given in :numref:`Table %s `. -Thus, setting + +.. _VXStageFcstFiles: + +Staging Forecast Files +`````````````````````````````````` +As noted above, the SRW App currently does not support the ability to retrieve forecast files from +data stores. Thus, the forecast files must either be generated by the forecast model in the SRW App, +or they must be manually staged by the user. Note that manually staged forecast files do not have +to be ones generated by the SRW App; they can be outputs from another forecasting system. + +The locations of the forecast files are defined by the variables ``VX_FCST_INPUT_BASEDIR``, +``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE`` in the ``verification:`` section of ``config.yaml``. +``VX_FCST_INPUT_BASEDIR`` is the base directory in which the files are located, ``FCST_SUBDIR_TEMPLATE`` +is a template specifying a set of subdirectories under ``VX_FCST_INPUT_BASEDIR``, and ``FCST_FN_TEMPLATE`` +is the file name template. As with the obs, the templating in ``FCST_SUBDIR_TEMPLATE`` and +``FCST_FN_TEMPLATE`` uses METplus time strings. Thus, the full template to the forecast files +is given by .. code-block:: console - VX_FIELD_GROUPS: [ 'APCP', 'REFC', 'RETOP', 'SFC', 'UPA' ] + {VX_FCST_INPUT_BASEDIR}/{FCST_SUBDIR_TEMPLATE}/{FCST_FN_TEMPLATE} -will run the VX (meta)tasks for all field groups except accumulated snowfall. +If the forecast files are manually staged, then these three variables must be set such that they +together point to the locations of the staged files. If they are generated by the SRW App, then +the user does not need to set these variables; they will by default be set to point to the forecast +files. -.. _VXFieldGroupDescsTable: +Summary +`````````````` +In summary, users must take the following steps to enable VX tasks in the SRW App workflow: -.. 
list-table:: Valid Verification Field Groups and Descriptions - :widths: 20 50 - :header-rows: 1 + #. Add the necessary VX taskgroup files ``verify_*.yaml`` to the ``rocoto: tasks: taskgroups:`` + section of ``config.yaml``. ``verify_pre.yaml`` must always be added; ``verify_det.yaml`` + must be added to enable deterministic VX (either of a deterministic forecast or of each + member of an ensemble forecast); and ``verify_ens.yaml`` must be added to enable ensemble + VX (of an ensemble forecast as a whole). - * - Field Group - - Description - * - APCP - - Accumulated precipitation for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS`` - * - ASNOW - - Accumulated snowfall for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS`` - * - REFC - - Composite reflectivity - * - RETOP - - Echo top - * - SFC - - Surface fields - * - UPA - - Upper-air fields + #. If performing ensemble verification and/or deterministic verification of ensemble members + (i.e. if the forecast to be verified is an ensemble), in the ``global:`` section of ``config.yaml`` + set ``DO_ENSEMBLE`` to ``True`` and ``NUM_ENS_MEMBERS`` to the number of ensemble members. + + #. If manually staging the obs files (e.g. because users don't have access to NOAA HPSS), set + the variables ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` in the ``verification:`` section + of ``config.yaml`` to the locations of these files on disk (where the ``*`` in these variable + names can be any of the supported obs types). + + #. If manually staging the forecast files (as opposed to generating them by running the weather + model in the SRW App), set the forecast file paths to the locations of these files on disk + using the variables ``VX_FCST_INPUT_BASEDIR``, ``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE`` + in the ``verification:`` section of ``config.yaml``. + + #. Specify the field groups to verify in the list ``VX_FIELD_GROUPS`` in the ``verification:`` + section of ``config.yaml``. Valid values for field groups are given in :numref:`Table %s `. + +After completing these steps, users can proceed to generate the experiment (see :numref:`Section %s `) .. _GenerateWorkflow: diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 81d72d5144..04a45e1b02 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -1659,12 +1659,15 @@ VX Parameters for Observations ------------------------------------- .. note:: + The observation types that the SRW App can currently retrieve (if necessary) and use in verification are: + * CCPA (Climatology-Calibrated Precipitation Analysis) * NOHRSC (National Operational Hydrologic Remote Sensing Center) * MRMS (Multi-Radar Multi-Sensor) * NDAS (NAM Data Assimilation System) + The script ``ush/get_obs.py`` contains further details on the files and directory structure of each obs type. From e17ad8ce82a8f507dc540df6a842f90779b37a41 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Wed, 11 Dec 2024 00:14:38 +0000 Subject: [PATCH 199/260] Re-hard-code to v12 beta version again, GNU is just too damn slow. 
Also, fix problems with POINT_STAT_MESSAGE_TYPE for Airnow obs --- ...gional_run_met_gridstat_or_pointstat_vx.sh | 32 ++++++++++++++++++- scripts/exregional_run_met_pcpcombine.sh | 9 ++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 7d7f8b05fa..34afed6f46 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -206,7 +206,27 @@ elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Ascii2nc_obs" OBS_INPUT_FN_TEMPLATE="${OBS_AERONET_FN_TEMPLATE_ASCII2NC_OUTPUT}" elif [ "${OBTYPE}" = "AIRNOW" ]; then - FIELDNAME_IN_MET_FILEDIR_NAMES="AIRNOW_HOURLY_AQOBS" + # It's very annoying that the names for specifying Airnow format are slightly different + # for ASCII2NC and Pointstat. This logic deals with that. + if [ -z "${AIRNOW_INPUT_FORMAT}" ]; then + if [[ "${OBS_AIRNOW_FN_TEMPLATES[1]}" == *"HourlyData"* ]]; then + FIELDNAME_IN_MET_FILEDIR_NAMES="AIRNOW_HOURLY" + elif [[ "${OBS_AIRNOW_FN_TEMPLATES[1]}" == *"HourlyAQObs"* ]]; then + FIELDNAME_IN_MET_FILEDIR_NAMES="AIRNOW_HOURLY_AQOBS" + else + print_err_msg_exit "Invalid AIRNOW_INPUT_FORMAT=${AIRNOW_INPUT_FORMAT}" + fi + else + if [[ "${AIRNOW_INPUT_FORMAT}" == "airnowhourly" ]]; then + FIELDNAME_IN_MET_FILEDIR_NAMES="AIRNOW_HOURLY" + elif [[ "${AIRNOW_INPUT_FORMAT}" == "airnowhourlyaqobs" ]]; then + FIELDNAME_IN_MET_FILEDIR_NAMES="AIRNOW_HOURLY_AQOBS" + else + print_err_msg_exit "Could not automatically determine format of Airnow observations;\ +check your filenames (OBS_AIRNOW_FN_TEMPLATE=${OBS_AIRNOW_FN_TEMPLATE}) +or manually set variable AIRNOW_INPUT_FORMAT" + fi + fi OBS_INPUT_DIR="${vx_output_basedir}/metprd/Ascii2nc_obs" OBS_INPUT_FN_TEMPLATE="${OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT}" else @@ -430,6 +450,16 @@ fi # #----------------------------------------------------------------------- # +#TEMPORARILY POINTING TO BETA RELEASE +MET_ROOT=/contrib/met/12.0.0-beta6 +MET_INSTALL_DIR=${MET_ROOT} +MET_BIN_EXEC=${MET_INSTALL_DIR}/bin +MET_BASE=${MET_INSTALL_DIR}/share/met +METPLUS_ROOT=/contrib/METplus/METplus-6.0.0-beta6 +METPLUS_PATH=${METPLUS_ROOT} +MET_ROOT=/contrib/met/12.0.0-beta6 +#TEMPORARILY POINTING TO BETA RELEASE + print_info_msg "$VERBOSE" " Calling METplus to run MET's ${metplus_tool_name} tool for field(s): ${FIELDNAME_IN_MET_FILEDIR_NAMES}" ${METPLUS_PATH}/ush/run_metplus.py \ diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 4ce3c297e7..b5e557b61f 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -444,6 +444,15 @@ fi # #----------------------------------------------------------------------- # +#TEMPORARILY POINTING TO BETA RELEASE +MET_ROOT=/contrib/met/12.0.0-beta6 +MET_INSTALL_DIR=${MET_ROOT} +MET_BIN_EXEC=${MET_INSTALL_DIR}/bin +MET_BASE=${MET_INSTALL_DIR}/share/met +METPLUS_ROOT=/contrib/METplus/METplus-6.0.0-beta6 +METPLUS_PATH=${METPLUS_ROOT} +MET_ROOT=/contrib/met/12.0.0-beta6 +#TEMPORARILY POINTING TO BETA RELEASE print_info_msg "$VERBOSE" " Calling METplus to run MET's ${metplus_tool_name} tool for field(s): ${FIELDNAME_IN_MET_FILEDIR_NAMES}" ${METPLUS_PATH}/ush/run_metplus.py \ From 9d192ef928ca3b1b2c1eafaef2feec1bd9becdd2 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." 
Date: Wed, 11 Dec 2024 00:15:10 +0000 Subject: [PATCH 200/260] Fix obs variable name for PM2.5 --- parm/metplus/vx_configs/vx_config_det.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/metplus/vx_configs/vx_config_det.yaml b/parm/metplus/vx_configs/vx_config_det.yaml index 1eb69de479..e55a30adf1 100644 --- a/parm/metplus/vx_configs/vx_config_det.yaml +++ b/parm/metplus/vx_configs/vx_config_det.yaml @@ -211,7 +211,7 @@ AOD: AOTK%%AOD: L0%%L550: [] PM25: - MASSDEN%%PM25: + MASSDEN%%PM2.5: Z8%%A1: [] PM10: MASSDEN%%PM10: From 743452d797574aba5aef76ef5f9e0274c0ab0264 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Wed, 11 Dec 2024 00:16:10 +0000 Subject: [PATCH 201/260] Update config_defaults.yaml - Better description for METPLUS_VERBOSITY_LEVEL - Convert all the REMOVE_RAW_OBS_* vars to a single REMOVE_RAW_OBS_DIRS variable. Way less complicated and you should never absolutely NEED to specify this by ob type - Better descriptions for some AQ variables Includes mods to get_obs.py for REMOVE_RAW_OBS_DIRS --- ush/config_defaults.yaml | 28 +++++++++++++--------------- ush/get_obs.py | 2 +- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index bf3436221b..089b66c073 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2403,8 +2403,9 @@ verification: # ------------------------------- # # METPLUS_VERBOSITY_LEVEL: - # Logging verbosity level used by METplus verification tools. 0 to 5, - # with 0 quiet and 5 loudest. + # Logging verbosity level used by METplus verification tools. 0 to 9, + # with 0 having the fewest log messages and 9 having the most. Levels 5 + # and above can result in very large log files and slower tool execution.. # METPLUS_VERBOSITY_LEVEL: 2 # @@ -2571,19 +2572,15 @@ verification: # '{valid?fmt=%Y%m%d}/HourlyAQObs_{valid?fmt=%Y%m%d%H}.dat' OBS_AIRNOW_FN_TEMPLATES: [ 'PM', '{valid?fmt=%Y%m%d}/HourlyData_{valid?fmt=%Y%m%d%H}.dat' ] # - # REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]: + # REMOVE_RAW_OBS_DIRS # Flag specifying whether to remove the "raw" observation directories - # after retrieving the specified type of obs (CCPA, NOHRSC, MRMS, or - # NOHRSC) from a data store (e.g. NOAA's HPSS). The raw directories - # are the ones in which the observation files are placed immediately + # after retrieving observations from a data store (e.g. NOAA's HPSS). + # The raw directories are the ones in which the observation files are placed # after pulling them from the data store but before performing any # processing on them such as renaming the files and/or reorganizing # their directory structure. # - REMOVE_RAW_OBS_CCPA: True - REMOVE_RAW_OBS_NOHRSC: True - REMOVE_RAW_OBS_MRMS: True - REMOVE_RAW_OBS_NDAS: True + REMOVE_RAW_OBS_DIRS: True # # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: # METplus template for the names of the NetCDF files generated by the @@ -2611,17 +2608,18 @@ verification: OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT: 'hourly_airnow_obs_{valid?fmt=%Y%m%d%H}.nc' # # AIRNOW_INPUT_FORMAT: - # Observation format for ASCII Airnow observations. Valid options can be found in METplus - # users guide: https://met.readthedocs.io/en/latest/Users_Guide/reformat_point.html#ascii2nc-tool + # Observation format for ASCII Airnow observations. 
Valid options are AIRNOW_HOURLY_AQOBS and + # AIRNOW_HOURLY; for more information see the METplus users guide: + # https://met.readthedocs.io/en/latest/Users_Guide/reformat_point.html#ascii2nc-tool # If not specified or set to a blank string, will attempt to determine its value based on the # value of OBS_AIRNOW_FN_TEMPLATE # - # AIRNOW_DATA_STORES: - # Location(s) to retrieve AIRNOW data from. Valid values are "aws" and/or "hpss", see + # OBS_DATA_STORES: + # Location(s) to retrieve observation data from. Valid values are "aws" and/or "hpss", see # parm/data_locations.yaml for info on these data stores. # AIRNOW_INPUT_FORMAT: "" - AIRNOW_DATA_STORES: aws + OBS_DATA_STORES: aws # # OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT: diff --git a/ush/get_obs.py b/ush/get_obs.py index 2ec21cf9b7..6d990443cf 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -603,7 +603,7 @@ def get_obs(config, obtype, yyyymmdd_task): # Whether to remove raw observations after processed directories have # been created from them. - remove_raw_obs = vx_config[f'REMOVE_RAW_OBS_{obtype}'] + remove_raw_obs = vx_config[f'REMOVE_RAW_OBS_DIRS'] # Base directory that will contain the archive subdirectories in which # the files extracted from each archive (tar) file will be placed. We From 4fbda332dc114349664ff40f71b2246f2dad03ee Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Thu, 12 Dec 2024 20:03:36 +0000 Subject: [PATCH 202/260] Rename Pointstat conf files to avoid overwrite problem with PM tasks, add matched-pairs to stat files for smoke variables --- parm/metplus/GridStat_or_PointStat.conf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/parm/metplus/GridStat_or_PointStat.conf b/parm/metplus/GridStat_or_PointStat.conf index 155b028291..d20fe5fa00 100644 --- a/parm/metplus/GridStat_or_PointStat.conf +++ b/parm/metplus/GridStat_or_PointStat.conf @@ -699,7 +699,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} # {{METPLUS_TOOL_NAME}}_ONCE_PER_FIELD = False -{{METPLUS_TOOL_NAME}}_OUTPUT_PREFIX = {MODEL}_{{fieldname_in_met_filedir_names}}_{OBTYPE} +{{METPLUS_TOOL_NAME}}_OUTPUT_PREFIX = {MODEL}_{{fieldname_in_met_filedir_names}}_{{input_field_group}} # Climatology data {%- set comment_or_null = '' %} @@ -762,7 +762,9 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} #{{METPLUS_TOOL_NAME}}_OUTPUT_FLAG_ECNT = #{{METPLUS_TOOL_NAME}}_OUTPUT_FLAG_RPS = #{{METPLUS_TOOL_NAME}}_OUTPUT_FLAG_ECLV = -#{{METPLUS_TOOL_NAME}}_OUTPUT_FLAG_MPR = +{%- if (input_field_group in ['AOD', 'PM25', "PM10"]) %} +{{METPLUS_TOOL_NAME}}_OUTPUT_FLAG_MPR = STAT +{%- endif %} #{{METPLUS_TOOL_NAME}}_OUTPUT_FLAG_ORANK = {%- endif %} From ff2359317e2ab97caa77c300e7c15beb142ec3ce Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." 
Date: Thu, 12 Dec 2024 20:05:43 +0000 Subject: [PATCH 203/260] Fix PM pointstat tasks to read output files from PCP Combine --- parm/metplus/vx_configs/vx_config_det.yaml | 4 ++-- .../exregional_run_met_gridstat_or_pointstat_vx.sh | 13 +++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/parm/metplus/vx_configs/vx_config_det.yaml b/parm/metplus/vx_configs/vx_config_det.yaml index e55a30adf1..5ea36b2da0 100644 --- a/parm/metplus/vx_configs/vx_config_det.yaml +++ b/parm/metplus/vx_configs/vx_config_det.yaml @@ -211,8 +211,8 @@ AOD: AOTK%%AOD: L0%%L550: [] PM25: - MASSDEN%%PM2.5: + PM25%%PM2.5: Z8%%A1: [] PM10: - MASSDEN%%PM10: + PM10%%PM10: Z8%%A1: [] diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 34afed6f46..7a5ff40412 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -201,10 +201,14 @@ elif [ "${grid_or_point}" = "point" ]; then if [ "${OBTYPE}" = "NDAS" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + FCST_INPUT_DIR="${vx_fcst_input_basedir}" + FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" elif [ "${OBTYPE}" = "AERONET" ]; then FIELDNAME_IN_MET_FILEDIR_NAMES="AERONET_AOD" OBS_INPUT_DIR="${vx_output_basedir}/metprd/Ascii2nc_obs" OBS_INPUT_FN_TEMPLATE="${OBS_AERONET_FN_TEMPLATE_ASCII2NC_OUTPUT}" + FCST_INPUT_DIR="${vx_fcst_input_basedir}" + FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" elif [ "${OBTYPE}" = "AIRNOW" ]; then # It's very annoying that the names for specifying Airnow format are slightly different # for ASCII2NC and Pointstat. This logic deals with that. @@ -227,15 +231,16 @@ check your filenames (OBS_AIRNOW_FN_TEMPLATE=${OBS_AIRNOW_FN_TEMPLATE}) or manually set variable AIRNOW_INPUT_FORMAT" fi fi + ACCUM_HH='01' OBS_INPUT_DIR="${vx_output_basedir}/metprd/Ascii2nc_obs" OBS_INPUT_FN_TEMPLATE="${OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT}" + # The forecast input for Airnow obs is the output from PcP combine + FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" + FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) else print_err_msg_exit "Invalid OBTYPE for PointStat: ${OBTYPE}" fi - FCST_INPUT_DIR="${vx_fcst_input_basedir}" - FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" - fi OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_INPUT_FN_TEMPLATE} ) FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_INPUT_FN_TEMPLATE} ) @@ -330,7 +335,7 @@ fi # First, set the base file names. # metplus_config_tmpl_bn="GridStat_or_PointStat" -metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_${ensmem_name}" +metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_${FIELD_GROUP}_${ensmem_name}" metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. From 4110bb43632a6d53081a99d7ac0666a1a86b1fc7 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." 
Date: Thu, 12 Dec 2024 20:06:23 +0000 Subject: [PATCH 204/260] Run PCP Combine for hour zero for PM tasks --- scripts/exregional_run_met_pcpcombine.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index b5e557b61f..79a195fba6 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -223,8 +223,14 @@ fi #----------------------------------------------------------------------- # vx_intvl="$((10#${ACCUM_HH}))" +#Airnow obs use PCP_Combine simply to combine two fields, so run for every hour +if [ "${OBTYPE}" = "AIRNOW" ]; then + lhr_min=0 +else + lhr_min=${vx_intvl} +fi VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ - --lhr_min="${vx_intvl}" \ + --lhr_min="${lhr_min}" \ --lhr_max="${FCST_LEN_HRS}" \ --lhr_intvl="${vx_intvl}" \ --skip_check_files ) || \ From 713273d5f59ca286467bb9f407d66a68342b54fa Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Thu, 19 Dec 2024 19:03:30 +0000 Subject: [PATCH 205/260] Modify test files for new combined REMOVE_RAW_OBS_DIRS setting --- ...g.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml | 5 +---- ...fig.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml | 5 +---- ....vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml | 5 +---- .../config.vx-det_long-fcst_custom-vx-config_gfs.yaml | 5 +---- .../config.vx-det_long-fcst_winter-wx_SRW-staged.yaml | 5 +---- .../config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml | 5 +---- .../config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml | 5 +---- .../config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml | 5 +---- ...onfig.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml | 5 +---- .../config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml | 5 +---- .../config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml | 5 +---- .../config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml | 5 +---- 12 files changed, 12 insertions(+), 48 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml index f4d71ceeb8..e57369e175 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -45,10 +45,7 @@ verification: NDAS_OBS_DIR: "{{ platform.TEST_GDAS_OBS_DIR }}" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false + REMOVE_RAW_OBS_DIRS: false # OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', '{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml index caa917be41..be95cb3d11 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -45,10 +45,7 @@ verification: NDAS_OBS_DIR: "{{ platform.TEST_GDAS_OBS_DIR }}" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS (if it's necessary to fetch obs files). - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false + REMOVE_RAW_OBS_DIRS: false # OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', '{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml index cf1fd79ad3..8fbd0a7be7 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -45,10 +45,7 @@ verification: NDAS_OBS_DIR: "{{ platform.TEST_GDAS_OBS_DIR }}" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS (if it's necessary to fetch obs files). - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false + REMOVE_RAW_OBS_DIRS: false # OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', '{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml index 5ea940f055..e9855f75b9 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -48,10 +48,7 @@ verification: NDAS_OBS_DIR: "{{ platform.TEST_GDAS_OBS_DIR }}" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false + REMOVE_RAW_OBS_DIRS: false # OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', '{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] # diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml index 2a9fe731a0..6a27dababf 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml @@ -52,10 +52,7 @@ verification: NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false + REMOVE_RAW_OBS_DIRS: false # VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml index 0be883f1e8..67235fec5d 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml @@ -50,10 +50,7 @@ verification: NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false + REMOVE_RAW_OBS_DIRS: false # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml index 80654ec42d..1692dbbc40 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml @@ -50,10 +50,7 @@ verification: NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false + REMOVE_RAW_OBS_DIRS: false # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml index 18508af72e..a0aa0e56c7 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml @@ -50,10 +50,7 @@ verification: NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS. 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false + REMOVE_RAW_OBS_DIRS: false # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml index 2745c580e3..247866df5c 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml @@ -52,10 +52,7 @@ verification: NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false + REMOVE_RAW_OBS_DIRS: false # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml index fbd67884a5..706baf7c57 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml @@ -51,10 +51,7 @@ verification: NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false + REMOVE_RAW_OBS_DIRS: false # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml index 85f55c8fe4..77b795dba0 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml @@ -52,10 +52,7 @@ verification: NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS. 
- REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false + REMOVE_RAW_OBS_DIRS: false # VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml index c65fb74ec4..62a58e0741 100644 --- a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml @@ -52,10 +52,7 @@ verification: NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" # Do not remove raw obs files to be able to verify that only the necessary # raw files are fetched from HPSS. - REMOVE_RAW_OBS_CCPA: false - REMOVE_RAW_OBS_MRMS: false - REMOVE_RAW_OBS_NDAS: false - REMOVE_RAW_OBS_NOHRSC: false + REMOVE_RAW_OBS_DIRS: false # VX_FCST_MODEL_NAME: 'hrrr_ncep' VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' From 4fd187211b1547fceaa1d046d70aa6b242e81a2a Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Thu, 19 Dec 2024 19:04:21 +0000 Subject: [PATCH 206/260] Move METPLUS import in get_obs.py so we can still use --help flag if environment is not set --- ush/get_obs.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/ush/get_obs.py b/ush/get_obs.py index 6d990443cf..a8de1d7c1a 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -16,14 +16,6 @@ load_yaml_config, ) from mrms_pull_topofhour import mrms_pull_topofhour -try: - sys.path.append(os.environ['METPLUS_ROOT']) -except: - print("\nERROR ERROR ERROR\n") - print("Environment variable METPLUS_ROOT must be set to use this script\n") - raise -from metplus.util import string_template_substitution as sts - def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): """ @@ -983,6 +975,15 @@ def parse_args(argv): if __name__ == "__main__": args = parse_args(sys.argv[1:]) + # We import METPLUS after parse_args so that we can still call the script with -h + try: + sys.path.append(os.environ['METPLUS_ROOT']) + except: + print("\nERROR ERROR ERROR\n") + print("Environment variable METPLUS_ROOT must be set to use this script\n") + raise + from metplus.util import string_template_substitution as sts + # Set up logging. # If the name/path of a log file has been specified in the command line # arguments, place the logging output in it (existing log files of the From a4ace4f1bf373856a1b1cc5d46f0a4d4eed1cc92 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." 
Date: Thu, 19 Dec 2024 19:06:02 +0000 Subject: [PATCH 207/260] Fix duplicate line in field_groups definition --- ush/set_cycle_and_obs_timeinfo.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py index 47515bba6f..55df984255 100644 --- a/ush/set_cycle_and_obs_timeinfo.py +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -635,7 +635,6 @@ def get_obs_retrieve_times_by_day( {'obtype': 'NOHRSC', 'time_type': 'cumul', 'field_groups': ['ASNOW']}, {'obtype': 'MRMS', 'time_type': 'inst', 'field_groups': ['REFC', 'RETOP']}, {'obtype': 'NDAS', 'time_type': 'inst', 'field_groups': ['SFC', 'UPA']}, - {'obtype': 'NDAS', 'time_type': 'inst', 'field_groups': ['ADPSFC', 'ADPUPA']}, {'obtype': 'AERONET', 'time_type': 'inst', 'field_groups': ['AOD']}, {'obtype': 'AIRNOW', 'time_type': 'inst', 'field_groups': ['PM25', 'PM10']} ] @@ -664,7 +663,6 @@ def get_obs_retrieve_times_by_day( # We refer to these times as the vx comparison times. vx_compare_times_by_day = dict() for obs_dict in obs_info: - obtype = obs_dict['obtype'] obs_time_type = obs_dict['time_type'] @@ -826,7 +824,6 @@ def get_obs_retrieve_times_by_day( # but grouped by cycle start date, regroup them by day and save results # in obs_retrieve_times_by_day. for obs_dict in obs_info: - obtype = obs_dict['obtype'] obs_time_type = obs_dict['time_type'] From 36bd951ac426307ba8158f50799f132dd8b849a1 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Mon, 6 Jan 2025 22:08:00 +0000 Subject: [PATCH 208/260] Verify full domain, not just CONUS, and deal with correct censor values for bad AOD data --- parm/metplus/GridStat_or_PointStat.conf | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/parm/metplus/GridStat_or_PointStat.conf b/parm/metplus/GridStat_or_PointStat.conf index d20fe5fa00..c4b9711337 100644 --- a/parm/metplus/GridStat_or_PointStat.conf +++ b/parm/metplus/GridStat_or_PointStat.conf @@ -149,7 +149,7 @@ OBTYPE = {{obtype}} # Verification Masking regions # Indicate which grid and polygon masking region, if applicable # -{{METPLUS_TOOL_NAME}}_MASK_GRID = +{{METPLUS_TOOL_NAME}}_MASK_GRID = FULL {%- if (METPLUS_TOOL_NAME == 'POINT_STAT') %} # @@ -625,6 +625,10 @@ OBS_VAR{{ns.var_count}}_OPTIONS = desc = "RI"; {%- endif %} {%- endif %} + {%- elif (input_field_group == 'AOD') %} + +OBS_VAR{{ns.var_count}}_OPTIONS = censor_thresh = <0; censor_val = -9999; + {%- endif %} {#- From d3aed7ec93881e614f9a76f0cdff00abee5fa153 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." 
Date: Tue, 28 Jan 2025 21:11:17 +0000 Subject: [PATCH 209/260] Fix field_table modification for UFS_FIRE --- ush/generate_FV3LAM_wflow.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ush/generate_FV3LAM_wflow.py b/ush/generate_FV3LAM_wflow.py index f724cd6852..05ad31b1dc 100755 --- a/ush/generate_FV3LAM_wflow.py +++ b/ush/generate_FV3LAM_wflow.py @@ -279,15 +279,16 @@ def generate_FV3LAM_wflow( ) cp_vrfy(expt_config["workflow"]["CCPP_PHYS_SUITE_IN_CCPP_FP"], expt_config["workflow"]["CCPP_PHYS_SUITE_FP"]) - # If UFS_FIRE, update FIELD_TABLE - field_table_append = """# smoke tracer for UFS_FIRE + # If UFS_FIRE, update FIELD_TABLE + if expt_config['fire'].get('UFS_FIRE'): + field_table_append = """# smoke tracer for UFS_FIRE "TRACER", "atmos_mod", "fsmoke" "longname", "fire smoke" "units", "kg/kg" "profile_type", "fixed", "surface_value=0.0" /\n""" - with open(FIELD_TABLE_FP, "a+", encoding='UTF-8') as file: - file.write(field_table_append) + with open(expt_config["workflow"]["FIELD_TABLE_FP"], "a+", encoding='UTF-8') as file: + file.write(field_table_append) # # Copy the field dictionary file from its location in the From 5404b9a902c83485cd183082255f9ba28490a260 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Thu, 13 Feb 2025 23:30:54 +0000 Subject: [PATCH 210/260] Fix issue with "STALLED" jobs logic in WE2E tests --- tests/WE2E/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/WE2E/utils.py b/tests/WE2E/utils.py index d46d52b468..8d0b204d89 100755 --- a/tests/WE2E/utils.py +++ b/tests/WE2E/utils.py @@ -233,6 +233,8 @@ def update_expt_status(expt: dict, name: str, refresh: bool = False, debug: bool * **DYING:** One or more tasks have died (status DEAD), so this experiment has an error. Experiment monitoring will continue until all previously submitted tasks are in either status DEAD or status SUCCEEDED (see next entry). * **DEAD:** One or more tasks are in status DEAD, and other previously submitted jobs are either DEAD or SUCCEEDED. This experiment will no longer be monitored. * **ERROR:** Could not read the Rocoto database (``.db``) file. This will require manual intervention to solve, so the experiment will no longer be monitored. + * **STALLED:** All submitted jobs are SUCCEEDED but one or more jobs have not been submitted; if this state persists, it will become "STUCK". + * **STUCK:** All submitted jobs are SUCCEEDED but one or more jobs have not been submitted for multiple iterations; this can indicate system-level throttling or a problem with Rocoto dependencies. * **RUNNING:** One or more jobs are in status RUNNING, and other previously submitted jobs are in status QUEUED, SUBMITTED, or SUCCEEDED. This is a normal state; experiment monitoring will continue. * **QUEUED:** One or more jobs are in status QUEUED, and some others may be in status SUBMITTED or SUCCEEDED. This is a normal state; experiment monitoring will continue. * **SUCCEEDED:** All jobs are in status SUCCEEDED; experiment monitoring will continue for one more cycle in case there are unsubmitted jobs remaining. @@ -330,7 +332,8 @@ def update_expt_status(expt: dict, name: str, refresh: bool = False, debug: bool # If all task statuses are "SUCCEEDED", set the experiment status to "SUCCEEDED". This # will trigger a final check using rocotostat to make sure there are no remaining un- # started tests. 
- expt["status"] = "SUCCEEDED" + if expt["status"] not in ['STALLED', 'STUCK']: + expt["status"] = "SUCCEEDED" elif expt["status"] == "CREATED": # Some platforms (including Hera) can have a problem with rocoto jobs not submitting # properly due to build-ups of background processes. This will resolve over time as @@ -535,7 +538,8 @@ def compare_rocotostat(expt_dict,name): # If we're already tracking this task, continue if expt_dict.get(taskname): - continue + if expt_dict.get(taskname).get('status') not in ['STALLED', 'STUCK']: + continue # Otherwise, extract information into dictionary of untracked tasks untracked_tasks.append(taskname) From 9dd85907a78944dd85e9cdc47d78f7b3609bc126 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Fri, 14 Feb 2025 16:12:20 +0000 Subject: [PATCH 211/260] Update Hera modulefiles for MET 12/METplus6 --- modulefiles/build_hera_intel.lua | 2 +- modulefiles/tasks/hera/run_vx.local.lua | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modulefiles/build_hera_intel.lua b/modulefiles/build_hera_intel.lua index a4e3d434b1..2b88744cf9 100644 --- a/modulefiles/build_hera_intel.lua +++ b/modulefiles/build_hera_intel.lua @@ -8,7 +8,7 @@ whatis([===[Loads libraries needed for building the UFS SRW App on Hera ]===]) prepend_path("MODULEPATH","/contrib/sutils/modulefiles") load("sutils") -prepend_path("MODULEPATH", "/scratch1/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core") +prepend_path("MODULEPATH", "/contrib/spack-stack/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core") stack_intel_ver=os.getenv("stack_intel_ver") or "2021.5.0" load(pathJoin("stack-intel", stack_intel_ver)) diff --git a/modulefiles/tasks/hera/run_vx.local.lua b/modulefiles/tasks/hera/run_vx.local.lua index 920a62587f..fb50f6e79e 100644 --- a/modulefiles/tasks/hera/run_vx.local.lua +++ b/modulefiles/tasks/hera/run_vx.local.lua @@ -1,8 +1,8 @@ --[[ Compiler-specific modules are used for met and metplus libraries --]] -local met_ver = (os.getenv("met_ver") or "11.1.1") -local metplus_ver = (os.getenv("metplus_ver") or "5.1.0") +local met_ver = (os.getenv("met_ver") or "12.0.1") +local metplus_ver = (os.getenv("metplus_ver") or "6.0.0") if (mode() == "load") then load(pathJoin("met", met_ver)) load(pathJoin("metplus",metplus_ver)) From e607bf4bef89f91038907cd3554283ce575a4aff Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Fri, 14 Feb 2025 16:13:10 +0000 Subject: [PATCH 212/260] Fix some workflow dependencies from NDAS pb2nc ready file renaming --- parm/wflow/verify_det.yaml | 4 ++-- parm/wflow/verify_ens.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index 22bb56c240..acbd1d64a2 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -172,11 +172,11 @@ metatask_PointStat: {%- for n in range(0, num_obs_days) %} {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} {%- if n == 0 %} - {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/#METAOBTYPE#_nc_obs_" ~ yyyymmdd ~ "_ready.txt" }} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/NDAS_nc_obs_" ~ yyyymmdd ~ "_ready.txt" }} {%- else %} {{- indent ~ "
\n" }} {{- indent ~ "\n" }} - {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/#METAOBTYPE#_nc_obs_" ~ yyyymmdd ~ "_ready.txt" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/NDAS_nc_obs_" ~ yyyymmdd ~ "_ready.txt" }} {%- endif %} {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index d3601c14d8..22d1601b67 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -180,11 +180,11 @@ metatask_GenEnsProd_EnsembleStat_SFC_UPA: {%- for n in range(0, num_obs_days) %} {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} {%- if n == 0 %} - {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/NDAS_nc_obs_" ~ yyyymmdd ~ "_ready.txt" }} {%- else %} {{- indent ~ "\n" }} {{- indent ~ "\n" }} - {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/NDAS_nc_obs_" ~ yyyymmdd ~ "_ready.txt" }} {%- endif %} {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' From 597e5a816f75062fd8db8c72cc0ce2e195d2307d Mon Sep 17 00:00:00 2001 From: "Michael J. Kavulich, Jr." Date: Tue, 18 Feb 2025 23:01:40 +0000 Subject: [PATCH 213/260] Un-hard-code the last MET tools --- scripts/exregional_run_met_gridstat_or_pointstat_vx.sh | 9 --------- scripts/exregional_run_met_pcpcombine.sh | 9 --------- 2 files changed, 18 deletions(-) diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index 7a5ff40412..a72fa25292 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -455,15 +455,6 @@ fi # #----------------------------------------------------------------------- # -#TEMPORARILY POINTING TO BETA RELEASE -MET_ROOT=/contrib/met/12.0.0-beta6 -MET_INSTALL_DIR=${MET_ROOT} -MET_BIN_EXEC=${MET_INSTALL_DIR}/bin -MET_BASE=${MET_INSTALL_DIR}/share/met -METPLUS_ROOT=/contrib/METplus/METplus-6.0.0-beta6 -METPLUS_PATH=${METPLUS_ROOT} -MET_ROOT=/contrib/met/12.0.0-beta6 -#TEMPORARILY POINTING TO BETA RELEASE print_info_msg "$VERBOSE" " Calling METplus to run MET's ${metplus_tool_name} tool for field(s): ${FIELDNAME_IN_MET_FILEDIR_NAMES}" diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 79a195fba6..79f867f7c8 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -450,15 +450,6 @@ fi # #----------------------------------------------------------------------- # -#TEMPORARILY POINTING TO BETA RELEASE -MET_ROOT=/contrib/met/12.0.0-beta6 -MET_INSTALL_DIR=${MET_ROOT} -MET_BIN_EXEC=${MET_INSTALL_DIR}/bin -MET_BASE=${MET_INSTALL_DIR}/share/met -METPLUS_ROOT=/contrib/METplus/METplus-6.0.0-beta6 -METPLUS_PATH=${METPLUS_ROOT} -MET_ROOT=/contrib/met/12.0.0-beta6 -#TEMPORARILY POINTING TO BETA RELEASE print_info_msg "$VERBOSE" " Calling METplus to run MET's ${metplus_tool_name} tool for field(s): ${FIELDNAME_IN_MET_FILEDIR_NAMES}" ${METPLUS_PATH}/ush/run_metplus.py \ From a00eae9d8ab8df3b2cfc003e1afa44e9170f58dc Mon Sep 17 00:00:00 2001 From: "Michael J. Kavulich, Jr." 
Date: Tue, 18 Feb 2025 23:03:35 +0000 Subject: [PATCH 214/260] Upgrade all MET/METplus to new versions --- modulefiles/tasks/derecho/run_vx.local.lua | 4 ++-- modulefiles/tasks/gaea/run_vx.local.lua | 4 ++-- modulefiles/tasks/hercules/run_vx.local.lua | 4 ++-- modulefiles/tasks/jet/run_vx.local.lua | 4 ++-- modulefiles/tasks/noaacloud/run_vx.local.lua | 4 ++-- modulefiles/tasks/orion/run_vx.local.lua | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/modulefiles/tasks/derecho/run_vx.local.lua b/modulefiles/tasks/derecho/run_vx.local.lua index 2613f8ae0e..2120a89e6b 100644 --- a/modulefiles/tasks/derecho/run_vx.local.lua +++ b/modulefiles/tasks/derecho/run_vx.local.lua @@ -2,8 +2,8 @@ Compiler-specific modules are used for met and metplus libraries --]] -local met_ver = (os.getenv("met_ver") or "11.1.0") -local metplus_ver = (os.getenv("metplus_ver") or "5.1.0") +local met_ver = (os.getenv("met_ver") or "12.0.1") +local metplus_ver = (os.getenv("metplus_ver") or "6.0.0") if (mode() == "load") then load(pathJoin("met", met_ver)) load(pathJoin("metplus",metplus_ver)) diff --git a/modulefiles/tasks/gaea/run_vx.local.lua b/modulefiles/tasks/gaea/run_vx.local.lua index 57cdfbb1cc..b26d9364ed 100644 --- a/modulefiles/tasks/gaea/run_vx.local.lua +++ b/modulefiles/tasks/gaea/run_vx.local.lua @@ -1,8 +1,8 @@ --[[ Compiler-specific modules are used for met and metplus libraries --]] -local met_ver = (os.getenv("met_ver") or "11.1.0") -local metplus_ver = (os.getenv("metplus_ver") or "5.1.0") +local met_ver = (os.getenv("met_ver") or "12.0.1") +local metplus_ver = (os.getenv("metplus_ver") or "6.0.0") if (mode() == "load") then load(pathJoin("met", met_ver)) load(pathJoin("metplus",metplus_ver)) diff --git a/modulefiles/tasks/hercules/run_vx.local.lua b/modulefiles/tasks/hercules/run_vx.local.lua index 737fc4f7cc..fb50f6e79e 100644 --- a/modulefiles/tasks/hercules/run_vx.local.lua +++ b/modulefiles/tasks/hercules/run_vx.local.lua @@ -1,8 +1,8 @@ --[[ Compiler-specific modules are used for met and metplus libraries --]] -local met_ver = (os.getenv("met_ver") or "11.1.0") -local metplus_ver = (os.getenv("metplus_ver") or "5.1.0") +local met_ver = (os.getenv("met_ver") or "12.0.1") +local metplus_ver = (os.getenv("metplus_ver") or "6.0.0") if (mode() == "load") then load(pathJoin("met", met_ver)) load(pathJoin("metplus",metplus_ver)) diff --git a/modulefiles/tasks/jet/run_vx.local.lua b/modulefiles/tasks/jet/run_vx.local.lua index 737fc4f7cc..fb50f6e79e 100644 --- a/modulefiles/tasks/jet/run_vx.local.lua +++ b/modulefiles/tasks/jet/run_vx.local.lua @@ -1,8 +1,8 @@ --[[ Compiler-specific modules are used for met and metplus libraries --]] -local met_ver = (os.getenv("met_ver") or "11.1.0") -local metplus_ver = (os.getenv("metplus_ver") or "5.1.0") +local met_ver = (os.getenv("met_ver") or "12.0.1") +local metplus_ver = (os.getenv("metplus_ver") or "6.0.0") if (mode() == "load") then load(pathJoin("met", met_ver)) load(pathJoin("metplus",metplus_ver)) diff --git a/modulefiles/tasks/noaacloud/run_vx.local.lua b/modulefiles/tasks/noaacloud/run_vx.local.lua index 737fc4f7cc..fb50f6e79e 100644 --- a/modulefiles/tasks/noaacloud/run_vx.local.lua +++ b/modulefiles/tasks/noaacloud/run_vx.local.lua @@ -1,8 +1,8 @@ --[[ Compiler-specific modules are used for met and metplus libraries --]] -local met_ver = (os.getenv("met_ver") or "11.1.0") -local metplus_ver = (os.getenv("metplus_ver") or "5.1.0") +local met_ver = (os.getenv("met_ver") or "12.0.1") +local metplus_ver = (os.getenv("metplus_ver") 
or "6.0.0") if (mode() == "load") then load(pathJoin("met", met_ver)) load(pathJoin("metplus",metplus_ver)) diff --git a/modulefiles/tasks/orion/run_vx.local.lua b/modulefiles/tasks/orion/run_vx.local.lua index 737fc4f7cc..fb50f6e79e 100644 --- a/modulefiles/tasks/orion/run_vx.local.lua +++ b/modulefiles/tasks/orion/run_vx.local.lua @@ -1,8 +1,8 @@ --[[ Compiler-specific modules are used for met and metplus libraries --]] -local met_ver = (os.getenv("met_ver") or "11.1.0") -local metplus_ver = (os.getenv("metplus_ver") or "5.1.0") +local met_ver = (os.getenv("met_ver") or "12.0.1") +local metplus_ver = (os.getenv("metplus_ver") or "6.0.0") if (mode() == "load") then load(pathJoin("met", met_ver)) load(pathJoin("metplus",metplus_ver)) From cf412be7944ec38443a7c13cbc13a5daf73f3c3e Mon Sep 17 00:00:00 2001 From: "Michael J. Kavulich, Jr." Date: Tue, 18 Feb 2025 23:05:56 +0000 Subject: [PATCH 215/260] Fix name of PM25 obs for HPSS data --- parm/metplus/vx_configs/vx_config_det.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parm/metplus/vx_configs/vx_config_det.yaml b/parm/metplus/vx_configs/vx_config_det.yaml index 5ea36b2da0..7f83a65192 100644 --- a/parm/metplus/vx_configs/vx_config_det.yaml +++ b/parm/metplus/vx_configs/vx_config_det.yaml @@ -211,7 +211,7 @@ AOD: AOTK%%AOD: L0%%L550: [] PM25: - PM25%%PM2.5: + PM25%%PM25: Z8%%A1: [] PM10: PM10%%PM10: From 5288774174b3e8219dcc023ff08b1d2d9b63a006 Mon Sep 17 00:00:00 2001 From: "Michael J. Kavulich, Jr." Date: Tue, 18 Feb 2025 23:06:47 +0000 Subject: [PATCH 216/260] Fix "available hours" variable for generalized pointstat task --- parm/wflow/verify_det.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index acbd1d64a2..d276fd81a9 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -141,6 +141,7 @@ metatask_PointStat: FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA", "AOD", "PM25", "PM10"] %}{{ "%s " % var }}{% endif %}{% endfor %}' METAOBTYPE: '{% for var in verification.VX_FIELD_GROUPS %}{% if var =="SFC" or var =="UPA" %}NDAS {% elif var =="AOD" %}AERONET {% elif var =="PM25" or var =="PM10" %}AIRNOW {% endif %}{% endfor %}' METAOBS_DIR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var =="SFC" or var =="UPA" %}&NDAS_OBS_DIR; {% elif var =="AOD" %}&AERONET_OBS_DIR; {% elif var =="PM25" or var =="PM10" %}&AIRNOW_OBS_DIR; {% endif %}{% endfor %}' + METAOB_AVAIL_HRS: '{% for var in verification.VX_FIELD_GROUPS %}{% if var =="SFC" or var =="UPA" %} {{- verification.NDAS_OBS_AVAIL_INTVL_HRS }} {% elif var =="AOD" %}{{- verification.AERONET_OBS_AVAIL_INTVL_HRS }} {% elif var =="PM25" or var =="PM10" %}{{- verification.AIRNOW_OBS_AVAIL_INTVL_HRS }} {% endif %}{% endfor %}' metatask_PointStat_#FIELD_GROUP#_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' @@ -155,7 +156,7 @@ metatask_PointStat: OBTYPE: '#METAOBTYPE#' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' - OBS_AVAIL_INTVL_HRS: '{{- verification.NDAS_OBS_AVAIL_INTVL_HRS }}' + OBS_AVAIL_INTVL_HRS: '#METAOB_AVAIL_HRS#' FCST_LEVEL: 'all' FCST_THRESH: 'all' walltime: 01:00:00 @@ -172,11 +173,11 @@ metatask_PointStat: {%- for n in range(0, num_obs_days) %} {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} {%- if n == 0 %} - {{- 
workflow.WFLOW_FLAG_FILES_DIR ~ "/NDAS_nc_obs_" ~ yyyymmdd ~ "_ready.txt" }} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/#METAOBTYPE#_nc_obs_" ~ yyyymmdd ~ "_ready.txt" }} {%- else %} {{- indent ~ "\n" }} {{- indent ~ "\n" }} - {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/NDAS_nc_obs_" ~ yyyymmdd ~ "_ready.txt" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/#METAOBTYPE#_nc_obs_" ~ yyyymmdd ~ "_ready.txt" }} {%- endif %} {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} {%- endfor %}' From 3526e79bb23d6ba3d57e74c2ef0492905a55f9d5 Mon Sep 17 00:00:00 2001 From: "Michael J. Kavulich, Jr." Date: Wed, 19 Feb 2025 05:06:48 +0000 Subject: [PATCH 217/260] Update smoke/dust test for latest data --- ...config.MET_verification_smoke_only_vx.yaml | 29 ++++++++++--------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml index d27a36d2fe..3df8855ac8 100644 --- a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml @@ -1,13 +1,13 @@ metadata: description: |- This test checks the capability of the workflow to run verification tasks - for smoke data (AERONET AOD and AIRNOW PM). + for smoke data (AERONET AOD and AIRNOW PM). Also does SFC and NOHRSC snow verification user: RUN_ENVIR: community workflow: CCPP_PHYS_SUITE: FV3_RAP - DATE_FIRST_CYCL: '2023121700' - DATE_LAST_CYCL: '2023121700' + DATE_FIRST_CYCL: '2024011100' + DATE_LAST_CYCL: '2024011100' FCST_LEN_HRS: 24 PREEXISTING_DIR_METHOD: rename rocoto: @@ -18,16 +18,19 @@ rocoto: walltime: 01:00:00 verification: VX_FCST_MODEL_NAME: RRFS_smoke_test - VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "SFC", "UPA", "AOD", "PM25"] -# VX_FIELD_GROUPS: [ "AOD", "PM25", "PM10" ] - VX_FCST_INPUT_BASEDIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/smoke_vx/fcst - FCST_SUBDIR_TEMPLATE: '{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}' - FCST_FN_TEMPLATE: 'rrfs.t{init?fmt=%H?shift=-${time_lag}}z.prslev.f{lead?fmt=%HHH?shift=${time_lag}}.conus_3km.grib2' + VX_FIELD_GROUPS: [ "SFC", "AOD", "PM25", "PM10"] + VX_FCST_INPUT_BASEDIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/WE2E_test_data/RRFS + FCST_SUBDIR_TEMPLATE: '' + FCST_FN_TEMPLATE: 'rrfs.t00z.prslev.f{lead?fmt=%HHH}.conus.grib2' + # Because these files are retrieved from HPSS and not AWS, they have slightly different filenames OBS_AIRNOW_FN_TEMPLATES: [ 'PM', '{valid?fmt=%Y%m%d}/HourlyAQObs_{valid?fmt=%Y%m%d%H}.dat' ] AIRNOW_INPUT_FORMAT: airnowhourlyaqobs - CCPA_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/CCPA_obs - MRMS_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/MRMS_obs - NDAS_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/NDAS_obs - AERONET_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/AERONET_obs/ - AIRNOW_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/rebase_june/staged_case/AIRNOW_obs/ +# CCPA_OBS_DIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/staged_case/CCPA_obs +# MRMS_OBS_DIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/staged_case/MRMS_obs +# NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/ndas' +# AERONET_OBS_DIR: '{{ workflow.EXPTDIR }}/aeronet' +# AIRNOW_OBS_DIR: '{{ workflow.EXPTDIR }}/airnow' + NDAS_OBS_DIR: 
/lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/WE2E_test_data/ndas + AERONET_OBS_DIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/WE2E_test_data/aeronet + AIRNOW_OBS_DIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/WE2E_test_data/airnow From 6d98d7e21916b4e3537c99fc98be9f4cbbb6aff5 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Wed, 19 Feb 2025 08:34:57 -0700 Subject: [PATCH 218/260] Add docstring for new function, pylint suggestions --- ush/generate_FV3LAM_wflow.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/ush/generate_FV3LAM_wflow.py b/ush/generate_FV3LAM_wflow.py index 05ad31b1dc..ac2acde904 100755 --- a/ush/generate_FV3LAM_wflow.py +++ b/ush/generate_FV3LAM_wflow.py @@ -342,6 +342,7 @@ def generate_FV3LAM_wflow( # in the flattened expt_config dictionary # TODO: Reference all these variables in their respective # dictionaries, instead. + # pylint: disable=undefined-variable import_vars(dictionary=flatten_dict(expt_config)) export_vars(source_dict=flatten_dict(expt_config)) settings = {} @@ -437,7 +438,6 @@ def generate_FV3LAM_wflow( settings["nam_sfcperts"] = nam_sfcperts_dict - settings_str = cfg_to_yaml_str(settings) # #----------------------------------------------------------------------- # @@ -578,6 +578,15 @@ def generate_FV3LAM_wflow( return EXPTDIR def setup_fv3_namelist(expt_config,debug): + """ + Updates parameters specific to the FV3ATM namelist for the run_fcst step. + + Args: + expt_config (dict): The full experiment configuration dictionary + debug (bool): Enable extra output for debugging + Returns: + None + """ # From here on out, going back to setting variables for everything # in the flattened expt_config dictionary @@ -586,6 +595,8 @@ def setup_fv3_namelist(expt_config,debug): import_vars(dictionary=flatten_dict(expt_config)) export_vars(source_dict=flatten_dict(expt_config)) + # pylint: disable=undefined-variable + log_info( f""" Setting parameters in weather model's namelist file (FV3_NML_FP): @@ -600,7 +611,7 @@ def setup_fv3_namelist(expt_config,debug): kice = None if SDF_USES_RUC_LSM: kice = 9 - # + # # Set lsoil, which is the number of input soil levels provided in the # chgres_cube output NetCDF file. This is the same as the parameter # nsoill_out in the namelist file for chgres_cube. [On the other hand, @@ -610,7 +621,7 @@ def setup_fv3_namelist(expt_config,debug): # lsoil as the one used to set nsoill_out in exregional_make_ics.sh. # See that script for details. # - # NOTE: + # NOTE: # May want to remove lsoil from FV3.input.yml (and maybe input.nml.FV3). # Also, may want to set lsm here as well depending on SDF_USES_RUC_LSM. # @@ -875,7 +886,6 @@ def setup_logging(logfile: str = "log.generate_FV3LAM_wflow", debug: bool = Fals ) sys.exit(1) - # pylint: disable=undefined-variable # Note workflow generation completion log_info( f""" ======================================================================== Experiment generation completed.
The experiment directory is: - EXPTDIR='{EXPTDIR}' + EXPTDIR='{expt_dir}' ======================================================================== """ From 9d566feb47e21e8ce2237d078f10f17ff5033f35 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Wed, 19 Feb 2025 11:32:11 -0700 Subject: [PATCH 219/260] First round of documentation updates for smoke and dust verification --- .../BuildingRunningTesting/RunSRW.rst | 54 ++++++++++++++++-- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 57 ++++++++++++------- doc/UsersGuide/Reference/Glossary.rst | 9 ++- ush/get_obs.py | 2 +- 4 files changed, 96 insertions(+), 26 deletions(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index b66b399652..6100bd22a5 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -628,7 +628,9 @@ a staged forecast (e.g. from another forecasting system) need to add additional machine file (``ush/machine/.yaml``) or their ``config.yaml`` file. Other users may skip to the next step (:numref:`Section %s: Generate the SRW App Workflow `). -To use METplus verification, MET and METplus modules need to be installed on the system. +To use METplus verification, MET and METplus modules need to be installed on the system. In order +to ensure all capabilities work as expected, users should use at a minimum the latest release of +these tools as of the latest SRW release: MET 12.0.1 and METplus 6.0.0. .. note:: If users update their METplus installation, they must also update the module load statements in ``ufs-srweather-app/modulefiles/tasks//run_vx.local`` to correspond to their system's updated installation: @@ -794,6 +796,16 @@ fields they include are given in :numref:`Table %s `. * - UPA - NDAS - Various upper-air fields (e.g. at 800 mb, 500 mb, etc) + * - AOD + - AERONET + - Aerosol Optical Depth + * - PM25 + - AIRNOW + - Volumetric mass of particulate matter diameter 2.5 microns or less + * - PM10 + - AIRNOW + - Volumetric mass of particulate matter diameter 10 microns or less + The ``VX_FIELD_GROUPS`` list in the ``verification:`` section of ``config.yaml`` specifies the VX field groups for which to run verification. In order to avoid unwanted computation, the Rocoto XML will include @@ -818,7 +830,7 @@ summer period for which ``ASNOW`` is not relevant. Staging Observation Files `````````````````````````````````` The taskgroup in ``verify_pre.yaml`` defines a set of workflow tasks named ``get_obs_*``, where the ``*`` -represents any one of the supported obs types: :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS`. These ``get_obs_*`` tasks +represents any one of the supported obs types: :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, :term:`NDAS`, :term:`AERONET`, and :term:`AIRNOW`. These ``get_obs_*`` tasks will first check on disk for the existence of the obs files required for VX using the locations specified by the variables ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` in the ``verification:`` section of ``config.yaml``. The ``*_OBS_DIR`` are the base directories in which the obs files are or should be @@ -844,11 +856,11 @@ place them in the locations specified by ``{*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,. that attempt is successful, the workflow will move on to subsequent tasks. 
Thus: * Users who have the obs files already available (staged) on their system only need to set ``*_OBS_DIR`` - and ``OBS_*_FN_TEMPLATES[1,3,...]`` in ``config.yaml`` to match those staging locations and file names. + and ``OBS_*_FN_TEMPLATES[1,3,...]`` in ``config.yaml`` to match those staging locations and file names. * Users who do not have the obs files available on their systems and do not have access to NOAA HPSS - need to download :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` files manually - from collections of publicly available data. + need to download :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, :term:`NDAS`, :term:`AERONET`, and/or + :term:`AIRNOW` files manually from collections of publicly available data. Then, as above, they must set ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` to match those staging locations and file names. @@ -870,12 +882,22 @@ and ``OBS_*_FN_TEMPLATES`` might be set as follows: NOHRSC_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/nohrsc MRMS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/mrms NDAS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ndas + AERONET_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/aeronet + AIRNOW_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/airnow OBS_CCPA_FN_TEMPLATES: [ 'APCP', '{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2' ] OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW', 'sfav2_CONUS_6h_{valid?fmt=%Y%m%d%H}_grid184.grb2' ] OBS_MRMS_FN_TEMPLATES: [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2', 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] OBS_NDAS_FN_TEMPLATES: [ 'SFC_UPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] + OBS_AERONET_FN_TEMPLATES: [ 'AOD', '{valid?fmt=%Y%m%d}/{valid?fmt=%Y%m%d}.lev15' ] + OBS_AIRNOW_FN_TEMPLATES: [ 'PM', '{valid?fmt=%Y%m%d}/HourlyAQObs_{valid?fmt=%Y%m%d%H}.dat' ] + +.. note:: + For AIRNOW obs retrieved from AWS (see ``parm/data_locations.yml``), the default value should be + replaced with: + + ``OBS_AIRNOW_FN_TEMPLATES: [ 'PM', '{valid?fmt=%Y%m%d}/HourlyData_{valid?fmt=%Y%m%d%H}.dat' ]`` Now further consider the CCPA obs type. If one of the days encompassed by the forecast(s) is 20240429, then the ``get_obs_ccpa`` task associated with this day will check for the existence of the set of obs @@ -1102,9 +1124,26 @@ interval (for cumulative fields such as accumulated precipitation), and the name from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included in the workflow only if ``'SFC'`` and/or ``'UPA'`` are included in ``VX_FIELD_GROUPS``. + * - :bolditalic:`task_get_obs_aeronet` (``verify_pre.yaml``) - Checks for existence of staged :term:`AERONET` obs files at locations specified by ``AERONET_OBS_DIR`` + and ``OBS_AERONET_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if ``'AOD'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_get_obs_airnow` (``verify_pre.yaml``) - Checks for existence of staged :term:`AIRNOW` obs files at locations specified by ``AIRNOW_OBS_DIR`` + and ``OBS_AIRNOW_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations.
This task is included + in the workflow only if ``'PM25'`` and/or ``'PM10'`` are included in ``VX_FIELD_GROUPS``. + * - :bolditalic:`task_run_MET_Pb2nc_obs_NDAS` (``verify_pre.yaml``) - Converts NDAS obs prepbufr files to NetCDF format. + * - :bolditalic:`metatask_ASCII2nc_obs` (``verify_pre.yaml``) - Set of tasks that convert observations in ASCII text format to NetCDF files that can be processed by + :term:`METplus`; these observation types include AERONET and AIRNOW. This metatask is included in the + workflow only if ``'AOD'``, ``'PM25'``, or ``'PM10'`` are included in ``VX_FIELD_GROUPS``. + * - :bolditalic:`metatask_PcpCombine_APCP_all_accums_obs_CCPA` (``verify_pre.yaml``) - Set of tasks that generate NetCDF files containing observed APCP for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS``. Files for accumulation intervals larger than the one @@ -1127,6 +1166,11 @@ interval (for cumulative fields such as accumulated precipitation), and the name the task is being run. This metatask is included in the workflow only if ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. + * - :bolditalic:`metatask_PcpCombine_fcst_PM_all_mems` (``verify_pre.yaml``) - Set of tasks that convert the raw forecast output of particulate matter into the appropriate bins for + PM 2.5 and PM10. This metatask is included in the workflow only if ``'PM25'`` or ``'PM10'`` is included in + ``VX_FIELD_GROUPS``. + * - :bolditalic:`metatask_check_post_output_all_mems` (``verify_pre.yaml``) - Set of tasks that ensure that the post-processed forecast files required for verification exist in the locations specified by ``VX_FCST_INPUT_BASEDIR``, ``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE``. diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 50d343f57e..b91892813e 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -199,8 +199,8 @@ These directories are used only by the ``run_WE2E_tests.py`` script, so they are ``TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS``, ``TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS``: (Default: "") These parameters are used by the testing script to test the mechanism that allows users to point to a data stream on disk. They set up a sandbox location that mimics the stream in a more controlled way and test the ability to access :term:`ICS` or :term:`LBCS`, respectively. -``TEST_CCPA_OBS_DIR``, ``TEST_MRMS_OBS_DIR``, ``TEST_NDAS_OBS_DIR``: (Default: "") -These parameters are used by the testing script to test the mechanism that allows user to point to data streams on disk for observation data for verification tasks. They test the ability for users to set ``CCPA_OBS_DIR``, ``MRMS_OBS_DIR``, and ``NDAS_OBS_DIR`` respectively. +``TEST_CCPA_OBS_DIR``, ``TEST_MRMS_OBS_DIR``, ``TEST_NDAS_OBS_DIR``, ``TEST_NOHRSC_OBS_DIR``, ``TEST_AERONET_OBS_DIR``, ``TEST_AIRNOW_OBS_DIR``: (Default: "") +These parameters are used by the testing script to test the mechanism that allows users to point to data streams on disk for observation data for verification tasks. They test the ability for users to set ``CCPA_OBS_DIR``, ``MRMS_OBS_DIR``, ``NDAS_OBS_DIR``, etc. ``TEST_VX_FCST_INPUT_BASEDIR``: (Default: "") The path to user-staged forecast files for WE2E testing of verification using user-staged forecast files in a known location on a given platform.
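For concreteness, here is a minimal sketch of how a machine file (``ush/machine/<machine>.yaml``) might stage these ``TEST_*`` locations; the root path is illustrative, while the Jinja-templated entries follow the pattern used in the machine-file updates later in this patch series:

.. code-block:: yaml

   platform:
     WE2E_TEST_DATA: /path/to/UFS_SRW_data/develop  # illustrative staging root
     TEST_CCPA_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ccpa/proc'
     TEST_AERONET_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/aeronet'
     TEST_AIRNOW_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/airnow'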
@@ -619,7 +619,7 @@ Pre-Existing Directory Parameter * **"delete":** The preexisting directory is deleted and a new directory (having the same name as the original preexisting directory) is created. - * **"rename":** The preexisting directory is renamed and a new directory (having the same name as the original pre-existing directory) is created. The new name of the preexisting directory consists of its original name and the suffix "_old###", where ``###`` is a 3-digit integer chosen to make the new name unique. + * **"rename":** The preexisting directory is renamed and a new directory (having the same name as the original pre-existing directory) is created. The new name of the preexisting directory consists of its original name and the suffix "_old_YYYYMMDD_HHmmss", where ``YYYYMMDD_HHmmss`` is the full date and time of the rename. * **"reuse":** This method will keep the preexisting directory intact. However, when the preexisting directory is ``$EXPDIR``, this method will save all old files to a subdirectory ``oldxxx/`` and then populate new files into the ``$EXPDIR`` directory. This is useful to keep ongoing runs uninterrupted; rocoto ``*db`` files and previous cycles will stay and hence there is no need to manually copy or move ``*db`` files and previous cycles back, and there is no need to manually restart related rocoto tasks that failed during the workflow generation process. This method may be best suited for incremental system reuses. @@ -1662,7 +1662,7 @@ METplus-Specific Parameters ----------------------------------- ``METPLUS_VERBOSITY_LEVEL``: (Default: ``2``) - Logging verbosity level used by METplus verification tools. Valid values: 0 to 5, with 0 quiet and 5 loudest. + Logging verbosity level used by METplus verification tools. Valid values: 0 to 9, with 0 having the fewest log messages and 9 having the most. Levels 5 and above can result in very large log files and slower tool execution. VX Parameters for Observations @@ -1677,11 +1677,13 @@ VX Parameters for Observations * NOHRSC (National Operational Hydrologic Remote Sensing Center) * MRMS (Multi-Radar Multi-Sensor) * NDAS (NAM Data Assimilation System) + * AERONET (Aerosol Robotic Network) + * AIRNOW (AirNow air quality reports) The script ``ush/get_obs.py`` contains further details on the files and directory structure of each obs type. -``[CCPA|NOHRSC|MRMS|NDAS]_OBS_AVAIL_INTVL_HRS``: (Defaults: [1|6|1|1]) +``[CCPA|NOHRSC|MRMS|NDAS|AERONET|AIRNOW]_OBS_AVAIL_INTVL_HRS``: (Defaults: [1|6|1|1|24|1]) Time interval (in hours) at which the various types of obs are available on NOAA's HPSS. @@ -1690,8 +1692,8 @@ VX Parameters for Observations is the shortest output interval for forecasts, i.e. the forecasts cannot (yet) support sub-hourly output. -``[CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/[ccpa|nohrsc|mrms|ndas]"``) +``[CCPA|NOHRSC|MRMS|NDAS|AERONET|AIRNOW]_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/[ccpa|nohrsc|mrms|ndas|aeronet|airnow]"``) Base directory in which CCPA, NOHRSC, MRMS, NDAS, AERONET, or AIRNOW obs files needed by the verification tasks are located. If the files do not exist, they will be retrieved and placed under this directory. Note that: @@ -1704,7 +1706,7 @@ VX Parameters for Observations that need to be corrected during obs retrieval. This is described in more detail in the script ``ush/get_obs.py``.
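As a minimal sketch (the paths are hypothetical), a user who has pre-staged AERONET and AIRNOW obs could point the workflow at them in the ``verification:`` section of ``config.yaml``; the variable names and the 24-hour AERONET availability interval match the defaults documented above:

.. code-block:: yaml

   verification:
     AERONET_OBS_DIR: /path/to/staged/obs_data/aeronet  # hypothetical staged location
     AIRNOW_OBS_DIR: /path/to/staged/obs_data/airnow    # hypothetical staged location
     AERONET_OBS_AVAIL_INTVL_HRS: 24                    # AERONET obs are daily (default)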
-``OBS_[CCPA|NOHRSC|MRMS|NDAS]_FN_TEMPLATES``: +``OBS_[CCPA|NOHRSC|MRMS|NDAS|AERONET|AIRNOW]_FN_TEMPLATES``: **Defaults:** ``OBS_CCPA_FN_TEMPLATES``: @@ -1732,6 +1734,16 @@ VX Parameters for Observations [ 'SFCandUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] + ``OBS_AERONET_FN_TEMPLATES``: + .. code-block:: console + + [ 'AOD', '{valid?fmt=%Y%m%d}/{valid?fmt=%Y%m%d}.lev15' ] + + ``OBS_AIRNOW_FN_TEMPLATES``: + .. code-block:: console + + [ 'PM', '{valid?fmt=%Y%m%d}/HourlyData_{valid?fmt=%Y%m%d%H}.dat' ] + File name templates for various obs types. These are meant to be used in METplus configuration files and thus contain METplus time formatting strings. Each of these variables is a python list containing pairs of @@ -1840,7 +1852,7 @@ VX Parameters for Observations {{- "ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }} METplus template for the names of the NetCDF files generated by the - worfklow verification tasks that call METplus's PcpCombine tool on + workflow verification tasks that call METplus's PcpCombine tool on CCPA observations. These files will contain observed accumulated precipitation in NetCDF format for various accumulation intervals. @@ -1853,26 +1865,33 @@ VX Parameters for Observations {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }} METplus template for the names of the NetCDF files generated by the - worfklow verification tasks that call METplus's PcpCombine tool on + workflow verification tasks that call METplus's PcpCombine tool on NOHRSC observations. These files will contain observed accumulated - snowfall for various accumulaton intervals. + snowfall for various accumulation intervals. + +``OBS_AERONET_FN_TEMPLATE_ASCII2NC_OUTPUT``: (Default: ``'hourly_aeronet_obs_{valid?fmt=%Y%m%d}00.nc'``) + + METplus template for the names of the NetCDF files generated by the + workflow verification tasks that call METplus's ASCII2NC tool on + AERONET observations. + +``OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT``: (Default: ``'hourly_airnow_obs_{valid?fmt=%Y%m%d}00.nc'``) + + METplus template for the names of the NetCDF files generated by the + workflow verification tasks that call METplus's ASCII2NC tool on + AIRNOW observations. ``OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT``: (Default: ``'${OBS_NDAS_FN_TEMPLATES[1]}.nc'``) METplus template for the names of the NetCDF files generated by the - worfklow verification tasks that call METplus's Pb2nc tool on the + workflow verification tasks that call METplus's Pb2nc tool on the prepbufr files in NDAS observations. These files will contain the observed surface (SFC) and upper-air (UPA) fields in NetCDF format (instead of NDAS's native prepbufr format). -``NUM_MISSING_OBS_FILES_MAX``: (Default: 2) +``NUM_MISSING_OBS_FILES_MAX``: (Default: 0) For verification tasks that need observational data, this specifies the maximum number of observation files that may be missing. If more than this number are missing, the verification task will error out. - This is a crude way of checking that there are enough obs to conduct - verification (crude because this number should probably depend on the - field being verified, the time interval between observations, the - length of the forecast, etc; an alternative may be to specify the - maximum allowed fraction of obs files that can be missing). 
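To make the METplus time-formatting syntax in these templates concrete, the following small Python sketch expands ``{valid?fmt=...}`` tokens with ``strftime``; it is an illustration only, not METplus's actual template engine, and the helper name is hypothetical:

.. code-block:: python

   import re
   from datetime import datetime

   def expand_valid(template: str, valid: datetime) -> str:
       # Replace each {valid?fmt=...} token with the strftime-formatted valid time.
       return re.sub(r"\{valid\?fmt=([^}]+)\}",
                     lambda m: valid.strftime(m.group(1)),
                     template)

   # The default AIRNOW template from above, for a 2024-01-11 06Z valid time:
   print(expand_valid("{valid?fmt=%Y%m%d}/HourlyData_{valid?fmt=%Y%m%d%H}.dat",
                      datetime(2024, 1, 11, 6)))
   # -> 20240111/HourlyData_2024011106.dat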
VX Parameters for Forecasts @@ -1941,7 +1960,7 @@ VX Parameters for Forecasts {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc" }} METplus template for the names of the NetCDF files generated by the - worfklow verification tasks that call METplus's PcpCombine tool on + workflow verification tasks that call METplus's PcpCombine tool on forecast output. These files will contain forecast accumulated precipitation in NetCDF format for various accumulation intervals. diff --git a/doc/UsersGuide/Reference/Glossary.rst b/doc/UsersGuide/Reference/Glossary.rst index 5b60c5b40d..9db8528383 100644 --- a/doc/UsersGuide/Reference/Glossary.rst +++ b/doc/UsersGuide/Reference/Glossary.rst @@ -12,6 +12,12 @@ Glossary advection According to the American Meteorological Society (AMS) definition, `advection `_ is "The process of transport of an atmospheric property solely by the mass motion (velocity field) of the atmosphere." In common parlance, advection is movement of atmospheric substances that are carried around by the wind. + AERONET + The "`AErosol RObotic NETwork `_": A worldwide ground-based remote sensing aerosol network established by NASA and PHOTONS. The SRW verification tasks can use "Level 1.5" (cloud-screened and quality-controlled) aerosol optical depth observations. + + AIRNOW + A North American ground-level air quality measurement network. The SRW verification tasks can use PM2.5 and PM10 observations. More information is available at https://www.airnow.gov/ + AQM The `Air Quality Model `__ (AQM) is a UFS Application that dynamically couples the Community Multiscale Air Quality (:term:`CMAQ`) model with the UFS Weather Model through the :term:`NUOPC` Layer to simulate temporal and spatial variations of atmospheric compositions (e.g., ozone and aerosol compositions). The CMAQ, treated as a column chemistry model, updates concentrations of chemical species (e.g., ozone and aerosol compositions) at each integration time step. The transport terms (e.g., :term:`advection` and diffusion) of all chemical species are handled by the UFS Weather Model as :term:`tracers`. @@ -159,7 +165,8 @@ Glossary The `Modern-Era Retrospective analysis for Research and Applications, Version 2 `__ provides satellite observation data back to 1980. According to NASA, "It was introduced to replace the original MERRA dataset because of the advances made in the assimilation system that enable assimilation of modern hyperspectral radiance and microwave observations, along with GPS-Radio Occultation datasets. It also uses NASA's ozone profile observations that began in late 2004. Additional advances in both the GEOS model and the GSI assimilation system are included in MERRA-2. Spatial resolution remains about the same (about 50 km in the latitudinal direction) as in MERRA." MET - The `Model Evaluation Tools `__ is a highly-configurable, state-of-the-art suite of verification tools developed at the :term:`DTC`. + METplus + The `Model Evaluation Tools `__ is a highly-configurable, state-of-the-art suite of verification tools developed at the :term:`DTC`. `METplus `_ is a suite of Python wrappers providing low-level automation of the MET tools. MPI MPI stands for Message Passing Interface. An MPI is a standardized communication system used in parallel programming. It establishes portable and efficient syntax for the exchange of messages and data between multiple processors that are used by a single computer program. An MPI is required for high-performance computing (HPC) systems.
diff --git a/ush/get_obs.py b/ush/get_obs.py index a8de1d7c1a..14193fbf57 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -321,7 +321,7 @@ def get_obs(config, obtype, yyyymmdd_task): containing a single text file that contains all of the day's observations. AIRNOW Air Quality Particulate Matter (PM25, PM10) observations: - ------------------------------------------------------------- + ---------------------------------------------------------------- For AIRNOW, the archive interval is 24 hours. There is one archive per day containing one text file per hour that contains all the observation for that hour. From 92de91bc86e724c9147fc1194916ae011ed7e229 Mon Sep 17 00:00:00 2001 From: "Michael J. Kavulich, Jr." Date: Wed, 19 Feb 2025 20:20:55 +0000 Subject: [PATCH 220/260] Hopefully last round of real changes! - Remove the convoluted logic of trying to automatically determine the ASCII2NC_INPUT_FORMAT for AIRNOW...instead just use documentation - Of course, I had to replace this with another not-so-great bit of logic for the different observation name type with airnow from AWS - Reduce FCST length in new test to 18 hours to make it quicker but still get daytime hours for AOD, remove unneeded variables - Better default values for AIRNOW and AERONET obs dirs, OBS_AIRNOW_FN_TEMPLATES, AIRNOW_INPUT_FORMAT - New variables to specify the data store (hpss or aws) for observations. Only tested with AIRNOW, but should work for others if given the right settings --- scripts/exregional_run_met_ascii2nc_obs.sh | 14 +-------- ...gional_run_met_gridstat_or_pointstat_vx.sh | 29 ++++++++---------- tests/WE2E/machine_suites/coverage.jet | 1 + ...config.MET_verification_smoke_only_vx.yaml | 10 +------ ush/config_defaults.yaml | 30 +++++++++++-------- ush/get_obs.py | 21 +++++++++---- 6 files changed, 48 insertions(+), 57 deletions(-) diff --git a/scripts/exregional_run_met_ascii2nc_obs.sh b/scripts/exregional_run_met_ascii2nc_obs.sh index c0771e425c..eba6ca6523 100755 --- a/scripts/exregional_run_met_ascii2nc_obs.sh +++ b/scripts/exregional_run_met_ascii2nc_obs.sh @@ -107,19 +107,7 @@ if [ "${OBTYPE}" = "AERONET" ]; then elif [ "${OBTYPE}" = "AIRNOW" ]; then OBS_INPUT_FN_TEMPLATE=${OBS_AIRNOW_FN_TEMPLATES[1]} OUTPUT_FN_TEMPLATE=${OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT} - if [ -z "${AIRNOW_INPUT_FORMAT}" ]; then - if [[ "${OBS_AIRNOW_FN_TEMPLATES[1]}" == *"HourlyData"* ]]; then - ASCII2NC_INPUT_FORMAT=airnowhourly - elif [[ "${OBS_AIRNOW_FN_TEMPLATES[1]}" == *"HourlyAQObs"* ]]; then - ASCII2NC_INPUT_FORMAT=airnowhourlyaqobs - else - print_err_msg_exit "Could not automatically determine format of Airnow observations;\ -check your filenames (OBS_AIRNOW_FN_TEMPLATE=${OBS_AIRNOW_FN_TEMPLATE}) -or manually set variable AIRNOW_INPUT_FORMAT" - fi - else - ASCII2NC_INPUT_FORMAT=${AIRNOW_INPUT_FORMAT} - fi + ASCII2NC_INPUT_FORMAT=${AIRNOW_INPUT_FORMAT} else print_err_msg_exit "\nNo filename template set for OBTYPE \"${OBTYPE}\"!" fi diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index a72fa25292..d42d07356c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -212,24 +212,12 @@ elif [ "${grid_or_point}" = "point" ]; then elif [ "${OBTYPE}" = "AIRNOW" ]; then # It's very annoying that the names for specifying Airnow format are slightly different # for ASCII2NC and Pointstat. This logic deals with that. 
- if [ -z "${AIRNOW_INPUT_FORMAT}" ]; then - if [[ "${OBS_AIRNOW_FN_TEMPLATES[1]}" == *"HourlyData"* ]]; then - FIELDNAME_IN_MET_FILEDIR_NAMES="AIRNOW_HOURLY" - elif [[ "${OBS_AIRNOW_FN_TEMPLATES[1]}" == *"HourlyAQObs"* ]]; then - FIELDNAME_IN_MET_FILEDIR_NAMES="AIRNOW_HOURLY_AQOBS" - else - print_err_msg_exit "Invalid AIRNOW_INPUT_FORMAT=${AIRNOW_INPUT_FORMAT}" - fi + if [[ "${AIRNOW_INPUT_FORMAT}" == "airnowhourly" ]]; then + FIELDNAME_IN_MET_FILEDIR_NAMES="AIRNOW_HOURLY" + elif [[ "${AIRNOW_INPUT_FORMAT}" == "airnowhourlyaqobs" ]]; then + FIELDNAME_IN_MET_FILEDIR_NAMES="AIRNOW_HOURLY_AQOBS" else - if [[ "${AIRNOW_INPUT_FORMAT}" == "airnowhourly" ]]; then - FIELDNAME_IN_MET_FILEDIR_NAMES="AIRNOW_HOURLY" - elif [[ "${AIRNOW_INPUT_FORMAT}" == "airnowhourlyaqobs" ]]; then - FIELDNAME_IN_MET_FILEDIR_NAMES="AIRNOW_HOURLY_AQOBS" - else - print_err_msg_exit "Could not automatically determine format of Airnow observations;\ -check your filenames (OBS_AIRNOW_FN_TEMPLATE=${OBS_AIRNOW_FN_TEMPLATE}) -or manually set variable AIRNOW_INPUT_FORMAT" - fi + print_err_msg_exit "Invalid AIRNOW_INPUT_FORMAT: ${AIRNOW_INPUT_FORMAT}" fi ACCUM_HH='01' OBS_INPUT_DIR="${vx_output_basedir}/metprd/Ascii2nc_obs" @@ -357,6 +345,7 @@ vx_config_dict=$(<"${vx_config_fp}") # included in the yaml-formatted variable "settings" below. vx_config_dict=$( printf "%s\n" "${vx_config_dict}" | sed 's/^/ /' ) # +# #----------------------------------------------------------------------- # # Generate the METplus configuration file from its jinja template. @@ -448,6 +437,12 @@ $settings" print_err_msg_exit "${message_txt}" fi fi +# Ugly hack to deal with different obs variable name (PM25 -->PM2.5) for +# data retrieved from AWS +if [[ "${AIRNOW_INPUT_FORMAT}" == "airnowhourly" ]]; then + sed -i -e 's/OBS_VAR1_NAME = PM25/OBS_VAR1_NAME = PM2.5/g' ${metplus_config_fp} +fi + # #----------------------------------------------------------------------- # diff --git a/tests/WE2E/machine_suites/coverage.jet b/tests/WE2E/machine_suites/coverage.jet index 8c79a0b700..23a3375e7a 100644 --- a/tests/WE2E/machine_suites/coverage.jet +++ b/tests/WE2E/machine_suites/coverage.jet @@ -9,3 +9,4 @@ get_from_HPSS_ics_RAP_lbcs_RAP grid_RRFS_AK_3km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16_plot grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2 +MET_verification_smoke_only_vx diff --git a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml index 3df8855ac8..9092962090 100644 --- a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml @@ -8,7 +8,7 @@ workflow: CCPP_PHYS_SUITE: FV3_RAP DATE_FIRST_CYCL: '2024011100' DATE_LAST_CYCL: '2024011100' - FCST_LEN_HRS: 24 + FCST_LEN_HRS: 18 PREEXISTING_DIR_METHOD: rename rocoto: tasks: @@ -22,14 +22,6 @@ verification: VX_FCST_INPUT_BASEDIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/WE2E_test_data/RRFS FCST_SUBDIR_TEMPLATE: '' FCST_FN_TEMPLATE: 'rrfs.t00z.prslev.f{lead?fmt=%HHH}.conus.grib2' - # Because these files are retrieved from HPSS and not AWS, they have slightly different filenames - OBS_AIRNOW_FN_TEMPLATES: [ 'PM', '{valid?fmt=%Y%m%d}/HourlyAQObs_{valid?fmt=%Y%m%d%H}.dat' ] - AIRNOW_INPUT_FORMAT: airnowhourlyaqobs -# CCPA_OBS_DIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/staged_case/CCPA_obs -# MRMS_OBS_DIR: 
/lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/staged_case/MRMS_obs -# NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/ndas' -# AERONET_OBS_DIR: '{{ workflow.EXPTDIR }}/aeronet' -# AIRNOW_OBS_DIR: '{{ workflow.EXPTDIR }}/airnow' NDAS_OBS_DIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/WE2E_test_data/ndas AERONET_OBS_DIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/WE2E_test_data/aeronet AIRNOW_OBS_DIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/WE2E_test_data/airnow diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 089b66c073..2900e700e9 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2465,8 +2465,8 @@ verification: NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" - AERONET_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/aeronet/proc" - AIRNOW_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/airnow/proc" + AERONET_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/aeronet" + AIRNOW_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/airnow" # # OBS_[CCPA|NOHRSC|MRMS|NDAS]_FN_TEMPLATES: # File name templates for various obs types. These are meant to be used @@ -2567,10 +2567,10 @@ verification: 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] OBS_AERONET_FN_TEMPLATES: [ 'AOD', '{valid?fmt=%Y%m%d}/{valid?fmt=%Y%m%d}.lev15' ] - # NOTE: for files retrieved from HPSS (see AIRNOW_DATA_STORES variable), the + # NOTE: for files retrieved from AWS (see OBS_DATA_STORE variable), the # default value should be replaced with - # '{valid?fmt=%Y%m%d}/HourlyAQObs_{valid?fmt=%Y%m%d%H}.dat' - OBS_AIRNOW_FN_TEMPLATES: [ 'PM', '{valid?fmt=%Y%m%d}/HourlyData_{valid?fmt=%Y%m%d%H}.dat' ] + # '{valid?fmt=%Y%m%d}/HourlyData_{valid?fmt=%Y%m%d%H}.dat' + OBS_AIRNOW_FN_TEMPLATES: [ 'PM', '{valid?fmt=%Y%m%d}/HourlyAQObs_{valid?fmt=%Y%m%d%H}.dat' ] # # REMOVE_RAW_OBS_DIRS # Flag specifying whether to remove the "raw" observation directories @@ -2608,18 +2608,24 @@ verification: OBS_AIRNOW_FN_TEMPLATE_ASCII2NC_OUTPUT: 'hourly_airnow_obs_{valid?fmt=%Y%m%d%H}.nc' # # AIRNOW_INPUT_FORMAT: - # Observation format for ASCII Airnow observations. Valid options are AIRNOW_HOURLY_AQOBS and - # AIRNOW_HOURLY; for more information see the METplus users guide: + # Observation format for ASCII Airnow observations. Observations retrieved from HPSS are in + # "airnowhourlyaqobs" format, observations retrieved from AWS are generally in "airnowhourly" format. + # For more information see the METplus users guide: # https://met.readthedocs.io/en/latest/Users_Guide/reformat_point.html#ascii2nc-tool - # If not specified or set to a blank string, will attempt to determine its value based on the - # value of OBS_AIRNOW_FN_TEMPLATE + + AIRNOW_INPUT_FORMAT: "airnowhourlyaqobs" + # - # OBS_DATA_STORES: + # OBS_DATA_STORE*: # Location(s) to retrieve observation data from. Valid values are "aws" and/or "hpss", see # parm/data_locations.yaml for info on these data stores. 
# - AIRNOW_INPUT_FORMAT: "" - OBS_DATA_STORES: aws + OBS_DATA_STORE_CCPA: hpss + OBS_DATA_STORE_NOHRSC: hpss + OBS_DATA_STORE_MRMS: hpss + OBS_DATA_STORE_NDAS: hpss + OBS_DATA_STORE_AERONET: hpss + OBS_DATA_STORE_AIRNOW: hpss # # OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT: diff --git a/ush/get_obs.py b/ush/get_obs.py index 14193fbf57..f7d77d7739 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -463,8 +463,14 @@ def get_obs(config, obtype, yyyymmdd_task): arcv_intvl_hrs = 24 elif obtype == 'NDAS': arcv_intvl_hrs = 6 - elif obtype == 'AERONET' or obtype == 'AIRNOW': + elif obtype == 'AERONET': arcv_intvl_hrs = 24 + elif obtype == 'AIRNOW': + if vx_config[f'OBS_DATA_STORE_AIRNOW'] == 'hpss': + arcv_intvl_hrs = 24 + else: + arcv_intvl_hrs = 1 + arcv_intvl = dt.timedelta(hours=arcv_intvl_hrs) # Number of obs files within each archive. @@ -729,18 +735,19 @@ def get_obs(config, obtype, yyyymmdd_task): # obs-day dependent) and then call the retrieve_data.py script. os.chdir(basedir_raw) - # Pull obs from HPSS. This will get all the obs files in the current - # archive and place them in the raw archive directory. + # Pull obs from HPSS or AWS based on OBS_DATA_STORE* setting. + # # Note that for the specific case of NDAS obs, this will get all 7 obs # files in the current archive, although we will make use of only 6 of # these (we will not use the tm00 file). + parmdir = config['user']['PARMdir'] args = ['--debug', \ '--file_set', 'obs', \ '--config', os.path.join(parmdir, 'data_locations.yml'), \ '--cycle_date', yyyymmddhh_arcv_str, \ - '--data_stores', 'hpss', \ + '--data_stores', vx_config[f'OBS_DATA_STORE_{obtype}'], \ '--data_type', obtype + '_obs', \ '--output_path', arcv_dir_raw, \ '--summary_file', 'retrieve_data.log'] @@ -866,8 +873,10 @@ def get_obs(config, obtype, yyyymmdd_task): if os.path.isfile(badfile): shutil.move(badfile, os.path.join(arcv_dir_raw, fn_raw)) elif obtype == 'AIRNOW': - fn_raw = f'HourlyAQObs_{yyyymmddhh_str}.dat' - + if vx_config['AIRNOW_INPUT_FORMAT'] == 'airnowhourlyaqobs': + fn_raw = f'HourlyAQObs_{yyyymmddhh_str}.dat' + elif vx_config['AIRNOW_INPUT_FORMAT'] == 'airnowhourly': + fn_raw = f'HourlyData_{yyyymmddhh_str}.dat' fp_raw = os.path.join(arcv_dir_raw, fn_raw) # Special logic for AERONET pulled from http: internet archives result From 316736239c735a10ad15d670b030962bd172f6fc Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." 
Date: Wed, 19 Feb 2025 20:59:36 +0000 Subject: [PATCH 221/260] Update new test paths for Hera --- .../config.MET_verification_smoke_only_vx.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml index 9092962090..205ac4e884 100644 --- a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml @@ -19,10 +19,10 @@ rocoto: verification: VX_FCST_MODEL_NAME: RRFS_smoke_test VX_FIELD_GROUPS: [ "SFC", "AOD", "PM25", "PM10"] - VX_FCST_INPUT_BASEDIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/WE2E_test_data/RRFS + VX_FCST_INPUT_BASEDIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/PR_prep/WE2E_test_data/RRFS FCST_SUBDIR_TEMPLATE: '' FCST_FN_TEMPLATE: 'rrfs.t00z.prslev.f{lead?fmt=%HHH}.conus.grib2' - NDAS_OBS_DIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/WE2E_test_data/ndas - AERONET_OBS_DIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/WE2E_test_data/aeronet - AIRNOW_OBS_DIR: /lfs6/BMC/gsd-fv3-dev/Michael.Kavulich/smoke_vx/WE2E_test_data/airnow + NDAS_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/PR_prep/WE2E_test_data/ndas + AERONET_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/PR_prep/WE2E_test_data/aeronet + AIRNOW_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/PR_prep/WE2E_test_data/airnow From cf9b6cdaa5241c1b353852b1d916eb46d43921fa Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr" Date: Wed, 19 Feb 2025 14:05:16 -0700 Subject: [PATCH 222/260] A few more details about the new available options --- doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 6 +++++- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 11 +++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index 6100bd22a5..b14fc1616d 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -863,7 +863,11 @@ that attempt is successful, the workflow will move on to subsequent tasks. Thus :term:`AIRNOW` files manually from collections of publicly available data. Then, as above, they must set ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` to match those staging locations and file names. - + +.. note:: + AIRNOW observations can be retrieved from AWS in addition to HPSS, but this requires changing some default settings. + See ``ush/config_defaults.yaml`` or :numref:`Section %s ` for more details. + * Users who have access to a data store that hosts the necessary files (e.g. NOAA HPSS) do not need to manually stage the obs data because the ``get_obs_*`` tasks will retrieve the necessary obs and place them in the locations specified by ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]``. By default, diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index b91892813e..e83e4e3a15 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -1881,6 +1881,17 @@ VX Parameters for Observations workflow verification tasks that call METplus's ASCII2NC tool on AIRNOW observations. +``AIRNOW_INPUT_FORMAT``: (Default: ``"airnowhourlyaqobs"``) + Observation format for ASCII Airnow observations. 
Observations retrieved from HPSS are in + "airnowhourlyaqobs" format, observations retrieved from AWS are generally in "airnowhourly" format. + For more information see the + `METplus users guide `_ + + +``OBS_DATA_STORE_AIRNOW``: (Default: ``hpss``) + Location to retrieve observation data from. Valid values are "aws" and/or "hpss", see + ``parm/data_locations.yaml`` for info on these data stores. + ``OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT``: (Default: ``'${OBS_NDAS_FN_TEMPLATES[1]}.nc'``) METplus template for the names of the NetCDF files generated by the workflow verification tasks that call METplus's Pb2nc tool on the From 93d5ea34ab5eeafd3d0e31184ccb65d7e9f9ba31 Mon Sep 17 00:00:00 2001 From: "Michael J. Kavulich, Jr." Date: Wed, 19 Feb 2025 22:21:46 +0000 Subject: [PATCH 223/260] Some final cleanup tasks: removing `set -x` declarations, updating config section imports to pre-emptively avoid conflicts with https://github.com/ufs-community/ufs-srweather-app/pull/1204 --- jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS | 7 ++++++- scripts/exregional_check_post_output.sh | 1 - scripts/exregional_get_verif_obs.sh | 8 +++++++- scripts/exregional_run_met_ascii2nc_obs.sh | 12 ++++++++++-- ...gional_run_met_genensprod_or_ensemblestat.sh | 16 +++++++++++++--- ...regional_run_met_gridstat_or_pointstat_vx.sh | 17 +++++++++++++---- ..._run_met_gridstat_or_pointstat_vx_ensmean.sh | 16 +++++++++++++--- ..._run_met_gridstat_or_pointstat_vx_ensprob.sh | 16 +++++++++++++--- scripts/exregional_run_met_pb2nc_obs.sh | 14 ++++++++++++-- scripts/exregional_run_met_pcpcombine.sh | 16 +++++++++++++--- 10 files changed, 100 insertions(+), 23 deletions(-) diff --git a/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS b/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS index 151eb503cd..9eb445bf84 100755 --- a/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS +++ b/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS @@ -8,7 +8,12 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user nco workflow ; do +sections=( + user + nco + workflow +) +for sect in ${sections[*]} ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done . $USHdir/job_preamble.sh diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index 269713c296..4d5836519c 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -95,7 +95,6 @@ user-staged. # #----------------------------------------------------------------------- # -set -x i="0" if [[ $(boolify "${DO_ENSEMBLE}") == "TRUE" ]]; then i=$( bc -l <<< "${ENSMEM_INDX}-1" ) diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 3e218d99f5..3fa73abc44 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -31,7 +31,13 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user workflow nco verification; do +sections=( + user + nco + workflow + verification +) +for sect in ${sections[*]} ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # diff --git a/scripts/exregional_run_met_ascii2nc_obs.sh b/scripts/exregional_run_met_ascii2nc_obs.sh index eba6ca6523..7ee9a34a23 100755 --- a/scripts/exregional_run_met_ascii2nc_obs.sh +++ b/scripts/exregional_run_met_ascii2nc_obs.sh @@ -8,7 +8,16 @@ #----------------------------------------------------------------------- # . 
$USHdir/source_util_funcs.sh -for sect in user nco platform workflow nco global verification cpl_aqm_parm \ +sections=( + user + nco + platform + workflow + global + verification + cpl_aqm_parm +) +for sect in ${sections[*]} ; do constants fixed_files ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done @@ -49,7 +58,6 @@ scrfunc_dir=$( dirname "${scrfunc_fp}" ) # #----------------------------------------------------------------------- # -set -x get_metplus_tool_name \ METPLUSTOOLNAME="${METPLUSTOOLNAME}" \ outvarname_metplus_tool_name="metplus_tool_name" \ diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 520742ef6f..03011de433 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -8,9 +8,19 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user nco platform workflow nco global verification cpl_aqm_parm \ - constants fixed_files \ - task_run_post ; do +sections=( + user + nco + platform + workflow + global + verification + cpl_aqm_parm + constants + fixed_files + task_run_post +) +for sect in ${sections[*]} ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index d42d07356c..cd97201068 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -8,9 +8,19 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user nco platform workflow nco global verification cpl_aqm_parm \ - constants fixed_files \ - task_run_post ; do +sections=( + user + nco + platform + workflow + global + verification + cpl_aqm_parm + constants + fixed_files + task_run_post +) +for sect in ${sections[*]} ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # @@ -345,7 +355,6 @@ vx_config_dict=$(<"${vx_config_fp}") # included in the yaml-formatted variable "settings" below. vx_config_dict=$( printf "%s\n" "${vx_config_dict}" | sed 's/^/ /' ) # -# #----------------------------------------------------------------------- # # Generate the METplus configuration file from its jinja template. diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index dc1101be3e..a9dfa47e2c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -8,9 +8,19 @@ #----------------------------------------------------------------------- # . 
$USHdir/source_util_funcs.sh -for sect in user nco platform workflow nco global verification cpl_aqm_parm \ - constants fixed_files \ - task_run_post ; do +sections=( + user + nco + platform + workflow + global + verification + cpl_aqm_parm + constants + fixed_files + task_run_post +) +for sect in ${sections[*]} ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index 8cefeccc4e..cb20b59fdc 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -8,9 +8,19 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user nco platform workflow nco global verification cpl_aqm_parm \ - constants fixed_files \ - task_run_post ; do +sections=( + user + nco + platform + workflow + global + verification + cpl_aqm_parm + constants + fixed_files + task_run_post +) +for sect in ${sections[*]} ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 9e76415866..a278cb8fee 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -8,8 +8,18 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user nco platform workflow nco global verification cpl_aqm_parm \ - constants fixed_files ; do +sections=( + user + nco + platform + workflow + global + verification + cpl_aqm_parm + constants + fixed_files +) +for sect in ${sections[*]} ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 79f867f7c8..890f688154 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -8,9 +8,19 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user nco platform workflow nco global verification cpl_aqm_parm \ - constants fixed_files \ - task_run_post ; do +sections=( + user + nco + platform + workflow + global + verification + cpl_aqm_parm + constants + fixed_files + task_run_post +) +for sect in ${sections[*]} ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # From e7779cf93bff8d08cf14d26c97bf26858e423c69 Mon Sep 17 00:00:00 2001 From: "Michael J. Kavulich, Jr." 
Date: Wed, 19 Feb 2025 22:23:28 +0000 Subject: [PATCH 224/260] Move new test to Orion since Jet is being deprecated for support --- tests/WE2E/machine_suites/coverage.jet | 1 - tests/WE2E/machine_suites/coverage.orion | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/WE2E/machine_suites/coverage.jet b/tests/WE2E/machine_suites/coverage.jet index 23a3375e7a..8c79a0b700 100644 --- a/tests/WE2E/machine_suites/coverage.jet +++ b/tests/WE2E/machine_suites/coverage.jet @@ -9,4 +9,3 @@ get_from_HPSS_ics_RAP_lbcs_RAP grid_RRFS_AK_3km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16_plot grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2 -MET_verification_smoke_only_vx diff --git a/tests/WE2E/machine_suites/coverage.orion b/tests/WE2E/machine_suites/coverage.orion index 5cb4441437..dd5f25cc3f 100644 --- a/tests/WE2E/machine_suites/coverage.orion +++ b/tests/WE2E/machine_suites/coverage.orion @@ -1,3 +1,4 @@ +2020_CAD custom_ESGgrid_SF_1p1km deactivate_tasks get_from_AWS_ics_GEFS_lbcs_GEFS_fmt_grib2_2022040400_ensemble_2mems @@ -9,4 +10,4 @@ grid_RRFS_CONUScompact_25km_ics_RRFS_lbcs_RRFS_suite_RRFS_v1beta grid_RRFS_CONUScompact_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16 grid_RRFS_CONUScompact_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16 grid_SUBCONUS_Ind_3km_ics_FV3GFS_lbcs_FV3GFS_suite_WoFS_v0 -2020_CAD +MET_verification_smoke_only_vx From abaa8ce56ce96350ea2a72116c1340fce85ddb1c Mon Sep 17 00:00:00 2001 From: "Michael J. Kavulich, Jr." Date: Wed, 19 Feb 2025 22:24:03 +0000 Subject: [PATCH 225/260] Update some inline documentation, fix setting of FIRE_NUM_TASKS=0 when UFS_FIRE not active --- ush/get_obs.py | 3 ++- ush/setup.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ush/get_obs.py b/ush/get_obs.py index f7d77d7739..9aeb1cd3a5 100644 --- a/ush/get_obs.py +++ b/ush/get_obs.py @@ -322,9 +322,10 @@ def get_obs(config, obtype, yyyymmdd_task): AIRNOW Air Quality Particulate Matter (PM25, PM10) observations: ---------------------------------------------------------------- - For AIRNOW, the archive interval is 24 hours. There is one archive per day + For AIRNOW, the HPSS archive interval is 24 hours. There is one archive per day containing one text file per hour that contains all the observation for that hour. + When retrieved from AWS, the interval is 1 hour. """ # Convert obtype to upper case to simplify code below. diff --git a/ush/setup.py b/ush/setup.py index ef71e65970..e938b04281 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -1783,7 +1783,7 @@ def _get_location(xcs, fmt, expt_cfg): else: if fire_conf["FIRE_NUM_TASKS"] > 0: logger.warning("UFS_FIRE is not enabled; setting FIRE_NUM_TASKS = 0") - fire_conf["FIRE_NUM_TASKS"] = 1 + fire_conf["FIRE_NUM_TASKS"] = 0 # # ----------------------------------------------------------------------- # From bccde7b664d2b751918455df3fccff26a948d1ec Mon Sep 17 00:00:00 2001 From: "Michael J. 
Kavulich, Jr" Date: Wed, 19 Feb 2025 18:43:26 -0600 Subject: [PATCH 226/260] Fixes from Orion coverage tests - Fix typo in exregional_run_met_ascii2nc_obs.sh - Add TEST_AERONET_OBS_DIR, TEST_AIRNOW_OBS_DIR to orion machine file - Add checks for TEST_AERONET_OBS_DIR, TEST_AIRNOW_OBS_DIR in run_WE2E_tests.py - Need to create FV3 namelist for make_grid task as well - Update new smoke test for staged data --- scripts/exregional_run_met_ascii2nc_obs.sh | 1 - tests/WE2E/run_WE2E_tests.py | 2 +- .../config.MET_verification_smoke_only_vx.yaml | 7 ++----- ush/generate_FV3LAM_wflow.py | 3 ++- ush/machine/orion.yaml | 12 +++++++----- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/scripts/exregional_run_met_ascii2nc_obs.sh b/scripts/exregional_run_met_ascii2nc_obs.sh index 7ee9a34a23..35d383bd1e 100755 --- a/scripts/exregional_run_met_ascii2nc_obs.sh +++ b/scripts/exregional_run_met_ascii2nc_obs.sh @@ -18,7 +18,6 @@ sections=( cpl_aqm_parm ) for sect in ${sections[*]} ; do - constants fixed_files ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index fc0a3e3268..c1f97b018c 100755 --- a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -201,7 +201,7 @@ def run_we2e_tests(homedir, args) -> None: # This section checks if we are doing verification on a machine with staged verification # obs. If so, and if the config file does not explicitly set the observation locations, # fill these in with defaults from the machine files - obs_vars = ['CCPA_OBS_DIR','MRMS_OBS_DIR','NDAS_OBS_DIR','NOHRSC_OBS_DIR'] + obs_vars = ['CCPA_OBS_DIR','MRMS_OBS_DIR','NDAS_OBS_DIR','NOHRSC_OBS_DIR','AERONET_OBS_DIR','AIRNOW_OBS_DIR'] for obvar in obs_vars: mach_path = machine_defaults['platform'].get('TEST_'+obvar) if not test_cfg['verification'].get(obvar) and mach_path: diff --git a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml index 205ac4e884..ddb669248d 100644 --- a/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_verification_smoke_only_vx.yaml @@ -19,10 +19,7 @@ rocoto: verification: VX_FCST_MODEL_NAME: RRFS_smoke_test VX_FIELD_GROUPS: [ "SFC", "AOD", "PM25", "PM10"] - VX_FCST_INPUT_BASEDIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/PR_prep/WE2E_test_data/RRFS - FCST_SUBDIR_TEMPLATE: '' + VX_FCST_INPUT_BASEDIR: '{{ platform.WE2E_TEST_DATA }}/input_model_data/RRFS' + FCST_SUBDIR_TEMPLATE: '{init?fmt=%Y%m%d%H}' FCST_FN_TEMPLATE: 'rrfs.t00z.prslev.f{lead?fmt=%HHH}.conus.grib2' - NDAS_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/PR_prep/WE2E_test_data/ndas - AERONET_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/PR_prep/WE2E_test_data/aeronet - AIRNOW_OBS_DIR: /scratch2/BMC/fv3lam/kavulich/smoke_verif/PR_prep/WE2E_test_data/airnow diff --git a/ush/generate_FV3LAM_wflow.py b/ush/generate_FV3LAM_wflow.py index ac2acde904..30ba4b3f27 100755 --- a/ush/generate_FV3LAM_wflow.py +++ b/ush/generate_FV3LAM_wflow.py @@ -245,7 +245,8 @@ def generate_FV3LAM_wflow( # # ----------------------------------------------------------------------- # - if dict_find(expt_config["rocoto"]["tasks"], "task_run_fcst"): + if ( dict_find(expt_config["rocoto"]["tasks"], "task_run_fcst") or + dict_find(expt_config["rocoto"]["tasks"], "task_make_grid") ): log_info( """ Copying templates of various input files to the experiment 
directory...""", diff --git a/ush/machine/orion.yaml b/ush/machine/orion.yaml index 285eb34ee2..4383c9e4a2 100644 --- a/ush/machine/orion.yaml +++ b/ush/machine/orion.yaml @@ -3,11 +3,13 @@ platform: NCORES_PER_NODE: 40 SCHED: slurm WE2E_TEST_DATA: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop - TEST_CCPA_OBS_DIR: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/obs_data/ccpa/proc - TEST_MRMS_OBS_DIR: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/obs_data/mrms/proc - TEST_NDAS_OBS_DIR: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/obs_data/ndas/proc - TEST_NOHRSC_OBS_DIR: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/obs_data/nohrsc/proc - DOMAIN_PREGEN_BASEDIR: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/FV3LAM_pregen + TEST_CCPA_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ccpa/proc' + TEST_MRMS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/mrms/proc' + TEST_NDAS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ndas/proc' + TEST_NOHRSC_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/nohrsc/proc' + TEST_AERONET_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/aeronet' + TEST_AIRNOW_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/airnow' + DOMAIN_PREGEN_BASEDIR: '{{ platform.WE2E_TEST_DATA }}/FV3LAM_pregen' PARTITION_DEFAULT: orion QUEUE_DEFAULT: batch PARTITION_FCST: orion From 5d3e21bfeaa3f033c1d326bc69541857425f7b4c Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Thu, 20 Feb 2025 00:59:53 +0000 Subject: [PATCH 227/260] Update staged data locations on all tier-1 platforms --- ush/machine/derecho.yaml | 12 +++++++----- ush/machine/gaea.yaml | 14 ++++++++------ ush/machine/hera.yaml | 14 ++++++++------ ush/machine/hercules.yaml | 12 +++++++----- ush/machine/jet.yaml | 12 +++++++----- ush/machine/noaacloud.yaml | 12 +++++++----- 6 files changed, 44 insertions(+), 32 deletions(-) diff --git a/ush/machine/derecho.yaml b/ush/machine/derecho.yaml index d8a3e8f4d4..e768455788 100644 --- a/ush/machine/derecho.yaml +++ b/ush/machine/derecho.yaml @@ -3,11 +3,13 @@ platform: NCORES_PER_NODE: 128 SCHED: pbspro WE2E_TEST_DATA: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop - TEST_CCPA_OBS_DIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/obs_data/ccpa/proc - TEST_MRMS_OBS_DIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/obs_data/mrms/proc - TEST_NDAS_OBS_DIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/obs_data/ndas/proc - TEST_NOHRSC_OBS_DIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/obs_data/nohrsc/proc - DOMAIN_PREGEN_BASEDIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/FV3LAM_pregen + TEST_CCPA_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ccpa/proc' + TEST_MRMS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/mrms/proc' + TEST_NDAS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ndas/proc' + TEST_NOHRSC_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/nohrsc/proc' + TEST_AERONET_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/aeronet' + TEST_AIRNOW_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/airnow' + DOMAIN_PREGEN_BASEDIR: '{{ platform.WE2E_TEST_DATA }}/FV3LAM_pregen' QUEUE_DEFAULT: main QUEUE_FCST: main QUEUE_HPSS: main diff --git a/ush/machine/gaea.yaml b/ush/machine/gaea.yaml index 91e248e57b..6460274f2b 100644 --- a/ush/machine/gaea.yaml +++ b/ush/machine/gaea.yaml @@ -2,12 +2,14 @@ platform: WORKFLOW_MANAGER: rocoto NCORES_PER_NODE: 128 SCHED: slurm - WE2E_TEST_DATA: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop - TEST_CCPA_OBS_DIR: 
/gpfs/f5/epic/world-shared/UFS_SRW_data/develop/obs_data/ccpa/proc - TEST_MRMS_OBS_DIR: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/obs_data/mrms/proc - TEST_NDAS_OBS_DIR: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/obs_data/ndas/proc - TEST_NOHRSC_OBS_DIR: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/obs_data/nohrsc/proc - DOMAIN_PREGEN_BASEDIR: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/FV3LAM_pregen + WE2E_TEST_DATA: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/ + TEST_CCPA_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ccpa/proc' + TEST_MRMS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/mrms/proc' + TEST_NDAS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ndas/proc' + TEST_NOHRSC_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/nohrsc/proc' + TEST_AERONET_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/aeronet' + TEST_AIRNOW_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/airnow' + DOMAIN_PREGEN_BASEDIR: '{{ platform.WE2E_TEST_DATA }}/FV3LAM_pregen' QUEUE_DEFAULT: normal QUEUE_FCST: normal QUEUE_HPSS: normal diff --git a/ush/machine/hera.yaml b/ush/machine/hera.yaml index 5644814e1d..a7b4a951f5 100644 --- a/ush/machine/hera.yaml +++ b/ush/machine/hera.yaml @@ -3,12 +3,14 @@ platform: NCORES_PER_NODE: 40 SCHED: slurm WE2E_TEST_DATA: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop - TEST_CCPA_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/ccpa/proc - TEST_NOHRSC_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/nohrsc/proc - TEST_MRMS_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/mrms/proc - TEST_NDAS_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/ndas/proc - TEST_GDAS_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/gdas - DOMAIN_PREGEN_BASEDIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/FV3LAM_pregen + TEST_CCPA_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ccpa/proc' + TEST_NOHRSC_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/nohrsc/proc' + TEST_MRMS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/mrms/proc' + TEST_NDAS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ndas/proc' + TEST_AERONET_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/aeronet' + TEST_AIRNOW_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/airnow' + TEST_GDAS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/gdas' + DOMAIN_PREGEN_BASEDIR: '{{ platform.WE2E_TEST_DATA }}/FV3LAM_pregen' PARTITION_DEFAULT: hera QUEUE_DEFAULT: batch PARTITION_FCST: hera diff --git a/ush/machine/hercules.yaml b/ush/machine/hercules.yaml index eddf307091..5c8787dd93 100644 --- a/ush/machine/hercules.yaml +++ b/ush/machine/hercules.yaml @@ -3,11 +3,13 @@ platform: NCORES_PER_NODE: 80 SCHED: slurm WE2E_TEST_DATA: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop - TEST_CCPA_OBS_DIR: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/obs_data/ccpa/proc - TEST_MRMS_OBS_DIR: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/obs_data/mrms/proc - TEST_NDAS_OBS_DIR: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/obs_data/ndas/proc - TEST_NOHRSC_OBS_DIR: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/obs_data/nohrsc/proc - DOMAIN_PREGEN_BASEDIR: /work/noaa/epic/role-epic/contrib/UFS_SRW_data/develop/FV3LAM_pregen + TEST_CCPA_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ccpa/proc' + TEST_MRMS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/mrms/proc' + TEST_NDAS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ndas/proc' + 
TEST_NOHRSC_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/nohrsc/proc' + TEST_AERONET_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/aeronet' + TEST_AIRNOW_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/airnow' + DOMAIN_PREGEN_BASEDIR: '{{ platform.WE2E_TEST_DATA }}/FV3LAM_pregen' PARTITION_DEFAULT: hercules QUEUE_DEFAULT: batch PARTITION_FCST: hercules diff --git a/ush/machine/jet.yaml b/ush/machine/jet.yaml index b14a4ab9ff..6f2b420f86 100644 --- a/ush/machine/jet.yaml +++ b/ush/machine/jet.yaml @@ -3,11 +3,13 @@ platform: NCORES_PER_NODE: 24 SCHED: slurm WE2E_TEST_DATA: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop - TEST_CCPA_OBS_DIR: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/obs_data/ccpa/proc - TEST_MRMS_OBS_DIR: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/obs_data/mrms/proc - TEST_NDAS_OBS_DIR: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/obs_data/ndas/proc - TEST_NOHRSC_OBS_DIR: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/obs_data/nohrsc/proc - DOMAIN_PREGEN_BASEDIR: /mnt/lfs5/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/FV3LAM_pregen + TEST_CCPA_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ccpa/proc' + TEST_MRMS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/mrms/proc' + TEST_NDAS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ndas/proc' + TEST_NOHRSC_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/nohrsc/proc' + TEST_AERONET_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/aeronet' + TEST_AIRNOW_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/airnow' + DOMAIN_PREGEN_BASEDIR: '{{ platform.WE2E_TEST_DATA }}/FV3LAM_pregen' PARTITION_DEFAULT: vjet,kjet,xjet QUEUE_DEFAULT: batch PARTITION_FCST: xjet diff --git a/ush/machine/noaacloud.yaml b/ush/machine/noaacloud.yaml index 2b27c0c139..b7c9f56d31 100644 --- a/ush/machine/noaacloud.yaml +++ b/ush/machine/noaacloud.yaml @@ -3,11 +3,13 @@ platform: NCORES_PER_NODE: '{{ 44 if (user.ACCOUNT == "cz-epic") else 36 if (user.ACCOUNT == "ca-epic") else 28 }}' SCHED: slurm WE2E_TEST_DATA: /contrib/EPIC/UFS_SRW_data/develop - TEST_CCPA_OBS_DIR: /contrib/EPIC/UFS_SRW_data/develop/obs_data/ccpa/proc - TEST_MRMS_OBS_DIR: /contrib/EPIC/UFS_SRW_data/develop/obs_data/mrms/proc - TEST_NDAS_OBS_DIR: /contrib/EPIC/UFS_SRW_data/develop/obs_data/ndas/proc - TEST_NOHRSC_OBS_DIR: /contrib/EPIC/UFS_SRW_data/develop/obs_data/nohrsc/proc - DOMAIN_PREGEN_BASEDIR: /contrib/EPIC/UFS_SRW_data/develop/FV3LAM_pregen + TEST_CCPA_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ccpa/proc' + TEST_MRMS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/mrms/proc' + TEST_NDAS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ndas/proc' + TEST_NOHRSC_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/nohrsc/proc' + TEST_AERONET_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/aeronet' + TEST_AIRNOW_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/airnow' + DOMAIN_PREGEN_BASEDIR: '{{ platform.WE2E_TEST_DATA }}/FV3LAM_pregen' REMOVE_MEMORY: True RUN_CMD_FCST: mpiexec -np ${PE_MEMBER01} RUN_CMD_POST: mpiexec -np $nprocs From 47a79fd1a40aa7b0d7113fba16cea6aadfe6bf8c Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." 
Date: Thu, 20 Feb 2025 01:59:44 +0000 Subject: [PATCH 228/260] Fix failing pylint test --- ush/generate_FV3LAM_wflow.py | 51 +++++++++++------------------------- 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/ush/generate_FV3LAM_wflow.py b/ush/generate_FV3LAM_wflow.py index b605fb9aec..13ae7996e0 100755 --- a/ush/generate_FV3LAM_wflow.py +++ b/ush/generate_FV3LAM_wflow.py @@ -247,38 +247,24 @@ def generate_FV3LAM_wflow( # if ( dict_find(expt_config["rocoto"]["tasks"], "task_run_fcst") or dict_find(expt_config["rocoto"]["tasks"], "task_make_grid") ): - log_info( - """ - Copying templates of various input files to the experiment directory...""", - verbose=debug, - ) + logging.debug("Copying templates of various input files to the experiment directory...") - log_info( - """ - Copying the template data table file to the experiment directory...""", - verbose=debug, - ) - cp_vrfy(expt_config["workflow"]["DATA_TABLE_TMPL_FP"], expt_config["workflow"]["DATA_TABLE_FP"]) + logging.debug("Copying the template data table file to the experiment directory...") + cp_vrfy(expt_config["workflow"]["DATA_TABLE_TMPL_FP"], + expt_config["workflow"]["DATA_TABLE_FP"]) - log_info( - """ - Copying the template field table file to the experiment directory...""", - verbose=debug, - ) - cp_vrfy(expt_config["workflow"]["FIELD_TABLE_TMPL_FP"], expt_config["workflow"]["FIELD_TABLE_FP"]) + logging.debug("Copying the template field table file to the experiment directory...") + cp_vrfy(expt_config["workflow"]["FIELD_TABLE_TMPL_FP"], + expt_config["workflow"]["FIELD_TABLE_FP"]) # # Copy the CCPP physics suite definition file from its location in the # clone of the FV3 code repository to the experiment directory (EXPT- # DIR). # - log_info( - """ - Copying the CCPP physics suite definition XML file from its location in - the forecast model directory structure to the experiment directory...""", - verbose=debug, - ) - cp_vrfy(expt_config["workflow"]["CCPP_PHYS_SUITE_IN_CCPP_FP"], expt_config["workflow"]["CCPP_PHYS_SUITE_FP"]) + logging.debug("Copying CCPP suite definition file from forecast model repository") + cp_vrfy(expt_config["workflow"]["CCPP_PHYS_SUITE_IN_CCPP_FP"], + expt_config["workflow"]["CCPP_PHYS_SUITE_FP"]) # If UFS_FIRE, update FIELD_TABLE if expt_config['fire'].get('UFS_FIRE'): @@ -293,17 +279,11 @@ def generate_FV3LAM_wflow( # # Copy the field dictionary file from its location in the - # clone of the FV3 code repository to the experiment directory (EXPT- - # DIR). + # clone of the FV3 code repository to the experiment directory # - log_info( - """ - Copying the field dictionary file from its location in the - forecast model directory structure to the experiment - directory...""", - verbose=debug, - ) - cp_vrfy(expt_config["workflow"]["FIELD_DICT_IN_UWM_FP"], expt_config["workflow"]["FIELD_DICT_FP"]) + logging.debug("Copying field dictionary file from forecast model repository") + cp_vrfy(expt_config["workflow"]["FIELD_DICT_IN_UWM_FP"], + expt_config["workflow"]["FIELD_DICT_FP"]) # # ----------------------------------------------------------------------- @@ -326,7 +306,8 @@ def generate_FV3LAM_wflow( # the C-resolution of the grid), and this parameter is in most workflow # configurations is not known until the grid is created. 
# - if not expt_config['rocoto']['tasks'].get('task_make_grid') and dict_find(expt_config["rocoto"]["tasks"], "task_run_fcst"): + if ( not expt_config['rocoto']['tasks'].get('task_make_grid') and + dict_find(expt_config["rocoto"]["tasks"], "task_run_fcst") ): set_fv3nml_sfc_climo_filenames(flatten_dict(expt_config), debug) # From e6185e38acf3c26e3846c286ae72606caa760db0 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Thu, 20 Feb 2025 18:32:54 +0000 Subject: [PATCH 229/260] Upgrade hera gnu modulefile --- modulefiles/build_hera_gnu.lua | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modulefiles/build_hera_gnu.lua b/modulefiles/build_hera_gnu.lua index 9618375f03..adf4fa8287 100644 --- a/modulefiles/build_hera_gnu.lua +++ b/modulefiles/build_hera_gnu.lua @@ -7,10 +7,10 @@ whatis([===[Loads libraries needed for building the UFS SRW App on Hera using GN prepend_path("MODULEPATH", "/scratch2/NCEPDEV/stmp1/role.epic/installs/gnu/modulefiles") prepend_path("MODULEPATH", "/scratch2/NCEPDEV/stmp1/role.epic/installs/openmpi/modulefiles") -prepend_path("MODULEPATH", "/scratch1/NCEPDEV/nems/role.epic/spack-stack/spack-stack-1.6.0/envs/test.remove/install/modulefiles/Core") +prepend_path("MODULEPATH", "/scratch2/NCEPDEV/stmp1/role.epic/spack-stack/spack-stack-1.6.0_gnu13/envs/fms-2024.01/install/modulefiles/Core") -load("stack-gcc/9.2.0") -load("stack-openmpi/4.1.5") +load("stack-gcc/13.3.0") +load("stack-openmpi/4.1.6") load("stack-python/3.10.13") load("cmake/3.23.1") From d30a8ce5364c98ae9dbbbf0e8939d16c35f567a4 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Thu, 20 Feb 2025 21:46:53 +0000 Subject: [PATCH 230/260] I forgot that templates can be passed to retrieve_data.py...changing solution to just create the damn directories. --- ush/retrieve_data.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py index fc6465d2e9..4c5fa2344e 100755 --- a/ush/retrieve_data.py +++ b/ush/retrieve_data.py @@ -31,7 +31,7 @@ from copy import deepcopy import yaml - +from pathlib import Path def clean_up_output_dir(expected_subdir, local_archive, output_path, source_paths): @@ -40,7 +40,7 @@ def clean_up_output_dir(expected_subdir, local_archive, output_path, source_path Args: expected_subdir : Expected subdirectories local_archive (str): File name - output_path (str): Path to a location on disk. Path is expected to exist. + output_path (str): Path to a location on disk. source_paths (str): Returns: @@ -66,6 +66,7 @@ def clean_up_output_dir(expected_subdir, local_archive, output_path, source_path expected_output_loc = os.path.join(output_path, file_name) if not local_file_path == expected_output_loc: logging.info(f"Moving {local_file_path} to " f"{expected_output_loc}") + Path(output_path).mkdir(parents=True, exist_ok=True) shutil.move(local_file_path, expected_output_loc) # Clean up directories from inside archive, if they exist @@ -423,6 +424,7 @@ def get_requested_files(cla, file_templates, input_locs, method="disk", **kwargs target_path = create_target_path(target_path) logging.info(f"Retrieved files will be placed here: \n {target_path}") + Path(target_path).mkdir(parents=True, exist_ok=True) os.chdir(target_path) for fcst_hr in cla.fcst_hrs: @@ -519,7 +521,7 @@ def hpss_requested_files(cla, file_names, store_specs, members=-1, ens_group=-1) It cleans up the local disk after files are deemed available in order to remove any empty subdirectories that may still be present. 
- This function exepcts that the output directory exists and is writable. + This function expects that the output directory is writable. Args: cla (str): Command line arguments (Namespace object) @@ -1086,7 +1088,7 @@ ) parser.add_argument( "--output_path", - help="Path to a location on disk. Path is expected to exist.", + help="Path to a location on disk where files will be placed.", required=True, type=os.path.abspath, ) @@ -1175,11 +1177,6 @@ raise argparse.ArgumentTypeError(f"Invalid value '{store}' provided " \ f"for --data_stores; valid values are {valid_data_stores}") - # Check other requirements - if not os.path.isdir(args.output_path): - logging.critical(f"{args.output_path} does not exist or is not a directory") - raise FileNotFoundError(f"Argument `--output_path` must be an existing directory") - return args
From 6166179159dd50f13ef2bcf00500e320940bb189 Mon Sep 17 00:00:00 2001 From: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> Date: Fri, 21 Feb 2025 08:50:53 -0500 Subject: [PATCH 231/260] [develop] Doc test bug fixes (#1206) * Refactors doc tests to separate doc build and linkcheck * Adds --keep-going flag so that tests continue on failure (so that developers can see all sources of failure, not just the first) * Fixes SD-related documentation warnings * Removes systems no longer supported by the UFS WM (Cheyenne, Odin, Stampede) * Updates warning about cron to mention Derecho & Gaea * Misc minor updates/formatting --------- Co-authored-by: Brandon Selbig <156852197+selbigmtnwx23@users.noreply.github.com> Co-authored-by: Michael Lueken <63728921+MichaelLueken@users.noreply.github.com> --- .github/scripts/check_tech_doc.sh | 2 +- .github/workflows/doc_tests.yaml | 7 ++- doc/Makefile | 2 +- .../BuildingRunningTesting/RunSRW.rst | 8 ++- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 4 +- doc/UsersGuide/Reference/Glossary.rst | 6 +-- doc/conf.py | 30 +++++++++-- doc/requirements.in | 2 +- modulefiles/build_odin_intel.lua | 54 ------------------- modulefiles/wflow_odin.lua | 35 ------------ ush/smoke_dust_add_smoke.py | 3 ++ ush/valid_param_vals.yaml | 2 +- 12 files changed, 49 insertions(+), 106 deletions(-) delete mode 100644 modulefiles/build_odin_intel.lua delete mode 100644 modulefiles/wflow_odin.lua
diff --git a/.github/scripts/check_tech_doc.sh b/.github/scripts/check_tech_doc.sh index d988e50cd6..1ee78b8574 100755 --- a/.github/scripts/check_tech_doc.sh +++ b/.github/scripts/check_tech_doc.sh @@ -5,7 +5,7 @@ set -eo pipefail # Install prerequisites -pip install sphinx +pip install Sphinx==7.4.7 pip install sphinx-rtd-theme pip install sphinxcontrib-bibtex
diff --git a/.github/workflows/doc_tests.yaml b/.github/workflows/doc_tests.yaml index 34fb01a9ac..6649fc91c0 100644 --- a/.github/workflows/doc_tests.yaml +++ b/.github/workflows/doc_tests.yaml @@ -22,4 +22,9 @@ jobs: - name: Build documentation run: | cd doc - make doc + make clean && make html + - name: Check links + if: ${{ !cancelled() }} + run: | + cd doc + make linkcheck
diff --git a/doc/Makefile b/doc/Makefile index a4fac61e1a..614ffbaafa 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,6 +1,6 @@ # Makefile for Sphinx documentation -SPHINXOPTS = -a -n -W +SPHINXOPTS = -a -n -W --keep-going SPHINXBUILD = sphinx-build SOURCEDIR = . 
BUILDDIR = build diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index eefaf63715..b93ebd0a28 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -310,9 +310,9 @@ On platforms where Rocoto and :term:`cron` are available, users can automate res USE_CRON_TO_RELAUNCH: true CRON_RELAUNCH_INTVL_MNTS: 3 -.. note:: +.. attention:: - On Orion, *cron* is only available on the orion-login-1 node, so users will need to work on that node when running *cron* jobs on Orion. + Cron is not available on Derecho or Gaea. On Orion/Hercules, users must be logged into the [hercules/orion]-login-1 node to use cron. When running with GNU compilers (i.e., if the modulefile used to set up the build environment in :numref:`Section %s ` uses a GNU compiler), users must also set ``COMPILER: "gnu"`` in the ``workflow:`` section of the ``config.yaml`` file. @@ -1389,6 +1389,10 @@ where ``/path/to/experiment/directory`` is changed to correspond to the user's ` * On NOAA Cloud instances, ``*/1 * * * *`` (or ``CRON_RELAUNCH_INTVL_MNTS: 1``) is the preferred option for cron jobs because compute nodes will shut down if they remain idle too long. If the compute node shuts down, it can take 15-20 minutes to start up a new one. * On other NOAA HPC systems, administrators discourage using ``*/1 * * * *`` due to load problems. ``*/3 * * * *`` (or ``CRON_RELAUNCH_INTVL_MNTS: 3``) is the preferred option for cron jobs on other Level 1 systems. +.. attention:: + + Cron is not available on Derecho or Gaea. On Orion/Hercules, users must be logged into the [hercules/orion]-login-1 node to use cron. + To check the experiment progress: .. code-block:: console diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index dc582ba4bd..b32a012cf7 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -30,7 +30,7 @@ If non-default parameters are selected for the variables in this section, they s Setting ``RUN_ENVIR`` to "community" is recommended in most cases for users who are not running in NCO's production environment. Valid values: ``"nco"`` | ``"community"`` ``MACHINE``: (Default: "BIG_COMPUTER") - The machine (a.k.a. platform or system) on which the workflow will run. Currently supported platforms are listed on the :srw-wiki:`SRW App Wiki page `. When running the SRW App on any ParallelWorks/NOAA Cloud system, use "NOAACLOUD" regardless of the underlying system (AWS, GCP, or Azure). Valid values: ``"HERA"`` | ``"ORION"`` | ``"HERCULES"`` | ``"JET"`` | ``"CHEYENNE"`` | ``"DERECHO"`` | ``"GAEA"`` | ``"GAEA-C6"`` | ``"NOAACLOUD"`` | ``"STAMPEDE"`` | ``"ODIN"`` | ``"MACOS"`` | ``"LINUX"`` | ``"SINGULARITY"`` | ``"WCOSS2"`` (Check ``ufs-srweather-app/ush/valid_param_vals.yaml`` for the most up-to-date list of supported platforms.) + The machine (a.k.a. platform or system) on which the workflow will run. Currently supported platforms are listed on the :srw-wiki:`SRW App Wiki page `. When running the SRW App on any ParallelWorks/NOAA Cloud system, use "NOAACLOUD" regardless of the underlying system (AWS, GCP, or Azure). 
Valid values: ``"HERA"`` | ``"ORION"`` | ``"HERCULES"`` | ``"JET"`` | ``"DERECHO"`` | ``"GAEA"`` | ``"GAEA-C6"`` | ``"NOAACLOUD"`` | ``"MACOS"`` | ``"LINUX"`` | ``"SINGULARITY"`` | ``"WCOSS2"`` (Check ``ufs-srweather-app/ush/valid_param_vals.yaml`` for the most up-to-date list of supported platforms.) .. hint:: Users who are NOT on a named, supported Level 1 or 2 platform will need to set the ``MACHINE`` variable to ``LINUX`` or ``MACOS``. To combine use of a Linux or MacOS platform with the Rocoto workflow manager, users will also need to set ``WORKFLOW_MANAGER: "rocoto"`` in the ``platform:`` section of ``config.yaml``. This combination will assume a Slurm batch manager when generating the XML. @@ -2103,7 +2103,7 @@ Non-default parameters for coupled Air Quality Modeling (AQM) tasks are set in t Smoke and Dust Configuration Parameters -===================================== +========================================= Non-default parameters for Smoke and Dust tasks are set in the ``smoke_dust_parm:`` section of the ``config.yaml`` file. diff --git a/doc/UsersGuide/Reference/Glossary.rst b/doc/UsersGuide/Reference/Glossary.rst index c543b73258..efcc47cc7b 100644 --- a/doc/UsersGuide/Reference/Glossary.rst +++ b/doc/UsersGuide/Reference/Glossary.rst @@ -106,10 +106,10 @@ Glossary FV3 The Finite-Volume Cubed-Sphere :term:`dynamical core` (dycore). Developed at NOAA's `Geophysical - Fluid Dynamics Laboratory `__ (GFDL), it is a scalable and flexible dycore capable of both hydrostatic and non-hydrostatic atmospheric simulations. It is the dycore used in the UFS Weather Model. + Fluid Dynamics Laboratory `_ (GFDL), it is a scalable and flexible dycore capable of both hydrostatic and non-hydrostatic atmospheric simulations. It is the dycore used in the UFS Weather Model. FVCOM - `Finite Volume Community Ocean Model `__. FVCOM is used in modeling work for the `Great Lakes Coastal Forecasting System (next-gen FVCOM) `__ conducted by the `Great Lakes Environmental Research Laboratory `__. + `Finite Volume Community Ocean Model `_. FVCOM is used in modeling work for the `Great Lakes Coastal Forecasting System (next-gen FVCOM) `_ conducted by the `Great Lakes Environmental Research Laboratory `_. GFS `Global Forecast System `_. The GFS is a National Centers for Environmental Prediction (:term:`NCEP`) weather forecast model that generates data for dozens of atmospheric and land-soil variables, including temperatures, winds, precipitation, soil moisture, and atmospheric ozone concentration. The system couples four separate models (atmosphere, ocean, land/soil, and sea ice) that work together to accurately depict weather conditions. @@ -172,7 +172,7 @@ Glossary MPI stands for Message Passing Interface. An MPI is a standardized communication system used in parallel programming. It establishes portable and efficient syntax for the exchange of messages and data between multiple processors that are used by a single computer program. An MPI is required for high-performance computing (HPC) systems. MRMS - Multi-Radar/Multi-Sensor (MRMS) System Analysis data. This data is required for METplus composite reflectivity or :term:`echo top` verification tasks within the SRW App. A two-day archive of precipitation, radar, and aviation and severe weather fields is publicly available and can be accessed `here `__. + Multi-Radar/Multi-Sensor (MRMS) System Analysis data. This data is required for METplus composite reflectivity or :term:`echo top` verification tasks within the SRW App. 
A two-day archive of precipitation, radar, and aviation and severe weather fields is publicly available and can be accessed `here `_. NAM `North American Mesoscale Forecast System `_. NAM generates multiple grids (or domains) of weather forecasts over the North American continent at various horizontal resolutions. Each grid contains data for dozens of weather parameters, including temperature, precipitation, lightning, and turbulent kinetic energy. NAM uses additional numerical weather models to generate high-resolution forecasts over fixed regions, and occasionally to follow significant weather events like hurricanes. diff --git a/doc/conf.py b/doc/conf.py index a490a0df4d..22d23e757c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -36,9 +36,18 @@ numfig = True -nitpick_ignore = [('py:class', 'obj'),('py:class', - 'yaml.dumper.Dumper'),('py:class', - 'xml.etree.ElementTree'),('py:class', 'Basemap'),] +nitpick_ignore = [('py:class', 'obj'), + ('py:class', 'yaml.dumper.Dumper'), + ('py:class', 'xml.etree.ElementTree'), + ('py:class', 'Basemap'), + ('py:class', 'pandas.DataFrame'), + ('py:class', 'numpy.ndarray'), + ('py:class', 'pandas.Index'), + ('py:class', 'xarray.DataArray'), + ('py:class', 'netCDF4.Dataset'), + ('py:class', 'ESMF.Field'), + ('py:class', 'ESMF.Grid'), + ] # -- General configuration --------------------------------------------------- @@ -255,8 +264,19 @@ def setup(app): # -- Options for autodoc extension --------------------------------------- -autodoc_mock_imports = ["f90nml","cartopy","mpl_toolkits.basemap","fill_jinja_template", - "matplotlib","numpy","uwtools","mpl_toolkits","metplus", +autodoc_mock_imports = ["f90nml", + "cartopy", + "mpl_toolkits.basemap", + "fill_jinja_template", + "matplotlib", + "numpy", + "uwtools", + "mpl_toolkits", + "metplus", + "netCDF4", + "pandas", + "xarray", + "ESMF", ] logger = logging.getLogger(__name__) diff --git a/doc/requirements.in b/doc/requirements.in index 75a70ab416..b38adfa03d 100644 --- a/doc/requirements.in +++ b/doc/requirements.in @@ -1,3 +1,3 @@ -sphinx>=7.4.0 +sphinx==7.4.7 sphinx_rtd_theme sphinxcontrib-bibtex diff --git a/modulefiles/build_odin_intel.lua b/modulefiles/build_odin_intel.lua deleted file mode 100644 index 40c3ada501..0000000000 --- a/modulefiles/build_odin_intel.lua +++ /dev/null @@ -1,54 +0,0 @@ -help([[ -This module loads libraries for building the UFS SRW App on -the NSSL machine Odin using Intel -]]) - -whatis([===[Loads libraries needed for building the UFS SRW App on Odin ]===]) - -prepend_path("PATH","/home/yunheng.wang/tools/cmake-3.23.0-rc2/bin") -setenv("CMAKE","/home/yunheng.wang/tools/cmake-3.23.0-rc2/bin/cmake") - -load("hpc/1.2.0") -load("hpc-intel") -load("hpc-cray-mpich") - ---load("srw_common") - -load("jasper") -load("zlib") -load("png") - ---load("cray-hdf5") ---load("cray-netcdf") -load("esmf") -load("fms") - -load("bacio") -load("crtm") -load("g2") -load("g2tmpl") -load("ip") -load("sp") -load("w3nco") -load("upp") - -load("gftl-shared") -load("yafyaml") -load("mapl") - -load("gfsio") -load("landsfcutil") -load("nemsio") -load("nemsiogfs") -load("sfcio") -load("sigio") -load("w3emc") -load("wgrib2") - -setenv("FC", "ftn") - -setenv("CMAKE_C_COMPILER","cc") -setenv("CMAKE_CXX_COMPILER","CC") -setenv("CMAKE_Fortran_COMPILER","ftn") -setenv("CMAKE_Platform","odin.intel") - diff --git a/modulefiles/wflow_odin.lua b/modulefiles/wflow_odin.lua deleted file mode 100644 index be3f9607e8..0000000000 --- a/modulefiles/wflow_odin.lua +++ /dev/null @@ -1,35 +0,0 @@ -help([[ -This module loads 
python environement for running the UFS SRW App on -the NSSL machine Odin -]]) - -whatis([===[Loads libraries needed for running the UFS SRW App on Odin ]===]) - - - -if mode() == "load" then - -- >>> conda initialize >>> - -- !! Contents within this block are managed by 'conda init' !! - local shell=myShellType() - local conda_path="/scratch/software/Odin/python/anaconda2" - local conda_file - if shell == "csh" then - conda_file=pathJoin(conda_path,"conda.csh") - else - conda_file=pathJoin(conda_path,"conda.sh") - end - - local exit_code = os.execute('test -f'..conda_file) - if exit_code == 0 then - local mcmd="source " .. conda_file - execute{cmd=mcmd, modeA={"load"}} - else - prepend_path("PATH", pathJoin(conda_path,"bin")) - end - -- <<< conda initialize <<< - - LmodMsgRaw([===[Please do the following to activate conda: - > conda config --set changeps1 False - > conda activate workflow_tools - ]===]) -end diff --git a/ush/smoke_dust_add_smoke.py b/ush/smoke_dust_add_smoke.py index 64229d7e36..5d3ee7ef6b 100755 --- a/ush/smoke_dust_add_smoke.py +++ b/ush/smoke_dust_add_smoke.py @@ -23,6 +23,9 @@ def populate_data(data: np.ndarray, target_shape: Tuple) -> np.ndarray: def main() -> None: + """ + Main entrypoint for generating smoke/dust initial conditions. + """ # File paths source_file = "fv_tracer.res.tile1.nc" target_file = "gfs_data.tile7.halo0.nc" diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index 84e6d093d9..c310eb1e00 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -4,7 +4,7 @@ valid_vals_RUN_ENVIR: ["nco", "community"] valid_vals_VERBOSE: [True, False] valid_vals_DEBUG: [True, False] -valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "HERCULES", "JET", "ODIN", "CHEYENNE", "DERECHO", "STAMPEDE", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA", "GAEA-C6"] +valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "HERCULES", "JET", "DERECHO", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA", "GAEA-C6"] valid_vals_SCHED: ["slurm", "pbspro", "lsf", "lsfcray", "none"] valid_vals_FCST_MODEL: ["ufs-weather-model"] valid_vals_WORKFLOW_MANAGER: ["rocoto", "ecflow", "none"] From 71ac8e1d0994b7e4b8cf6f99ddcf36988d699175 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Fri, 21 Feb 2025 17:53:12 +0000 Subject: [PATCH 232/260] Debugging failing github test --- tests/test_python/test_generate_FV3LAM_wflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_python/test_generate_FV3LAM_wflow.py b/tests/test_python/test_generate_FV3LAM_wflow.py index aa4e038f7a..73e5e072da 100644 --- a/tests/test_python/test_generate_FV3LAM_wflow.py +++ b/tests/test_python/test_generate_FV3LAM_wflow.py @@ -27,7 +27,7 @@ def test_generate_FV3LAM_wflow(self): # run workflows in separate process to avoid conflict between community and nco settings def run_workflow(USHdir, logfile): - p = Process(target=generate_FV3LAM_wflow, args=(USHdir,"config.yaml",logfile)) + p = Process(target=generate_FV3LAM_wflow, args=(USHdir,"config.yaml",logfile,True)) p.start() p.join() exit_code = p.exitcode From 792566ad26172e2ded5f41d78324c16116774ce5 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." 
Date: Fri, 21 Feb 2025 19:44:09 +0000 Subject: [PATCH 233/260] more CI debugging --- tests/test_python/test_generate_FV3LAM_wflow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_python/test_generate_FV3LAM_wflow.py b/tests/test_python/test_generate_FV3LAM_wflow.py index 73e5e072da..f0acbd069a 100644 --- a/tests/test_python/test_generate_FV3LAM_wflow.py +++ b/tests/test_python/test_generate_FV3LAM_wflow.py @@ -53,6 +53,7 @@ def run_workflow(USHdir, logfile): run_command( f"""{sed} -i 's/MACHINE: hera/MACHINE: linux/g' {USHdir}/config.yaml""" ) + os.makedirs("/home/username/DATA/UFS/fix/fix_am") run_workflow(USHdir, logfile) def setUp(self): From d45a8bdae55fa59f30e808cb1a5c6f5e6f5bf2b2 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Fri, 21 Feb 2025 20:13:37 +0000 Subject: [PATCH 234/260] Incorporate fixes for 24+ hour forecasts from Gerard --- ush/config_defaults.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index 5a0678cd97..423f63eee8 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -2596,8 +2596,7 @@ verification: # CCPA observations. These files will contain observed accumulated # precipitation in NetCDF format for various accumulation intervals. # - OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} - {{- "ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}' + OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- "ccpa.hrap.conus.${FIELD_GROUP}${ACCUM_HH}h.{valid?fmt=%Y%m%d%H?shift=-${ACCUM_HH}H}_to_{valid?fmt=%Y%m%d%H}.nc" }}' # # OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: # METplus template for the names of the NetCDF files generated by the @@ -2718,7 +2717,7 @@ verification: {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} {{- ".${ensmem_name}" }} {%- endif %} - {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${FIELD_GROUP}_a${ACCUM_HH}h.nc" }}' + {{- ".prslev.${POST_OUTPUT_DOMAIN_NAME}.${FIELD_GROUP}${ACCUM_HH}h.{valid?fmt=%Y%m%d%H?shift=-${ACCUM_HH}H}_to_{valid?fmt=%Y%m%d%H}.nc" }}' # # VX_NDIGITS_ENSMEM_NAMES: # Number of digits to assume/use in the forecast ensemble member identifier From da29c880a204f5de536640192b2cd563d24c7d39 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Fri, 21 Feb 2025 20:13:59 +0000 Subject: [PATCH 235/260] More correct error message in create_symlink_to_file.py --- ush/python_utils/create_symlink_to_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ush/python_utils/create_symlink_to_file.py b/ush/python_utils/create_symlink_to_file.py index 300a9b0245..906c398203 100644 --- a/ush/python_utils/create_symlink_to_file.py +++ b/ush/python_utils/create_symlink_to_file.py @@ -45,7 +45,7 @@ def create_symlink_to_file(target, symlink, relative=True): print_err_msg_exit( f""" Cannot create symlink to specified target file because the latter does - not exist or is not a file: + not exist or is not readable: target = '{target}'""" ) From 3b0bacaf2f06d5e58201d40c090d5e1e37327985 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." 
Date: Fri, 21 Feb 2025 20:56:31 +0000 Subject: [PATCH 236/260] Attempt fix/enhancement of test_generate_FV3LAM_wflow.py --- tests/test_python/test_generate_FV3LAM_wflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_python/test_generate_FV3LAM_wflow.py b/tests/test_python/test_generate_FV3LAM_wflow.py index f0acbd069a..fd40162093 100644 --- a/tests/test_python/test_generate_FV3LAM_wflow.py +++ b/tests/test_python/test_generate_FV3LAM_wflow.py @@ -53,7 +53,7 @@ def run_workflow(USHdir, logfile): run_command( f"""{sed} -i 's/MACHINE: hera/MACHINE: linux/g' {USHdir}/config.yaml""" ) - os.makedirs("/home/username/DATA/UFS/fix/fix_am") + print(f"{test_dir=}\n{USHdir}") run_workflow(USHdir, logfile) def setUp(self): From 0825486c5a9a59677a847c6fda9cfafaf1869df1 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Fri, 21 Feb 2025 21:20:34 +0000 Subject: [PATCH 237/260] Attempted fix for test_generate_FV3LAM_wflow.py --- .github/workflows/python_tests.yaml | 5 +++++ tests/test_python/test_generate_FV3LAM_wflow.py | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python_tests.yaml b/.github/workflows/python_tests.yaml index 113ec3f59c..27e9c26404 100644 --- a/.github/workflows/python_tests.yaml +++ b/.github/workflows/python_tests.yaml @@ -50,6 +50,11 @@ jobs: micromamba activate srw_app export UNIT_TEST=True export PYTHONPATH=$(pwd)/ush + # Retrieve fix files so generate_FV3LAM_workflow.py tests complete successfully + mkdir -p /home/runner/work && cd /home/runner/work + aws s3 cp --recursive https://noaa-ufs-srw-pds.s3.amazonaws.com/index.html#develop-20240618/fix/ + cd - + export CI_FIX_FILES=/home/runner/work/fix python -m unittest tests/test_python/*.py - name: Run python functional tests diff --git a/tests/test_python/test_generate_FV3LAM_wflow.py b/tests/test_python/test_generate_FV3LAM_wflow.py index fd40162093..5a7511d717 100644 --- a/tests/test_python/test_generate_FV3LAM_wflow.py +++ b/tests/test_python/test_generate_FV3LAM_wflow.py @@ -53,7 +53,10 @@ def run_workflow(USHdir, logfile): run_command( f"""{sed} -i 's/MACHINE: hera/MACHINE: linux/g' {USHdir}/config.yaml""" ) - print(f"{test_dir=}\n{USHdir}") + # If running CI, point config.yaml to correct location for fix files + if fix_files:=get_env_var("CI_FIX_FILES"): + run_command( + f"{sed} -i '-s/\/home\/username\/DATA\/UFS\//{fix_files}/g' {USHdir}/config.yaml") run_workflow(USHdir, logfile) def setUp(self): From be8d1e05de87429291579fce84ad9548fdb1abb2 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Fri, 21 Feb 2025 21:23:13 +0000 Subject: [PATCH 238/260] Fix bad s3 retrieval command --- .github/workflows/python_tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_tests.yaml b/.github/workflows/python_tests.yaml index 27e9c26404..30ecc0f475 100644 --- a/.github/workflows/python_tests.yaml +++ b/.github/workflows/python_tests.yaml @@ -52,7 +52,7 @@ jobs: export PYTHONPATH=$(pwd)/ush # Retrieve fix files so generate_FV3LAM_workflow.py tests complete successfully mkdir -p /home/runner/work && cd /home/runner/work - aws s3 cp --recursive https://noaa-ufs-srw-pds.s3.amazonaws.com/index.html#develop-20240618/fix/ + aws s3 cp --recursive https://noaa-ufs-srw-pds.s3.amazonaws.com/index.html#develop-20240618/fix/ . cd - export CI_FIX_FILES=/home/runner/work/fix python -m unittest tests/test_python/*.py From 22a156418cd26a07686f2a504633e0ee9e280fc7 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." 
Date: Fri, 21 Feb 2025 21:26:58 +0000 Subject: [PATCH 239/260] More test fixes: correct AWS retrieval command, correctly escape backslashes in SED string --- .github/workflows/python_tests.yaml | 5 ++--- tests/test_python/test_generate_FV3LAM_wflow.py | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python_tests.yaml b/.github/workflows/python_tests.yaml index 30ecc0f475..dbff8d0054 100644 --- a/.github/workflows/python_tests.yaml +++ b/.github/workflows/python_tests.yaml @@ -51,9 +51,8 @@ jobs: export UNIT_TEST=True export PYTHONPATH=$(pwd)/ush # Retrieve fix files so generate_FV3LAM_workflow.py tests complete successfully - mkdir -p /home/runner/work && cd /home/runner/work - aws s3 cp --recursive https://noaa-ufs-srw-pds.s3.amazonaws.com/index.html#develop-20240618/fix/ . - cd - + mkdir -p /home/runner/work + aws s3 cp --recursive https://noaa-ufs-srw-pds.s3.amazonaws.com/index.html#develop-20240618/fix/ /home/runner/work/ export CI_FIX_FILES=/home/runner/work/fix python -m unittest tests/test_python/*.py diff --git a/tests/test_python/test_generate_FV3LAM_wflow.py b/tests/test_python/test_generate_FV3LAM_wflow.py index 5a7511d717..80662d2487 100644 --- a/tests/test_python/test_generate_FV3LAM_wflow.py +++ b/tests/test_python/test_generate_FV3LAM_wflow.py @@ -56,7 +56,7 @@ def run_workflow(USHdir, logfile): # If running CI, point config.yaml to correct location for fix files if fix_files:=get_env_var("CI_FIX_FILES"): run_command( - f"{sed} -i '-s/\/home\/username\/DATA\/UFS\//{fix_files}/g' {USHdir}/config.yaml") + f"{sed} -i '-s/\\/home\\/username\\/DATA\\/UFS\\//{fix_files}/g' {USHdir}/config.yaml") run_workflow(USHdir, logfile) def setUp(self): From 2e42b6660dc1372e53ffcee7c2e02cf12e5563ff Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Fri, 21 Feb 2025 21:30:05 +0000 Subject: [PATCH 240/260] =?UTF-8?q?Make=20the=20linter=20happy=20?= =?UTF-8?q?=F0=9F=98=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_python/test_generate_FV3LAM_wflow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_python/test_generate_FV3LAM_wflow.py b/tests/test_python/test_generate_FV3LAM_wflow.py index 80662d2487..46d39212ea 100644 --- a/tests/test_python/test_generate_FV3LAM_wflow.py +++ b/tests/test_python/test_generate_FV3LAM_wflow.py @@ -55,8 +55,8 @@ def run_workflow(USHdir, logfile): ) # If running CI, point config.yaml to correct location for fix files if fix_files:=get_env_var("CI_FIX_FILES"): - run_command( - f"{sed} -i '-s/\\/home\\/username\\/DATA\\/UFS\\//{fix_files}/g' {USHdir}/config.yaml") + run_command(f"{sed} -i '-s/\\/home\\/username\\/DATA\\/UFS\\//{fix_files}/g' "\ + f"{USHdir}/config.yaml") run_workflow(USHdir, logfile) def setUp(self): From aa4cfdb0064e9cc34d8d92d9f42e7edd6424e400 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." 
Date: Fri, 21 Feb 2025 21:34:37 +0000 Subject: [PATCH 241/260] Correct AWS bucket path (I think) --- .github/workflows/python_tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_tests.yaml b/.github/workflows/python_tests.yaml index dbff8d0054..96c53c00d3 100644 --- a/.github/workflows/python_tests.yaml +++ b/.github/workflows/python_tests.yaml @@ -52,7 +52,7 @@ jobs: export PYTHONPATH=$(pwd)/ush # Retrieve fix files so generate_FV3LAM_workflow.py tests complete successfully mkdir -p /home/runner/work - aws s3 cp --recursive https://noaa-ufs-srw-pds.s3.amazonaws.com/index.html#develop-20240618/fix/ /home/runner/work/ + aws s3 cp --recursive s3://noaa-ufs-srw-pds/develop-20240618/fix/ /home/runner/work/ export CI_FIX_FILES=/home/runner/work/fix python -m unittest tests/test_python/*.py
From f11ddadd0256171289a80f9fb1b1978a7fafa9d3 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Fri, 21 Feb 2025 21:53:53 +0000 Subject: [PATCH 242/260] Get public S3 data unauthenticated --- .github/workflows/python_tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_tests.yaml b/.github/workflows/python_tests.yaml index 96c53c00d3..1042ddc48c 100644 --- a/.github/workflows/python_tests.yaml +++ b/.github/workflows/python_tests.yaml @@ -52,7 +52,7 @@ jobs: export PYTHONPATH=$(pwd)/ush # Retrieve fix files so generate_FV3LAM_workflow.py tests complete successfully mkdir -p /home/runner/work - aws s3 cp --recursive s3://noaa-ufs-srw-pds/develop-20240618/fix/ /home/runner/work/ + aws s3 cp --recursive --no-sign-request s3://noaa-ufs-srw-pds/develop-20240618/fix/ /home/runner/work/ export CI_FIX_FILES=/home/runner/work/fix python -m unittest tests/test_python/*.py
From b39bfeb9d1647c49ce37c1d3d00580c75f6b0765 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Fri, 21 Feb 2025 22:01:23 +0000 Subject: [PATCH 243/260] Add --no-progress flag so log files aren't enormous --- .github/workflows/python_tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python_tests.yaml b/.github/workflows/python_tests.yaml index 1042ddc48c..883eaaa98e 100644 --- a/.github/workflows/python_tests.yaml +++ b/.github/workflows/python_tests.yaml @@ -52,7 +52,7 @@ jobs: export PYTHONPATH=$(pwd)/ush # Retrieve fix files so generate_FV3LAM_workflow.py tests complete successfully mkdir -p /home/runner/work - aws s3 cp --recursive --no-sign-request s3://noaa-ufs-srw-pds/develop-20240618/fix/ /home/runner/work/ + aws s3 cp --recursive --no-sign-request --no-progress s3://noaa-ufs-srw-pds/develop-20240618/fix/ /home/runner/work/ export CI_FIX_FILES=/home/runner/work/fix python -m unittest tests/test_python/*.py
From 6b039a1aacd8175c63be73a791f9f0c5e83ba1a6 Mon Sep 17 00:00:00 2001 From: "Michael Kavulich, Jr." Date: Fri, 21 Feb 2025 22:08:44 +0000 Subject: [PATCH 244/260] GitHub runners don't have enough space for fix files. Try dummy directories. 
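The idea, in outline: rather than staging real fix files on the runner, pre-create empty
placeholder directories at the paths the workflow generator checks. A minimal sketch of
this approach (only CI_FIX_FILES and fix_am come from this patch; the other subdirectory
names are assumptions and would need to match whatever paths the test config references):

    # Stub out the fix-file tree so path checks in generate_FV3LAM_wflow.py
    # pass without pulling gigabytes of data from S3 onto a small runner.
    export CI_FIX_FILES=/home/runner/work/fix
    for subdir in fix_am fix_lut fix_orog sfc_climo; do  # assumed set of subdirs
        mkdir -p "${CI_FIX_FILES}/${subdir}"
    done

The diff below keeps things even simpler and creates only fix_am.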
--- .github/workflows/python_tests.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python_tests.yaml b/.github/workflows/python_tests.yaml index 883eaaa98e..469a0753ac 100644 --- a/.github/workflows/python_tests.yaml +++ b/.github/workflows/python_tests.yaml @@ -50,10 +50,11 @@ jobs: micromamba activate srw_app export UNIT_TEST=True export PYTHONPATH=$(pwd)/ush - # Retrieve fix files so generate_FV3LAM_workflow.py tests complete successfully - mkdir -p /home/runner/work - aws s3 cp --recursive --no-sign-request --no-progress s3://noaa-ufs-srw-pds/develop-20240618/fix/ /home/runner/work/ + # Make dummy directories for fix files so generate_FV3LAM_workflow.py tests complete successfully +# aws s3 cp --recursive --no-sign-request --no-progress s3://noaa-ufs-srw-pds/develop-20240618/fix/ /home/runner/work/ export CI_FIX_FILES=/home/runner/work/fix + mkdir -p $CI_FIX_FILES/fix_am + python -m unittest tests/test_python/*.py - name: Run python functional tests From d34c3f68715af59d5be538166d943531d8f41cf6 Mon Sep 17 00:00:00 2001 From: Michael Lueken <63728921+MichaelLueken@users.noreply.github.com> Date: Fri, 21 Feb 2025 17:02:38 -0500 Subject: [PATCH 245/260] [develop] Update weather model hash to 8933749 (February 19) and add smoke/dust WE2E test to testing suites (#1195) * Update weather model hash to 8933749 from February 19, 2025 * Add smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf WE2E test to coverage.gaea-c6, comprehensive and comprehensive.orion (sym linked to comprehensive.gaea-c6 and comprehensive.hercules) * Update modulefiles/build_hera_gnu.lua to allow smoke and dust WE2E test to run on Hera GNU. * Address documentation failures now that https://www.fvcom.org site's security certificate has been renewed * Change gaea to gaeac5 and gaea-c6 to gaeac6 throughout to address node name change in Jenkins * Remove Jet support in Jenkins --------- Co-authored-by: EdwardSnyder-NOAA --- .cicd/Jenkinsfile | 10 ++++---- .cicd/scripts/srw_unittest.sh | 2 +- .cicd/scripts/wrapper_srw_ftest.sh | 12 ++++------ Externals.cfg | 2 +- etc/lmod-setup.csh | 6 ++--- etc/lmod-setup.sh | 6 ++--- ..._gaea_intel.lua => build_gaeac5_intel.lua} | 0 ...ea-c6_intel.lua => build_gaeac6_intel.lua} | 0 modulefiles/build_hera_gnu.lua | 3 ++- modulefiles/build_noaacloud_intel.lua | 3 +++ .../{gaea-c6 => gaeac5}/get_obs.local.lua | 0 .../plot_allvars.local.lua | 0 .../tasks/{gaea => gaeac5}/python_srw.lua | 0 .../{gaea-c6 => gaeac5}/run_vx.local.lua | 0 .../tasks/{gaea => gaeac6}/get_obs.local.lua | 0 .../{gaea => gaeac6}/plot_allvars.local.lua | 0 .../{gaea-c6 => gaeac6}/prepstart.local.lua | 0 .../tasks/{gaea-c6 => gaeac6}/python_srw.lua | 0 .../{gaea-c6 => gaeac6}/python_srw_sd.lua | 0 .../tasks/{gaea => gaeac6}/run_vx.local.lua | 0 .../{gaea-c6 => gaeac6}/smoke_dust.local.lua | 0 .../tasks/noaacloud/prepstart.local.lua | 1 + .../tasks/noaacloud/smoke_dust.local.lua | 1 + .../{wflow_gaea.lua => wflow_gaeac5.lua} | 0 .../{wflow_gaea-c6.lua => wflow_gaeac6.lua} | 0 tests/WE2E/machine_suites/comprehensive | 1 + .../WE2E/machine_suites/comprehensive.gaea-c6 | 1 - ...comprehensive.jet => comprehensive.gaeac5} | 23 +++++-------------- ...omprehensive.gaea => comprehensive.gaeac6} | 0 tests/WE2E/machine_suites/comprehensive.orion | 1 + .../{coverage.gaea => coverage.gaeac5} | 0 .../{coverage.gaea-c6 => coverage.gaeac6} | 1 + tests/WE2E/machine_suites/coverage.jet | 11 --------- tests/WE2E/setup_WE2E_tests.sh | 2 +- tests/build.sh | 2 +- ush/machine/{gaea.yaml => gaeac5.yaml} | 
0 ush/machine/{gaea-c6.yaml => gaeac6.yaml} | 0 ush/machine/noaacloud.yaml | 11 ++++++++- ush/valid_param_vals.yaml | 2 +- 39 files changed, 46 insertions(+), 55 deletions(-) rename modulefiles/{build_gaea_intel.lua => build_gaeac5_intel.lua} (100%) rename modulefiles/{build_gaea-c6_intel.lua => build_gaeac6_intel.lua} (100%) rename modulefiles/tasks/{gaea-c6 => gaeac5}/get_obs.local.lua (100%) rename modulefiles/tasks/{gaea-c6 => gaeac5}/plot_allvars.local.lua (100%) rename modulefiles/tasks/{gaea => gaeac5}/python_srw.lua (100%) rename modulefiles/tasks/{gaea-c6 => gaeac5}/run_vx.local.lua (100%) rename modulefiles/tasks/{gaea => gaeac6}/get_obs.local.lua (100%) rename modulefiles/tasks/{gaea => gaeac6}/plot_allvars.local.lua (100%) rename modulefiles/tasks/{gaea-c6 => gaeac6}/prepstart.local.lua (100%) rename modulefiles/tasks/{gaea-c6 => gaeac6}/python_srw.lua (100%) rename modulefiles/tasks/{gaea-c6 => gaeac6}/python_srw_sd.lua (100%) rename modulefiles/tasks/{gaea => gaeac6}/run_vx.local.lua (100%) rename modulefiles/tasks/{gaea-c6 => gaeac6}/smoke_dust.local.lua (100%) create mode 100644 modulefiles/tasks/noaacloud/prepstart.local.lua create mode 100644 modulefiles/tasks/noaacloud/smoke_dust.local.lua rename modulefiles/{wflow_gaea.lua => wflow_gaeac5.lua} (100%) rename modulefiles/{wflow_gaea-c6.lua => wflow_gaeac6.lua} (100%) delete mode 120000 tests/WE2E/machine_suites/comprehensive.gaea-c6 rename tests/WE2E/machine_suites/{comprehensive.jet => comprehensive.gaeac5} (75%) rename tests/WE2E/machine_suites/{comprehensive.gaea => comprehensive.gaeac6} (100%) rename tests/WE2E/machine_suites/{coverage.gaea => coverage.gaeac5} (100%) rename tests/WE2E/machine_suites/{coverage.gaea-c6 => coverage.gaeac6} (91%) delete mode 100644 tests/WE2E/machine_suites/coverage.jet rename ush/machine/{gaea.yaml => gaeac5.yaml} (100%) rename ush/machine/{gaea-c6.yaml => gaeac6.yaml} (100%) diff --git a/.cicd/Jenkinsfile b/.cicd/Jenkinsfile index a7665189ac..4d90a0bc40 100644 --- a/.cicd/Jenkinsfile +++ b/.cicd/Jenkinsfile @@ -12,10 +12,10 @@ pipeline { parameters { // Allow job runner to filter based on platform // Use the line below to enable all PW clusters - // choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'derecho', 'gaea', 'gaea-c6','hera', 'jet', 'orion', 'hercules', 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'], description: 'Specify the platform(s) to use') + // choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'derecho', 'gaeac5', 'gaeac6','hera', 'orion', 'hercules', 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'], description: 'Specify the platform(s) to use') // Use the line below to enable the PW AWS cluster - // choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'derecho', 'gaea', 'gaea-c6', 'hera', 'jet', 'orion', 'hercules', 'pclusternoaav2use1'], description: 'Specify the platform(s) to use') - choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'derecho', 'gaea', 'gaea-c6', 'hera', 'jet', 'orion', 'hercules'], description: 'Specify the platform(s) to use') + // choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'derecho', 'gaeac5', 'gaeac6', 'hera', 'orion', 'hercules', 'pclusternoaav2use1'], description: 'Specify the platform(s) to use') + choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'derecho', 'gaeac5', 'gaeac6', 'hera', 'orion', 'hercules'], description: 'Specify the platform(s) to use') // Allow job runner to filter based on compiler choice(name: 'SRW_COMPILER_FILTER', choices: ['all', 'gnu', 'intel'], description: 
'Specify the compiler(s) to use to build') // Workflow Wrapper test depth {0..9}, 0=none, 1=simple, 9=all [default] @@ -103,7 +103,7 @@ pipeline { axes { axis { name 'SRW_PLATFORM' - values 'derecho', 'gaea', 'gaea-c6', 'hera', 'jet', 'orion', 'hercules' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1' + values 'derecho', 'gaeac5', 'gaeac6', 'hera', 'orion', 'hercules' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1' } axis { @@ -117,7 +117,7 @@ pipeline { exclude { axis { name 'SRW_PLATFORM' - values 'derecho', 'gaea', 'gaea-c6', 'jet', 'orion', 'hercules' //, 'pclusternoaav2use1' , 'azclusternoaav2eus1', 'gclusternoaav2usc1' + values 'derecho', 'gaeac5', 'gaeac6', 'orion', 'hercules' //, 'pclusternoaav2use1' , 'azclusternoaav2eus1', 'gclusternoaav2usc1' } axis { diff --git a/.cicd/scripts/srw_unittest.sh b/.cicd/scripts/srw_unittest.sh index cfc1719de4..d7fa4ea10f 100755 --- a/.cicd/scripts/srw_unittest.sh +++ b/.cicd/scripts/srw_unittest.sh @@ -18,7 +18,7 @@ fi cd $workspace # Only run this on machines with hpss access -hpss_machines=( jet hera ) +hpss_machines=( hera ) if [[ ${hpss_machines[@]} =~ ${SRW_PLATFORM} ]] ; then source ${workspace}/ush/load_modules_wflow.sh ${SRW_PLATFORM} diff --git a/.cicd/scripts/wrapper_srw_ftest.sh b/.cicd/scripts/wrapper_srw_ftest.sh index d43c828318..3cfdd49d4d 100755 --- a/.cicd/scripts/wrapper_srw_ftest.sh +++ b/.cicd/scripts/wrapper_srw_ftest.sh @@ -21,18 +21,18 @@ else fi # Customize wrapper scripts -if [[ "${SRW_PLATFORM}" == gaea ]]; then +if [[ "${SRW_PLATFORM}" == gaeac5 ]]; then sed -i '15i #SBATCH --clusters=c5' ${WORKSPACE}/${SRW_PLATFORM}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh sed -i 's|qos=batch|qos=normal|g' ${WORKSPACE}/${SRW_PLATFORM}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh sed -i 's|00:30:00|00:45:00|g' ${WORKSPACE}/${SRW_PLATFORM}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh - sed -i 's|${JOBSdir}/JREGIONAL_RUN_POST|$USHdir/load_modules_run_task.sh "gaea" "run_post" ${JOBSdir}/JREGIONAL_RUN_POST|g' ${WORKSPACE}/${SRW_PLATFORM}/ush/wrappers/run_post.sh + sed -i 's|${JOBSdir}/JREGIONAL_RUN_POST|$USHdir/load_modules_run_task.sh "gaeac5" "run_post" ${JOBSdir}/JREGIONAL_RUN_POST|g' ${WORKSPACE}/${SRW_PLATFORM}/ush/wrappers/run_post.sh fi -if [[ "${SRW_PLATFORM}" == gaea-c6 ]]; then +if [[ "${SRW_PLATFORM}" == gaeac6 ]]; then sed -i '15i #SBATCH --clusters=c6' ${WORKSPACE}/${SRW_PLATFORM}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh sed -i 's|qos=batch|qos=normal|g' ${WORKSPACE}/${SRW_PLATFORM}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh sed -i 's|00:30:00|00:45:00|g' ${WORKSPACE}/${SRW_PLATFORM}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh - sed -i 's|${JOBSdir}/JREGIONAL_RUN_POST|$USHdir/load_modules_run_task.sh "gaea-c6" "run_post" ${JOBSdir}/JREGIONAL_RUN_POST|g' ${WORKSPACE}/${SRW_PLATFORM}/ush/wrappers/run_post.sh + sed -i 's|${JOBSdir}/JREGIONAL_RUN_POST|$USHdir/load_modules_run_task.sh "gaeac6" "run_post" ${JOBSdir}/JREGIONAL_RUN_POST|g' ${WORKSPACE}/${SRW_PLATFORM}/ush/wrappers/run_post.sh fi if [[ "${SRW_PLATFORM}" == hera ]]; then @@ -41,10 +41,6 @@ if [[ "${SRW_PLATFORM}" == hera ]]; then fi fi -if [[ "${SRW_PLATFORM}" == jet ]]; then - sed -i '15i #SBATCH --partition=xjet' ${WORKSPACE}/${SRW_PLATFORM}/.cicd/scripts/${workflow_cmd}_srw_ftest.sh -fi - if [[ "${TASK_DEPTH}" == 0 ]] ; then exit 0 fi diff --git a/Externals.cfg b/Externals.cfg index 5e94961fb4..e6397e880b 100644 --- a/Externals.cfg +++ b/Externals.cfg @@ -12,7 +12,7 @@ protocol = git repo_url = 
diff --git a/Externals.cfg b/Externals.cfg
index 5e94961fb4..e6397e880b 100644
--- a/Externals.cfg
+++ b/Externals.cfg
@@ -12,7 +12,7 @@ protocol = git
 repo_url = https://github.com/ufs-community/ufs-weather-model
 # Specify either a branch name or a hash but not both.
 #branch = develop
-hash = 3a5e52e
+hash = 8933749
 local_path = sorc/ufs-weather-model
 required = True
diff --git a/etc/lmod-setup.csh b/etc/lmod-setup.csh
index 6af4f8dd1f..40d1a62dc6 100644
--- a/etc/lmod-setup.csh
+++ b/etc/lmod-setup.csh
@@ -6,7 +6,7 @@ Usage: source etc/lmod-setup.csh PLATFORM

 OPTIONS:
    PLATFORM - name of machine you are building on
-      (e.g. cheyenne | hera | jet | orion | hercules | wcoss2 )
+      (e.g. cheyenne | hera | orion | hercules | wcoss2 )
 EOF_USAGE
   exit 1
 else
@@ -37,10 +37,10 @@ else if ( "$L_MACHINE" == singularity ) then
    module purge

-else if ( "$L_MACHINE" == gaea ) then
+else if ( "$L_MACHINE" == gaeac5 ) then
    module reset

-else if ( "$L_MACHINE" == gaea-c6 ) then
+else if ( "$L_MACHINE" == gaeac6 ) then
    module reset

 else if ( "$L_MACHINE" == derecho ) then
diff --git a/etc/lmod-setup.sh b/etc/lmod-setup.sh
index 5012d4381c..c2f803fd97 100644
--- a/etc/lmod-setup.sh
+++ b/etc/lmod-setup.sh
@@ -7,7 +7,7 @@ Usage: source etc/lmod-setup.sh PLATFORM

 OPTIONS:
    PLATFORM - name of machine you are building on
-      (e.g. cheyenne | hera | jet | orion | hercules | wcoss2 )
+      (e.g. cheyenne | hera | orion | hercules | wcoss2 )
 EOF_USAGE
   exit 1
 else
@@ -44,10 +44,10 @@ elif [ "$L_MACHINE" = singularity ]; then
   module purge

-elif [ "$L_MACHINE" = gaea ]; then
+elif [ "$L_MACHINE" = gaeac5 ]; then
   module reset

-elif [ "$L_MACHINE" = gaea-c6 ]; then
+elif [ "$L_MACHINE" = gaeac6 ]; then
   module reset

 elif [ "$L_MACHINE" = derecho ]; then
diff --git a/modulefiles/build_gaea_intel.lua b/modulefiles/build_gaeac5_intel.lua
similarity index 100%
rename from modulefiles/build_gaea_intel.lua
rename to modulefiles/build_gaeac5_intel.lua
diff --git a/modulefiles/build_gaea-c6_intel.lua b/modulefiles/build_gaeac6_intel.lua
similarity index 100%
rename from modulefiles/build_gaea-c6_intel.lua
rename to modulefiles/build_gaeac6_intel.lua
diff --git a/modulefiles/build_hera_gnu.lua b/modulefiles/build_hera_gnu.lua
index adf4fa8287..3f14b37863 100644
--- a/modulefiles/build_hera_gnu.lua
+++ b/modulefiles/build_hera_gnu.lua
@@ -17,7 +17,8 @@ load("cmake/3.23.1")
 load("srw_common")

 load(pathJoin("nccmp", os.getenv("nccmp_ver") or "1.9.0.1"))
-load(pathJoin("nco", os.getenv("nco_ver") or "5.1.6"))
+load(pathJoin("nco", os.getenv("nco_ver") or "5.0.6"))
+load(pathJoin("prod_util", os.getenv("prod_util_ver") or "2.1.1"))
 load(pathJoin("openblas", os.getenv("openblas_ver") or "0.3.24"))

 prepend_path("CPPFLAGS", " -I/apps/slurm_hera/23.11.3/include/slurm"," ")
diff --git a/modulefiles/build_noaacloud_intel.lua b/modulefiles/build_noaacloud_intel.lua
index 5aa24ba157..bb219172b1 100644
--- a/modulefiles/build_noaacloud_intel.lua
+++ b/modulefiles/build_noaacloud_intel.lua
@@ -14,3 +14,6 @@ unload("gnu")
 load("cmake/3.23.1")

 load("srw_common")
+
+load(pathJoin("nco", os.getenv("nco_ver") or "5.0.6"))
+load(pathJoin("prod_util", os.getenv("prod_util_ver") or "2.1.1"))
diff --git a/modulefiles/tasks/gaea-c6/get_obs.local.lua b/modulefiles/tasks/gaeac5/get_obs.local.lua
similarity index 100%
rename from modulefiles/tasks/gaea-c6/get_obs.local.lua
rename to modulefiles/tasks/gaeac5/get_obs.local.lua
diff --git a/modulefiles/tasks/gaea-c6/plot_allvars.local.lua b/modulefiles/tasks/gaeac5/plot_allvars.local.lua
similarity index 100%
rename from modulefiles/tasks/gaea-c6/plot_allvars.local.lua
rename to modulefiles/tasks/gaeac5/plot_allvars.local.lua
diff --git a/modulefiles/tasks/gaea/python_srw.lua b/modulefiles/tasks/gaeac5/python_srw.lua
similarity index 100%
rename from modulefiles/tasks/gaea/python_srw.lua
rename to modulefiles/tasks/gaeac5/python_srw.lua
diff --git a/modulefiles/tasks/gaea-c6/run_vx.local.lua b/modulefiles/tasks/gaeac5/run_vx.local.lua
similarity index 100%
rename from modulefiles/tasks/gaea-c6/run_vx.local.lua
rename to modulefiles/tasks/gaeac5/run_vx.local.lua
diff --git a/modulefiles/tasks/gaea/get_obs.local.lua b/modulefiles/tasks/gaeac6/get_obs.local.lua
similarity index 100%
rename from modulefiles/tasks/gaea/get_obs.local.lua
rename to modulefiles/tasks/gaeac6/get_obs.local.lua
diff --git a/modulefiles/tasks/gaea/plot_allvars.local.lua b/modulefiles/tasks/gaeac6/plot_allvars.local.lua
similarity index 100%
rename from modulefiles/tasks/gaea/plot_allvars.local.lua
rename to modulefiles/tasks/gaeac6/plot_allvars.local.lua
diff --git a/modulefiles/tasks/gaea-c6/prepstart.local.lua b/modulefiles/tasks/gaeac6/prepstart.local.lua
similarity index 100%
rename from modulefiles/tasks/gaea-c6/prepstart.local.lua
rename to modulefiles/tasks/gaeac6/prepstart.local.lua
diff --git a/modulefiles/tasks/gaea-c6/python_srw.lua b/modulefiles/tasks/gaeac6/python_srw.lua
similarity index 100%
rename from modulefiles/tasks/gaea-c6/python_srw.lua
rename to modulefiles/tasks/gaeac6/python_srw.lua
diff --git a/modulefiles/tasks/gaea-c6/python_srw_sd.lua b/modulefiles/tasks/gaeac6/python_srw_sd.lua
similarity index 100%
rename from modulefiles/tasks/gaea-c6/python_srw_sd.lua
rename to modulefiles/tasks/gaeac6/python_srw_sd.lua
diff --git a/modulefiles/tasks/gaea/run_vx.local.lua b/modulefiles/tasks/gaeac6/run_vx.local.lua
similarity index 100%
rename from modulefiles/tasks/gaea/run_vx.local.lua
rename to modulefiles/tasks/gaeac6/run_vx.local.lua
diff --git a/modulefiles/tasks/gaea-c6/smoke_dust.local.lua b/modulefiles/tasks/gaeac6/smoke_dust.local.lua
similarity index 100%
rename from modulefiles/tasks/gaea-c6/smoke_dust.local.lua
rename to modulefiles/tasks/gaeac6/smoke_dust.local.lua
diff --git a/modulefiles/tasks/noaacloud/prepstart.local.lua b/modulefiles/tasks/noaacloud/prepstart.local.lua
new file mode 100644
index 0000000000..6d428532bc
--- /dev/null
+++ b/modulefiles/tasks/noaacloud/prepstart.local.lua
@@ -0,0 +1 @@
+load("python_srw_sd")
diff --git a/modulefiles/tasks/noaacloud/smoke_dust.local.lua b/modulefiles/tasks/noaacloud/smoke_dust.local.lua
new file mode 100644
index 0000000000..6d428532bc
--- /dev/null
+++ b/modulefiles/tasks/noaacloud/smoke_dust.local.lua
@@ -0,0 +1 @@
+load("python_srw_sd")
diff --git a/modulefiles/wflow_gaea.lua b/modulefiles/wflow_gaeac5.lua
similarity index 100%
rename from modulefiles/wflow_gaea.lua
rename to modulefiles/wflow_gaeac5.lua
diff --git a/modulefiles/wflow_gaea-c6.lua b/modulefiles/wflow_gaeac6.lua
similarity index 100%
rename from modulefiles/wflow_gaea-c6.lua
rename to modulefiles/wflow_gaeac6.lua
diff --git a/tests/WE2E/machine_suites/comprehensive b/tests/WE2E/machine_suites/comprehensive
index 8c42aa4599..bbd2de0217 100644
--- a/tests/WE2E/machine_suites/comprehensive
+++ b/tests/WE2E/machine_suites/comprehensive
@@ -73,6 +73,7 @@ MET_ensemble_verification_only_vx_time_lag
 MET_ensemble_verification_winter_wx
 MET_verification_only_vx
 pregen_grid_orog_sfc_climo
+smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf
 specify_EXTRN_MDL_SYSBASEDIR_ICS_LBCS
 specify_template_filenames
 vx-det_long-fcst_custom-vx-config_aiml-fourcastnet
diff --git a/tests/WE2E/machine_suites/comprehensive.gaea-c6 b/tests/WE2E/machine_suites/comprehensive.gaea-c6
deleted file mode 120000
index 18a56d9739..0000000000
--- a/tests/WE2E/machine_suites/comprehensive.gaea-c6
+++ /dev/null
@@ -1 +0,0 @@
-comprehensive.orion
\ No newline at end of file
diff --git a/tests/WE2E/machine_suites/comprehensive.jet b/tests/WE2E/machine_suites/comprehensive.gaeac5
similarity index 75%
rename from tests/WE2E/machine_suites/comprehensive.jet
rename to tests/WE2E/machine_suites/comprehensive.gaeac5
index 0e15479feb..5930843582 100644
--- a/tests/WE2E/machine_suites/comprehensive.jet
+++ b/tests/WE2E/machine_suites/comprehensive.gaeac5
@@ -10,7 +10,6 @@ community
 custom_ESGgrid
 custom_ESGgrid_Central_Asia_3km
-custom_ESGgrid_Great_Lakes_snow_8km
 custom_ESGgrid_IndianOcean_6km
 custom_ESGgrid_NewZealand_3km
 custom_ESGgrid_Peru_12km
@@ -18,15 +17,7 @@ custom_ESGgrid_SF_1p1km
 custom_GFDLgrid__GFDLgrid_USE_NUM_CELLS_IN_FILENAMES_eq_FALSE
 custom_GFDLgrid
 deactivate_tasks
-#get_from_AWS_ics_GEFS_lbcs_GEFS_fmt_grib2_2022040400_ensemble_2mems
-get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_grib2_2019061200
-get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_nemsio_2019061200
-get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_nemsio_2021032018
-get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_netcdf_2022060112_48h
-#get_from_HPSS_ics_GDAS_lbcs_GDAS_fmt_netcdf_2022040400_ensemble_2mems
-get_from_HPSS_ics_GSMGFS_lbcs_GSMGFS
-get_from_HPSS_ics_HRRR_lbcs_RAP
-get_from_HPSS_ics_RAP_lbcs_RAP
+get_from_AWS_ics_GEFS_lbcs_GEFS_fmt_grib2_2022040400_ensemble_2mems
 get_from_NOMADS_ics_FV3GFS_lbcs_FV3GFS
 grid_CONUS_25km_GFDLgrid_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
 grid_CONUS_3km_GFDLgrid_ics_FV3GFS_lbcs_FV3GFS_suite_RRFS_v1beta
@@ -46,11 +37,11 @@ grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_RAP_suite_RAP
 grid_RRFS_CONUS_25km_ics_GSMGFS_lbcs_GSMGFS_suite_GFS_v15p2
 grid_RRFS_CONUS_25km_ics_NAM_lbcs_NAM_suite_GFS_v16
 grid_RRFS_CONUS_25km_ics_NAM_lbcs_NAM_suite_RRFS_v1beta
-#grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2
-#grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15_thompson_mynn_lam3km
-#grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
-#grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR
-#grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_RRFS_v1beta
+grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2
+grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15_thompson_mynn_lam3km
+grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
+grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR
+grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_RRFS_v1beta
 grid_RRFS_CONUScompact_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16
 grid_RRFS_CONUScompact_13km_ics_HRRR_lbcs_RAP_suite_HRRR
 grid_RRFS_CONUScompact_13km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta
@@ -67,9 +58,7 @@ grid_SUBCONUS_Ind_3km_ics_HRRR_lbcs_HRRR_suite_HRRR
 grid_SUBCONUS_Ind_3km_ics_HRRR_lbcs_RAP_suite_WoFS_v0
 grid_SUBCONUS_Ind_3km_ics_NAM_lbcs_NAM_suite_GFS_v16
 grid_SUBCONUS_Ind_3km_ics_RAP_lbcs_RAP_suite_RRFS_v1beta_plot
-long_fcst
 MET_ensemble_verification_only_vx
-MET_ensemble_verification_only_vx_time_lag
 MET_ensemble_verification_winter_wx
 MET_verification_only_vx
 pregen_grid_orog_sfc_climo
diff --git a/tests/WE2E/machine_suites/comprehensive.gaea b/tests/WE2E/machine_suites/comprehensive.gaeac6
similarity index 100%
rename from tests/WE2E/machine_suites/comprehensive.gaea
rename to tests/WE2E/machine_suites/comprehensive.gaeac6
diff --git a/tests/WE2E/machine_suites/comprehensive.orion b/tests/WE2E/machine_suites/comprehensive.orion
index 5930843582..265506fa3e 100644
--- a/tests/WE2E/machine_suites/comprehensive.orion
+++ b/tests/WE2E/machine_suites/comprehensive.orion
@@ -62,5 +62,6 @@ MET_ensemble_verification_only_vx
 MET_ensemble_verification_winter_wx
 MET_verification_only_vx
 pregen_grid_orog_sfc_climo
+smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf
 specify_EXTRN_MDL_SYSBASEDIR_ICS_LBCS
 specify_template_filenames
diff --git a/tests/WE2E/machine_suites/coverage.gaea b/tests/WE2E/machine_suites/coverage.gaeac5
similarity index 100%
rename from tests/WE2E/machine_suites/coverage.gaea
rename to tests/WE2E/machine_suites/coverage.gaeac5
diff --git a/tests/WE2E/machine_suites/coverage.gaea-c6 b/tests/WE2E/machine_suites/coverage.gaeac6
similarity index 91%
rename from tests/WE2E/machine_suites/coverage.gaea-c6
rename to tests/WE2E/machine_suites/coverage.gaeac6
index 970fdf4086..62813e9209 100644
--- a/tests/WE2E/machine_suites/coverage.gaea-c6
+++ b/tests/WE2E/machine_suites/coverage.gaeac6
@@ -7,5 +7,6 @@ grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15_thompson_mynn_lam3km
 grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_HRRR_suite_HRRR
 grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta
 grid_SUBCONUS_Ind_3km_ics_RAP_lbcs_RAP_suite_RRFS_v1beta_plot
+smoke_dust_grid_RRFS_CONUS_3km_suite_HRRR_gf
 2020_CAPE
 2020_easter_storm
diff --git a/tests/WE2E/machine_suites/coverage.jet b/tests/WE2E/machine_suites/coverage.jet
deleted file mode 100644
index 8c79a0b700..0000000000
--- a/tests/WE2E/machine_suites/coverage.jet
+++ /dev/null
@@ -1,11 +0,0 @@
-2019_hurricane_barry
-community
-custom_ESGgrid
-custom_ESGgrid_Great_Lakes_snow_8km
-custom_GFDLgrid
-get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_nemsio_2021032018
-get_from_HPSS_ics_FV3GFS_lbcs_FV3GFS_fmt_netcdf_2022060112_48h
-get_from_HPSS_ics_RAP_lbcs_RAP
-grid_RRFS_AK_3km_ics_FV3GFS_lbcs_FV3GFS_suite_HRRR
-grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16_plot
-grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2
diff --git a/tests/WE2E/setup_WE2E_tests.sh b/tests/WE2E/setup_WE2E_tests.sh
index 44a229299a..42f75515dd 100755
--- a/tests/WE2E/setup_WE2E_tests.sh
+++ b/tests/WE2E/setup_WE2E_tests.sh
@@ -45,7 +45,7 @@ function usage {
 }


-machines=( hera jet cheyenne derecho orion wcoss2 gaea gaea-c6 odin singularity macos noaacloud )
+machines=( hera cheyenne derecho orion wcoss2 gaeac5 gaeac6 odin singularity macos noaacloud )

 if [ "$1" = "-h" ] ; then usage ; fi
 [[ $# -le 2 ]] && usage
diff --git a/tests/build.sh b/tests/build.sh
index 4aa50f40f2..c499d3b9a2 100755
--- a/tests/build.sh
+++ b/tests/build.sh
@@ -21,7 +21,7 @@ function usage() {
   exit 1
 }

-machines=( hera jet cheyenne derecho orion hercules wcoss2 gaea gaea-c6 odin singularity macos noaacloud )
+machines=( hera derecho orion hercules wcoss2 gaeac5 gaeac6 odin singularity macos noaacloud )

 [[ $# -gt 4 ]] && usage
diff --git a/ush/machine/gaea.yaml b/ush/machine/gaeac5.yaml
similarity index 100%
rename from ush/machine/gaea.yaml
rename to ush/machine/gaeac5.yaml
diff --git a/ush/machine/gaea-c6.yaml b/ush/machine/gaeac6.yaml
similarity index 100%
rename from ush/machine/gaea-c6.yaml
rename to ush/machine/gaeac6.yaml
diff --git a/ush/machine/noaacloud.yaml b/ush/machine/noaacloud.yaml
index b7c9f56d31..31f799a47c 100644
--- a/ush/machine/noaacloud.yaml
+++ b/ush/machine/noaacloud.yaml
@@ -29,9 +29,18 @@ platform:
   FIXorg: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_orog
   FIXsfc: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_sfc_climo
   FIXshp: /contrib/EPIC/UFS_SRW_data/develop/NaturalEarth
+  FIXaqm: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_aqm
+  FIXemis: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_emis
+  FIXsmoke: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_smoke
+  FIXupp: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_upp
+  FIXcrtm: /contrib/EPIC/UFS_SRW_data/develop/fix/fix_crtm
   EXTRN_MDL_DATA_STORES: aws nomads
 data:
   ics_lbcs:
     FV3GFS:
       nemsio: /contrib/EPIC/UFS_SRW_data/develop/input_model_data/FV3GFS/nemsio/${yyyymmdd}${hh}
-      grib2: /contrib/EPIC/UFS_SRW_data/develop/input_model_data/FV3GFS/grib2/${yyyymmdd}${hh}
+      grib2: /contrib/EPIC/UFS_SRW_data/develop/input_model_data/FV3GFS/grib2/${yyyymmdd}${hh}
+
+smoke_dust_parm:
+  COMINsmoke_default: /contrib/EPIC/UFS_SRW_data/develop/data_smoke_dust/RAVE_smoke_dust
+  COMINrave_default: /contrib/EPIC/UFS_SRW_data/develop/data_smoke_dust/RAVE_fire
diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml
index c310eb1e00..65d2368b3f 100644
--- a/ush/valid_param_vals.yaml
+++ b/ush/valid_param_vals.yaml
@@ -4,7 +4,7 @@ valid_vals_RUN_ENVIR: ["nco", "community"]
 valid_vals_VERBOSE: [True, False]
 valid_vals_DEBUG: [True, False]
-valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "HERCULES", "JET", "DERECHO", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA", "GAEA-C6"]
+valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "HERCULES", "DERECHO", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEAC5", "GAEAC6"]
 valid_vals_SCHED: ["slurm", "pbspro", "lsf", "lsfcray", "none"]
 valid_vals_FCST_MODEL: ["ufs-weather-model"]
 valid_vals_WORKFLOW_MANAGER: ["rocoto", "ecflow", "none"]

From 8d8ef29de3b113b8716cbcb8f0a1ac6982d4691e Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Fri, 21 Feb 2025 22:51:31 +0000
Subject: [PATCH 246/260] Fix test file maybe

---
 .github/workflows/python_tests.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/python_tests.yaml b/.github/workflows/python_tests.yaml
index 469a0753ac..5f700918b0 100644
--- a/.github/workflows/python_tests.yaml
+++ b/.github/workflows/python_tests.yaml
@@ -51,7 +51,6 @@ jobs:
           export UNIT_TEST=True
           export PYTHONPATH=$(pwd)/ush
           # Make dummy directories for fix files so generate_FV3LAM_workflow.py tests complete successfully
-          # aws s3 cp --recursive --no-sign-request --no-progress s3://noaa-ufs-srw-pds/develop-20240618/fix/ /home/runner/work/
           export CI_FIX_FILES=/home/runner/work/fix
           mkdir -p $CI_FIX_FILES/fix_am

From f3f8333c124b5a7a4f1c11f5fbdc6c124afa45aa Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Fri, 21 Feb 2025 23:01:52 +0000
Subject: [PATCH 247/260] Fix sed command

---
 tests/test_python/test_generate_FV3LAM_wflow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_python/test_generate_FV3LAM_wflow.py b/tests/test_python/test_generate_FV3LAM_wflow.py
index 46d39212ea..546c33de26 100644
--- a/tests/test_python/test_generate_FV3LAM_wflow.py
+++ b/tests/test_python/test_generate_FV3LAM_wflow.py
@@ -55,7 +55,7 @@ def run_workflow(USHdir, logfile):
             )
         # If running CI, point config.yaml to correct location for fix files
         if fix_files:=get_env_var("CI_FIX_FILES"):
-            run_command(f"{sed} -i '-s/\\/home\\/username\\/DATA\\/UFS\\//{fix_files}/g' "\
-                        f"{USHdir}/config.yaml")
+            run_command(f"{sed} -i ''s|/home/username/DATA/UFS|{fix_files}|g "\
+                        f"{USHdir}/config.yaml")

         run_workflow(USHdir, logfile)
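The next patch exists because the sed invocation above is still mis-quoted: the leading '' closes the single quotes immediately, leaving the s-command exposed to the shell. A minimal sketch of the two command strings, with stand-in values (the paths and variable values below are illustrative, not the repository's runtime values):

    sed = "sed"
    fix_files = "/home/runner/work"  # hypothetical CI value
    # Patch 247's form: '' ends the quoting at once, so the script is unquoted.
    broken = f"{sed} -i ''s|/home/username/DATA/UFS|{fix_files}|g target.yaml"
    # Patch 248's form: the quotes wrap the whole script; '|' is the delimiter
    # because the replacement path itself contains '/'.
    fixed = f"{sed} -i 's|/home/username/DATA/UFS|{fix_files}|g' target.yaml"
    print(broken)
    print(fixed)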
From 0b92d4957e0fa8166297a5bc2fe23219d74b0225 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Fri, 21 Feb 2025 23:08:28 +0000
Subject: [PATCH 248/260] ACTUALLY fix sed command

---
 tests/test_python/test_generate_FV3LAM_wflow.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_python/test_generate_FV3LAM_wflow.py b/tests/test_python/test_generate_FV3LAM_wflow.py
index 546c33de26..75f7a0f105 100644
--- a/tests/test_python/test_generate_FV3LAM_wflow.py
+++ b/tests/test_python/test_generate_FV3LAM_wflow.py
@@ -55,7 +55,7 @@ def run_workflow(USHdir, logfile):
             )
         # If running CI, point config.yaml to correct location for fix files
         if fix_files:=get_env_var("CI_FIX_FILES"):
-            run_command(f"{sed} -i ''s|/home/username/DATA/UFS|{fix_files}|g "\
-                        f"{USHdir}/config.yaml")
+            run_command(f"{sed} -i 's|/home/username/DATA/UFS|{fix_files}|g' "\
+                        f"{USHdir}/config.yaml")

         run_workflow(USHdir, logfile)

From e8705dcdd21a269ce1f6072a5244acfafe38d5af Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Sat, 22 Feb 2025 00:33:44 +0000
Subject: [PATCH 249/260] More test debugging

---
 tests/test_python/test_generate_FV3LAM_wflow.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/tests/test_python/test_generate_FV3LAM_wflow.py b/tests/test_python/test_generate_FV3LAM_wflow.py
index 75f7a0f105..77183e2e2d 100644
--- a/tests/test_python/test_generate_FV3LAM_wflow.py
+++ b/tests/test_python/test_generate_FV3LAM_wflow.py
@@ -55,8 +55,21 @@ def run_workflow(USHdir, logfile):
             )
         # If running CI, point config.yaml to correct location for fix files
         if fix_files:=get_env_var("CI_FIX_FILES"):
-            run_command(f"{sed} -i 's|/home/username/DATA/UFS|{fix_files}|g' "\
-                        f"{USHdir}/config.yaml")
+            print(f"{fix_files=}")
+            config_file=f"{USHdir}/config.yaml"
+            sed_command=f"{sed} -i 's|/home/username/DATA/UFS|{fix_files}|g' "\
+                        f"{USHdir}/config.yaml"
+            print(f"{fix_files=}")
+            print(f"{config_file} contents before sed:")
+            with open(config_file, "r", encoding="utf-8") as file:
+                contents = file.read()
+            print(contents)
+            run_command(sed_command)
+            print(f"{config_file} contents after sed:")
+            with open(config_file, "r", encoding="utf-8") as file:
+                contents = file.read()
+            print(contents)

         run_workflow(USHdir, logfile)
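The debug prints in the patch above use the "=" specifier that f-strings gained in Python 3.8: it renders the expression together with its value, so the variable name does not have to be repeated in the log message. A one-line illustration with a hypothetical value:

    fix_files = "/home/runner/work"
    print(f"{fix_files=}")  # prints: fix_files='/home/runner/work'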
From e287312c6950605882be29090c66dc5cd28e400b Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Sat, 22 Feb 2025 00:41:51 +0000
Subject: [PATCH 250/260] another fix attempt!

---
 .github/workflows/python_tests.yaml             | 2 +-
 tests/test_python/test_generate_FV3LAM_wflow.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/python_tests.yaml b/.github/workflows/python_tests.yaml
index 5f700918b0..7d60a8f314 100644
--- a/.github/workflows/python_tests.yaml
+++ b/.github/workflows/python_tests.yaml
@@ -54,7 +54,7 @@ jobs:
           export CI_FIX_FILES=/home/runner/work/fix
           mkdir -p $CI_FIX_FILES/fix_am

-          python -m unittest tests/test_python/*.py
+          python -m unittest tests/test_python/test_generate_FV3LAM_wflow.py

       - name: Run python functional tests
         run: |
diff --git a/tests/test_python/test_generate_FV3LAM_wflow.py b/tests/test_python/test_generate_FV3LAM_wflow.py
index 77183e2e2d..d168a3300b 100644
--- a/tests/test_python/test_generate_FV3LAM_wflow.py
+++ b/tests/test_python/test_generate_FV3LAM_wflow.py
@@ -27,7 +27,7 @@ def test_generate_FV3LAM_wflow(self):

         # run workflows in separate process to avoid conflict between community and nco settings
         def run_workflow(USHdir, logfile):
-            p = Process(target=generate_FV3LAM_wflow, args=(USHdir,"config.yaml",logfile,True))
+            p = Process(target=generate_FV3LAM_wflow, args=(USHdir,"config.yaml",logfile))
             p.start()
             p.join()
             exit_code = p.exitcode
@@ -58,7 +58,7 @@ def run_workflow(USHdir, logfile):
             print(f"{fix_files=}")
             config_file=f"{USHdir}/config.yaml"
             sed_command=f"{sed} -i 's|/home/username/DATA/UFS|{fix_files}|g' "\
-                        f"{USHdir}/config.yaml"
+                        f"{USHdir}/machine/linux.yaml"
             print(f"{fix_files=}")
             print(f"{config_file} contents before sed:")
             with open(config_file, "r", encoding="utf-8") as file:

From b3d753a464acc0488005016610f064e90926b741 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Sat, 22 Feb 2025 00:58:37 +0000
Subject: [PATCH 251/260] Another attempt

---
 tests/test_python/test_generate_FV3LAM_wflow.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_python/test_generate_FV3LAM_wflow.py b/tests/test_python/test_generate_FV3LAM_wflow.py
index d168a3300b..896f230a37 100644
--- a/tests/test_python/test_generate_FV3LAM_wflow.py
+++ b/tests/test_python/test_generate_FV3LAM_wflow.py
@@ -56,9 +56,9 @@ def run_workflow(USHdir, logfile):
         # If running CI, point config.yaml to correct location for fix files
         if fix_files:=get_env_var("CI_FIX_FILES"):
             print(f"{fix_files=}")
-            config_file=f"{USHdir}/config.yaml"
+            config_file=f"{USHdir}/machine/linux.yaml"
             sed_command=f"{sed} -i 's|/home/username/DATA/UFS|{fix_files}|g' "\
-                        f"{USHdir}/machine/linux.yaml"
+                        f"{config_file}"
             print(f"{fix_files=}")
             print(f"{config_file} contents before sed:")
             with open(config_file, "r", encoding="utf-8") as file:
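Patches 250 and 251 move the substitution target from the experiment's config.yaml to ush/machine/linux.yaml, apparently the file that actually carries the /home/username/DATA/UFS placeholder. A sketch of the intended rewrite, assuming the machine file holds entries like the illustrative FIXgsm line below:

    import re
    line = "  FIXgsm: /home/username/DATA/UFS/fix/fix_am"  # illustrative entry
    print(re.sub("/home/username/DATA/UFS", "/home/runner/work", line))
    # -> "  FIXgsm: /home/runner/work/fix/fix_am"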
From 0fb7c8f342f9a5f8324c257a42e1f7a25edb462f Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Sat, 22 Feb 2025 01:01:43 +0000
Subject: [PATCH 252/260] Finally working?????

---
 .github/workflows/python_tests.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python_tests.yaml b/.github/workflows/python_tests.yaml
index 7d60a8f314..e5670ae1c3 100644
--- a/.github/workflows/python_tests.yaml
+++ b/.github/workflows/python_tests.yaml
@@ -51,8 +51,8 @@ jobs:
           export UNIT_TEST=True
           export PYTHONPATH=$(pwd)/ush
           # Make dummy directories for fix files so generate_FV3LAM_workflow.py tests complete successfully
-          export CI_FIX_FILES=/home/runner/work/fix
-          mkdir -p $CI_FIX_FILES/fix_am
+          export CI_FIX_FILES=/home/runner/work
+          mkdir -p $CI_FIX_FILES/fix/fix_am

           python -m unittest tests/test_python/test_generate_FV3LAM_wflow.py

From e717e918d944b79cef5d712cd2f7e88ca3d2dae2 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Sat, 22 Feb 2025 01:05:08 +0000
Subject: [PATCH 253/260] Unit test finally fixed!

---
 .github/workflows/python_tests.yaml             |  2 +-
 tests/test_python/test_generate_FV3LAM_wflow.py | 14 ++------------
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/python_tests.yaml b/.github/workflows/python_tests.yaml
index e5670ae1c3..2f4255727d 100644
--- a/.github/workflows/python_tests.yaml
+++ b/.github/workflows/python_tests.yaml
@@ -54,7 +54,7 @@ jobs:
           export CI_FIX_FILES=/home/runner/work
           mkdir -p $CI_FIX_FILES/fix/fix_am

-          python -m unittest tests/test_python/test_generate_FV3LAM_wflow.py
+          python -m unittest tests/test_python/*.py

       - name: Run python functional tests
         run: |
diff --git a/tests/test_python/test_generate_FV3LAM_wflow.py b/tests/test_python/test_generate_FV3LAM_wflow.py
index 896f230a37..7aae34b602 100644
--- a/tests/test_python/test_generate_FV3LAM_wflow.py
+++ b/tests/test_python/test_generate_FV3LAM_wflow.py
@@ -55,20 +55,10 @@ def run_workflow(USHdir, logfile):
             )
         # If running CI, point config.yaml to correct location for fix files
         if fix_files:=get_env_var("CI_FIX_FILES"):
-            print(f"{fix_files=}")
-            config_file=f"{USHdir}/machine/linux.yaml"
+            machine_file=f"{USHdir}/machine/linux.yaml"
             sed_command=f"{sed} -i 's|/home/username/DATA/UFS|{fix_files}|g' "\
-                        f"{config_file}"
-            print(f"{fix_files=}")
-            print(f"{config_file} contents before sed:")
-            with open(config_file, "r", encoding="utf-8") as file:
-                contents = file.read()
-            print(contents)
+                        f"{machine_file}"
             run_command(sed_command)
-            print(f"{config_file} contents after sed:")
-            with open(config_file, "r", encoding="utf-8") as file:
-                contents = file.read()
-            print(contents)

         run_workflow(USHdir, logfile)

From 644dbda46e4407ee959d1e9105b01847013b1a0b Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Sat, 22 Feb 2025 01:21:11 +0000
Subject: [PATCH 254/260] Revert inadvertent change to Gaea machine file

---
 ush/machine/gaeac5.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ush/machine/gaeac5.yaml b/ush/machine/gaeac5.yaml
index e5a0b0386c..2dcad8dc58 100644
--- a/ush/machine/gaeac5.yaml
+++ b/ush/machine/gaeac5.yaml
@@ -2,7 +2,7 @@ platform:
   WORKFLOW_MANAGER: rocoto
   NCORES_PER_NODE: 128
   SCHED: slurm
-  WE2E_TEST_DATA: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop
+  WE2E_TEST_DATA: /gpfs/f5/epic/world-shared/UFS_SRW_data/develop/
   TEST_CCPA_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ccpa/proc'
   TEST_MRMS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/mrms/proc'
   TEST_NDAS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ndas/proc'
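Patches 252 and 253 work because the CI prefix and the machine-file suffix now compose into the directory the generated experiment expects: CI_FIX_FILES is the bare prefix, and the substituted machine file appends fix/... to it. A sketch of the layout the CI step prepares (the composition is the point; the exact paths mirror the workflow snippet above):

    import os
    ci_fix_files = "/home/runner/work"  # exported in the workflow step above
    # Mirrors "mkdir -p $CI_FIX_FILES/fix/fix_am"; after the sed rewrite, a
    # machine-file path of the form <prefix>/fix/fix_am resolves to this directory.
    os.makedirs(os.path.join(ci_fix_files, "fix", "fix_am"), exist_ok=True)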
From 489428cb17b2cc02d085dcf7f2dc700568485278 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Mon, 24 Feb 2025 23:33:39 +0000
Subject: [PATCH 255/260] Missed gaea updates

---
 modulefiles/tasks/gaeac5/run_vx.local.lua |  4 ++--
 ush/machine/gaeac6.yaml                   | 12 +++++++-----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/modulefiles/tasks/gaeac5/run_vx.local.lua b/modulefiles/tasks/gaeac5/run_vx.local.lua
index be96db4bab..54a185ec20 100644
--- a/modulefiles/tasks/gaeac5/run_vx.local.lua
+++ b/modulefiles/tasks/gaeac5/run_vx.local.lua
@@ -1,8 +1,8 @@
 --[[
 Compiler-specific modules are used for met and metplus libraries
 --]]
-local met_ver = (os.getenv("met_ver") or "11.1.0")
-local metplus_ver = (os.getenv("metplus_ver") or "5.1.0")
+local met_ver = (os.getenv("met_ver") or "12.0.1")
+local metplus_ver = (os.getenv("metplus_ver") or "6.0.0")
 if (mode() == "load") then
   load(pathJoin("met", met_ver))
   load(pathJoin("metplus",metplus_ver))
diff --git a/ush/machine/gaeac6.yaml b/ush/machine/gaeac6.yaml
index 1f293adcb8..ffdcc2fc18 100644
--- a/ush/machine/gaeac6.yaml
+++ b/ush/machine/gaeac6.yaml
@@ -3,11 +3,13 @@ platform:
   NCORES_PER_NODE: 128
   SCHED: slurm
   WE2E_TEST_DATA: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop
-  TEST_CCPA_OBS_DIR: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/obs_data/ccpa/proc
-  TEST_MRMS_OBS_DIR: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/obs_data/mrms/proc
-  TEST_NDAS_OBS_DIR: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/obs_data/ndas/proc
-  TEST_NOHRSC_OBS_DIR: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/obs_data/nohrsc/proc
-  DOMAIN_PREGEN_BASEDIR: /gpfs/f6/bil-fire8/world-shared/UFS_SRW_data/develop/FV3LAM_pregen
+  TEST_CCPA_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ccpa/proc'
+  TEST_MRMS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/mrms/proc'
+  TEST_NDAS_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/ndas/proc'
+  TEST_NOHRSC_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/nohrsc/proc'
+  TEST_AERONET_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/aeronet'
+  TEST_AIRNOW_OBS_DIR: '{{ platform.WE2E_TEST_DATA }}/obs_data/airnow'
+  DOMAIN_PREGEN_BASEDIR: '{{ platform.WE2E_TEST_DATA }}/FV3LAM_pregen'
   QUEUE_DEFAULT: normal
   QUEUE_FCST: normal
   QUEUE_HPSS: normal

From 0990e000cf730c4ae3bf1475a66895ae6a0b6c5b Mon Sep 17 00:00:00 2001
From: Michael Kavulich
Date: Thu, 27 Feb 2025 11:31:11 -0700
Subject: [PATCH 256/260] Apply suggestions from Christina's code review

Co-authored-by: Christina Holt <56881914+christinaholtNOAA@users.noreply.github.com>
---
 doc/UsersGuide/BuildingRunningTesting/RunSRW.rst | 2 +-
 doc/UsersGuide/Reference/Glossary.rst            | 4 ++--
 parm/wflow/verify_det.yaml                       | 2 +-
 ush/config_defaults.yaml                         | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
index b93ebd0a28..250f7d3d6d 100644
--- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
+++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst
@@ -867,7 +867,7 @@ that attempt is successful, the workflow will move on to subsequent tasks. Thus
    staging locations and file names.

 .. note::
-   AIRNOW observations can be retrieved from AWS in addition to HPSS, but this requires changing some default settings.
+   AIRNOW observations can be retrieved from AWS or HPSS, but retrieving from AWS requires changing some default settings.
    See ``ush/config_defaults.yaml`` or :numref:`Section %s ` for more details.

 * Users who have access to a data store that hosts the necessary files (e.g. NOAA HPSS) do not need to
diff --git a/doc/UsersGuide/Reference/Glossary.rst b/doc/UsersGuide/Reference/Glossary.rst
index efcc47cc7b..97f3f214c4 100644
--- a/doc/UsersGuide/Reference/Glossary.rst
+++ b/doc/UsersGuide/Reference/Glossary.rst
@@ -13,7 +13,7 @@ Glossary
       According to the American Meteorological Society (AMS) definition, `advection `_ is "The process of transport of an atmospheric property solely by the mass motion (velocity field) of the atmosphere." In common parlance, advection is movement of atmospheric substances that are carried around by the wind.

    AERONET
-      The "`AErosol RObotic NETwork `_": A worldwide ground-based remote sensing aerosol networks established by NASA and PHOTONS. The SRW verification tasks can use "Level 1.5" (cloud-screened and quality-controlled) aerosol optical depth observations.
+      The "`AErosol RObotic NETwork `_": A worldwide ground-based remote sensing aerosol network established by NASA and PHOTONS. The SRW verification tasks can use "Level 1.5" (cloud-screened and quality-controlled) aerosol optical depth observations.

    AIRNOW
       A North American ground-level air quality measurement network. The SRW verification tasks can use PM2.5 and PM10 observations. More information available at https://www.airnow.gov/
@@ -166,7 +166,7 @@ Glossary

    MET
    METplus
-      The `Model Evaluation Tools `__ is a highly-configurable, state-of-the-art suite of verification tools developed at the :term:`DTC`. `METplus `_ is a suite of python wrappers providing low-level automation of the MET tools.
+      The `Model Evaluation Tools `__ is a highly configurable, state-of-the-art suite of verification tools developed at the :term:`DTC`. `METplus `_ is a suite of Python wrappers providing low-level automation of the MET tools.

    MPI
       MPI stands for Message Passing Interface. An MPI is a standardized communication system used in parallel programming. It establishes portable and efficient syntax for the exchange of messages and data between multiple processors that are used by a single computer program. An MPI is required for high-performance computing (HPC) systems.
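The verify_det.yaml hunk below replaces "{{- var }}" with "{{ var }}". In Jinja2 the leading minus strips the whitespace before the expression, which would glue the rendered value onto the preceding text; the plain form keeps the separating space. A minimal illustration (requires the jinja2 package):

    from jinja2 import Template
    print(Template("x {{- 1 }}").render())  # "x1": "{{-" eats the space before it
    print(Template("x {{ 1 }}").render())   # "x 1": plain "{{" keeps it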
diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml
index d276fd81a9..c25d0f8b00 100644
--- a/parm/wflow/verify_det.yaml
+++ b/parm/wflow/verify_det.yaml
@@ -141,7 +141,7 @@ metatask_PointStat:
     FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA", "AOD", "PM25", "PM10"] %}{{ "%s " % var }}{% endif %}{% endfor %}'
     METAOBTYPE: '{% for var in verification.VX_FIELD_GROUPS %}{% if var =="SFC" or var =="UPA" %}NDAS {% elif var =="AOD" %}AERONET {% elif var =="PM25" or var =="PM10" %}AIRNOW {% endif %}{% endfor %}'
     METAOBS_DIR: '{% for var in verification.VX_FIELD_GROUPS %}{% if var =="SFC" or var =="UPA" %}&NDAS_OBS_DIR; {% elif var =="AOD" %}&AERONET_OBS_DIR; {% elif var =="PM25" or var =="PM10" %}&AIRNOW_OBS_DIR; {% endif %}{% endfor %}'
-    METAOB_AVAIL_HRS: '{% for var in verification.VX_FIELD_GROUPS %}{% if var =="SFC" or var =="UPA" %} {{- verification.NDAS_OBS_AVAIL_INTVL_HRS }} {% elif var =="AOD" %}{{- verification.AERONET_OBS_AVAIL_INTVL_HRS }} {% elif var =="PM25" or var =="PM10" %}{{- verification.AIRNOW_OBS_AVAIL_INTVL_HRS }} {% endif %}{% endfor %}'
+    METAOB_AVAIL_HRS: '{% for var in verification.VX_FIELD_GROUPS %}{% if var =="SFC" or var =="UPA" %} {{ verification.NDAS_OBS_AVAIL_INTVL_HRS }} {% elif var =="AOD" %}{{ verification.AERONET_OBS_AVAIL_INTVL_HRS }} {% elif var =="PM25" or var =="PM10" %}{{ verification.AIRNOW_OBS_AVAIL_INTVL_HRS }} {% endif %}{% endfor %}'
     metatask_PointStat_#FIELD_GROUP#_all_mems:
       var:
         mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}'
diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml
index 423f63eee8..f43a6af34c 100644
--- a/ush/config_defaults.yaml
+++ b/ush/config_defaults.yaml
@@ -2413,7 +2413,7 @@ verification:
   # METPLUS_VERBOSITY_LEVEL:
   # Logging verbosity level used by METplus verification tools. 0 to 9,
   # with 0 having the fewest log messages and 9 having the most. Levels 5
-  # and above can result in very large log files and slower tool execution..
+  # and above can result in very large log files and slower tool execution.
   #
   METPLUS_VERBOSITY_LEVEL: 2
   #

From cfefb8ac5b8dab7116ecf4e437d0cb011d3285bd Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Thu, 27 Feb 2025 20:11:33 +0000
Subject: [PATCH 257/260] Final round of suggested changes from reviewers

---
 jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS | 22 ++++++++++++++++++++--
 ush/generate_FV3LAM_wflow.py        |  2 +-
 ush/setup.py                        |  8 +-------
 3 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS b/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS
index 9eb445bf84..45c63baa33 100755
--- a/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS
+++ b/jobs/JREGIONAL_RUN_MET_ASCII2NC_OBS
@@ -1,5 +1,25 @@
 #!/usr/bin/env bash

+#
+#-----------------------------------------------------------------------
+#
+#
+# The J-Job that runs METplus for point-stat by initialization time for
+# all forecast hours.
+#
+# Run-time environment variables:
+#
+#    GLOBAL_VAR_DEFNS_FP
+#
+# Experiment variables
+#
+#  user:
+#    SCRIPTSdir
+#    USHdir
+#
+#-----------------------------------------------------------------------
+#
+
 #
 #-----------------------------------------------------------------------
 #
@@ -10,8 +30,6 @@
 . $USHdir/source_util_funcs.sh
 sections=(
   user
-  nco
-  workflow
 )
 for sect in ${sections[*]} ; do
   source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect}
diff --git a/ush/generate_FV3LAM_wflow.py b/ush/generate_FV3LAM_wflow.py
index 13ae7996e0..6ca6e05c79 100755
--- a/ush/generate_FV3LAM_wflow.py
+++ b/ush/generate_FV3LAM_wflow.py
@@ -326,7 +326,7 @@ def generate_FV3LAM_wflow(
     # dictionaries, instead.
     # pylint: disable=undefined-variable
     import_vars(dictionary=flatten_dict(expt_config))
-    export_vars(source_dict=flatten_dict(expt_config))
+    export_vars(source_dict=expt_config["global"])
     settings = {}
     settings["gfs_physics_nml"] = {
         "do_shum": DO_SHUM,
diff --git a/ush/setup.py b/ush/setup.py
index cb60e26048..a756778e84 100644
--- a/ush/setup.py
+++ b/ush/setup.py
@@ -800,15 +800,9 @@ def _remove_tag(tasks, tag):
     vx_field_groups_all_by_obtype["AIRNOW"] = ["PM25", "PM10"]
     vx_metatasks_all_by_obtype["AIRNOW"] \
         = ["task_get_obs_airnow",
-           "metatask_ASCII2nc_obs"
+           "metatask_ASCII2nc_obs",
            "metatask_PcpCombine_fcst_PM_all_mems"]

-    vx_field_groups_all_by_obtype["AERONET"] = ["AOD"]
-    vx_metatasks_all_by_obtype["AERONET"] = ["task_get_obs_aeronet","metatask_ASCII2nc_obs"]
-
-    vx_field_groups_all_by_obtype["AIRNOW"] = ["PM25","PM10"]
-    vx_metatasks_all_by_obtype["AIRNOW"] = ["task_get_obs_airnow","metatask_ASCII2nc_obs","metatask_PcpCombine_fcst_PM_all_mems"]
-
     # If there are no field groups specified for verification, remove those
     # tasks that are common to all observation types.
     vx_field_groups = vx_config["VX_FIELD_GROUPS"]

From 802dd8a241558244b4206aa116fe13feda80110b Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Fri, 28 Feb 2025 15:50:41 +0000
Subject: [PATCH 258/260] Fix skill-score test

---
 parm/metplus/STATAnalysisConfig_skill_score | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/parm/metplus/STATAnalysisConfig_skill_score b/parm/metplus/STATAnalysisConfig_skill_score
index 2aa0f97df4..2d08ff44c1 100644
--- a/parm/metplus/STATAnalysisConfig_skill_score
+++ b/parm/metplus/STATAnalysisConfig_skill_score
@@ -148,6 +148,6 @@ hss_ec_value = NA;
 rank_corr_flag = FALSE;
 vif_flag = FALSE;
 tmp_dir = "/tmp";
-version = "V11.1.0";
+version = "V12.0.1";

 ////////////////////////////////////////////////////////////////////////////////

From 8322306ec6167aaae2267bd9a7d83e40ea3eba84 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Fri, 28 Feb 2025 18:32:29 +0000
Subject: [PATCH 259/260] Fix bad merge that omitted update of external_ic_nml

---
 ush/generate_FV3LAM_wflow.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/ush/generate_FV3LAM_wflow.py b/ush/generate_FV3LAM_wflow.py
index 6ca6e05c79..16a1b7dd58 100755
--- a/ush/generate_FV3LAM_wflow.py
+++ b/ush/generate_FV3LAM_wflow.py
@@ -721,6 +721,10 @@ def setup_fv3_namelist(expt_config,debug):

     settings["gfs_physics_nml"] = gfs_physics_nml_dict

+    # Update levp in external_ic_nml; this should be the only variable that needs changing
+
+    settings["external_ic_nml"] = {"levp": LEVP}
+
     #
     # Add to "settings" the values of those namelist variables that specify
     # the paths to fixed files in the FIXam directory. As above, these namelist
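Patch 259 above and patch 260 below encode the same piece of vertical-grid bookkeeping in two different places: LEVP counts interface levels, and the model runs LEVP - 1 layers (npz), which is exactly what the hunk below sets. A sketch of the relationship, with an illustrative value for LEVP:

    LEVP = 65       # illustrative: number of vertical interface levels
    npz = LEVP - 1  # number of model layers, as set in patch 260's hunk
    assert npz + 1 == LEVP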
From 6fdc2a054dabf7d9803dcf8bb49eb70232d5d8a9 Mon Sep 17 00:00:00 2001
From: "Michael Kavulich, Jr."
Date: Fri, 28 Feb 2025 21:09:36 +0000
Subject: [PATCH 260/260] Another fix of bad merge

---
 ush/generate_FV3LAM_wflow.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/ush/generate_FV3LAM_wflow.py b/ush/generate_FV3LAM_wflow.py
index 16a1b7dd58..c6f289c5ee 100755
--- a/ush/generate_FV3LAM_wflow.py
+++ b/ush/generate_FV3LAM_wflow.py
@@ -643,6 +643,7 @@ def setup_fv3_namelist(expt_config,debug):
         "npy": NY + 1,
         "layout": [LAYOUT_X, LAYOUT_Y],
         "bc_update_interval": LBC_SPEC_INTVL_HRS,
+        "npz": LEVP - 1,
     })
     if CCPP_PHYS_SUITE == "FV3_GFS_v15p2":
         if CPL_AQM:
@@ -721,10 +722,6 @@ def setup_fv3_namelist(expt_config,debug):

     settings["gfs_physics_nml"] = gfs_physics_nml_dict

-    # Update levp in external_ic_nml; this should be the only variable that needs changing
-
-    settings["external_ic_nml"] = {"levp": LEVP}
-
     #
     # Add to "settings" the values of those namelist variables that specify
     # the paths to fixed files in the FIXam directory. As above, these namelist