Commit 3a845df

add rule to build dsp pars for SiPM data

gipert committed Feb 13, 2025
1 parent 9cf117e
Showing 6 changed files with 145 additions and 9 deletions.
4 changes: 2 additions & 2 deletions dataflow-config.yaml
@@ -15,8 +15,8 @@ paths:

   tier: $_/generated/tier
   tier_daq: $_/generated/tier/daq
-  tier_raw: $_/generated/tier/raw
-  tier_tcm: $_/generated/tier/tcm
+  tier_raw: /data1/shared/l200-p13/prodenv/prod-orig/ref-raw/generated/tier/raw
+  tier_tcm: /data1/shared/l200-p13/prodenv/prod-orig/ref-raw/generated/tier/tcm
   tier_dsp: $_/generated/tier/dsp
   tier_hit: $_/generated/tier/hit
   tier_ann: $_/generated/tier/ann
1 change: 1 addition & 0 deletions pyproject.toml
@@ -116,6 +116,7 @@ par-geds-psp-average = "legenddataflow.scripts.par.geds.psp.average:par_geds_
  par-geds-raw-blindcal = "legenddataflow.scripts.par.geds.raw.blindcal:par_geds_raw_blindcal"
  par-geds-raw-blindcheck = "legenddataflow.scripts.par.geds.raw.blindcheck:par_geds_raw_blindcheck"
  par-geds-tcm-pulser = "legenddataflow.scripts.par.geds.tcm.pulser:par_geds_tcm_pulser"
+ par-spms-dsp-trg-thr = "legenddataflow.scripts.par.spms.dsp.trigger_threshold:par_spms_dsp_trig_thr"

[tool.uv.workspace]
exclude = ["generated", "inputs", "software", "workflow"]
38 changes: 38 additions & 0 deletions workflow/rules/dsp_pars_spms.smk
@@ -0,0 +1,38 @@
"""DSP parameter generation for SiPM data"""

from pathlib import Path

from legenddataflow import patterns as patt
from legenddataflow import utils, execenv_pyexe


rule build_pars_dsp_tau_spms:
    input:
        filelist=Path(utils.filelist_path(config))
        / "all-{experiment}-{period}-{run}-{datatype}-raw.filelist",
        pardb=lambda wildcards: get_overwrite_file("dsp", wildcards),
    params:
        timestamp="{timestamp}",
        datatype="{datatype}",
        channel="{channel}",
        raw_table_name=lambda wildcards: get_table_name(
            metadata, config, wildcards.datatype, wildcards.timestamp, wildcards.channel, "raw"
        ),
    wildcard_constraints:
        datatype=r"\b(?!cal\b|xtc\b)\w+\b",
    output:
        temp(patt.get_pattern_pars_tmp_channel(config, "dsp", "spms_trigger_threshold")),
    log:
        patt.get_pattern_log_channel(config, "spms_trigger_threshold", time),
    group:
        "par-dsp"
    shell:
        execenv_pyexe(config, "par-spms-dsp-trg-thr") + \
        "--config-path {configs} "
        "--raw-files {input.filelist} "
        "--dsp-db {input.pardb} "
        "--datatype {params.datatype} "
        "--timestamp {params.timestamp} "
        "--sipm-name {params.channel} "
        "--raw-table-name {params.raw_table_name} "
        "--output-file {output} "
        "--logfile {log}"
12 changes: 12 additions & 0 deletions workflow/src/legenddataflow/cfgtools.py
@@ -0,0 +1,12 @@
from typing import Mapping


def get_channel_config(
    mapping: Mapping, channel: str, default_key: str = "__default__"
):
    """Get channel configuration from mapping, with a default.

    Returns the value at key `channel`, if existing; otherwise the value at
    `default_key`.
    """
    return mapping.get(channel, mapping[default_key])
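
A hypothetical usage sketch of the new helper (channel names and file names are made up for illustration):

# per-channel configuration mapping with a default fallback entry
processing_chain = {
    "__default__": "dsp_config_default.yaml",
    "S060": "dsp_config_S060.yaml",
}

get_channel_config(processing_chain, "S060")  # -> "dsp_config_S060.yaml"
get_channel_config(processing_chain, "S999")  # -> "dsp_config_default.yaml"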
92 changes: 92 additions & 0 deletions workflow/src/legenddataflow/scripts/par/spms/dsp/trigger_threshold.py
@@ -0,0 +1,92 @@
import argparse
from pathlib import Path

import hist
import numpy as np
from dbetto import Props, TextDB, utils
from dspeed import run_one_dsp
from lgdo import lh5

from ..... import cfgtools
from .....log import build_log


def par_spms_dsp_trig_thr() -> None:
    # CLI interface
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--raw-files", nargs="*")
    argparser.add_argument("--raw-table-name", required=True)
    argparser.add_argument("--output-file", required=True)
    argparser.add_argument("--config-path", required=True)
    argparser.add_argument("--datatype", required=True)
    argparser.add_argument("--timestamp", required=True)
    argparser.add_argument("--sipm-name", required=True)
    argparser.add_argument("--dsp-db")
    argparser.add_argument("--logfile")
    args = argparser.parse_args()

    # dataflow configs
    df_configs = TextDB(args.config_path, lazy=True).on(
        args.timestamp, system=args.datatype
    )

    # setup logging
    log = build_log(df_configs, args.logfile)

    log.debug("reading in the configuration files")
    config = df_configs.snakemake_rules.pars_spms_dsp_trg_thr.inputs
    dsp_config = utils.load_dict(
        cfgtools.get_channel_config(config.processing_chain, args.sipm_name)
    )
    settings = utils.load_dict(
        cfgtools.get_channel_config(config.settings, args.sipm_name)
    )

    # read raw file list
    log.debug("reading in the raw waveforms")
    with Path(args.raw_files[0]).open() as f:
        input_files = f.read().splitlines()

    data = lh5.read(
        args.raw_table_name,
        input_files,
        field_mask=["waveform"],
        n_rows=settings.n_events,
    )

    # load the DSP parameter database and pick the overrides for this SiPM
    _db_dict = {}
    if args.dsp_db is not None:
        _db_dict = utils.load_dict(args.dsp_db).get(args.sipm_name, {})

    # run the DSP with the provided configuration
    log.debug("running the DSP chain")
    dsp_output = run_one_dsp(data, dsp_config, db_dict=_db_dict)

    log.debug("analyzing DSP outputs")
    # get output of the "curr" processor
    curr = dsp_output.curr.values.view_as("np").flatten()
    # determine a cutoff for the histogram used to extract the FWHM
    low_cutoff, high_cutoff = np.quantile(curr, [0.005, 0.995])

    # make histogram of the curr values
    h = (
        hist.new.Regular(settings.n_baseline_bins, low_cutoff, high_cutoff)
        .Double()
        .fill(curr)
    )

    # determine FWHM
    counts = h.view()
    idx_over_half = np.where(counts >= np.max(counts) / 2)[0]

    edges = h.axes[0].edges
    fwhm = edges[idx_over_half[-1]] - edges[idx_over_half[0]]

    if fwhm <= 0:
        msg = "determined FWHM of baseline derivative distribution is zero or negative"
        raise RuntimeError(msg)

    log.debug(f"writing out baseline_curr_fwhm = {fwhm}")
    Path(args.output_file).parent.mkdir(parents=True, exist_ok=True)
    Props.write_to(args.output_file, {"baseline_curr_fwhm": fwhm})
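
The FWHM extraction at the core of the script can be exercised in isolation. A self-contained sketch with synthetic Gaussian noise standing in for the "curr" baseline derivative; for a Gaussian, the FWHM should land near 2·sqrt(2·ln 2)·σ ≈ 2.355·σ:

import hist
import numpy as np

rng = np.random.default_rng(seed=1)
curr = rng.normal(loc=0.0, scale=1.0, size=100_000)  # synthetic "curr" output

# robust quantile cutoffs and histogram, as in the script
low_cutoff, high_cutoff = np.quantile(curr, [0.005, 0.995])
h = hist.new.Regular(100, low_cutoff, high_cutoff).Double().fill(curr)

# FWHM = span of the bin edges whose counts exceed half the maximum
counts = h.view()
idx_over_half = np.where(counts >= np.max(counts) / 2)[0]
edges = h.axes[0].edges
fwhm = edges[idx_over_half[-1]] - edges[idx_over_half[0]]

print(fwhm)  # ≈ 2.355 for unit-sigma Gaussian noise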
7 changes: 0 additions & 7 deletions workflow/src/legenddataflow/utils.py
@@ -11,13 +11,6 @@
  import string
  from pathlib import Path

- # from dateutil import parser
-
- # For testing/debugging, use
- # from scripts.utils import *
- # import snakemake as smk
- # setup = smk.load_configfile("config.json")["setups"]["l200"]
-

  def sandbox_path(setup):
      if "sandbox_path" in setup["paths"]:
