From 65186c36301adb395fa62f81fa85ad8bddf06abb Mon Sep 17 00:00:00 2001 From: tgalvin Date: Thu, 9 May 2024 09:59:44 +0800 Subject: [PATCH 01/10] toy --- AegeanTools/BANE.py | 99 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 3 deletions(-) diff --git a/AegeanTools/BANE.py b/AegeanTools/BANE.py index e81f1a4e..970e8fa8 100755 --- a/AegeanTools/BANE.py +++ b/AegeanTools/BANE.py @@ -5,6 +5,9 @@ The function filter_image should be imported from elsewhere and run as is. """ +import warnings +from typing import Tuple + import copy import logging import multiprocessing @@ -28,7 +31,6 @@ barrier = None memory_id = None - def init(b, mem): """ Set the global barrier and memory_id @@ -38,6 +40,62 @@ def init(b, mem): memory_id = mem +def median_clip(data): + return np.median(np.abs(data - np.median(data))) + +def rms_estimate( + data: np.ndarray, + mode: str = "mad", + clip_rounds: int = 2, + bin_perc: float = 0.25, + outlier_thres: float = 3.0, + nan_check: bool = True, +) -> Tuple[float,float]: + + if bin_perc > 1.0: + bin_perc /= 100.0 + + if mode == "std": + clipping_func = np.std + + elif mode == "mad": + clipping_func = median_clip + + else: + raise ValueError( + f"{mode} not supported as a clipping mode, available modes are `std` and `mad`. " + ) + + if nan_check: + data = data[np.isfinite(data)] + + cen_func = np.median + + for i in range(clip_rounds): + bkg = cen_func(data) + data = data[np.abs(data - bkg) < outlier_thres * clipping_func(data)] + + # Attempts to ensure a sane number of bins to fit against + mask_counts = 0 + loop = 1 + while mask_counts < 5 and loop < 5: + counts, binedges = np.histogram(data, bins=50 * loop) + binc = (binedges[:-1] + binedges[1:]) / 2 + + mask = counts >= bin_perc * np.max(counts) + mask_counts = np.sum(mask) + loop += 1 + + p = np.polyfit(binc[mask], np.log10(counts[mask] / np.max(counts)), 2) + a, b, c = p + + x1 = (-b + np.sqrt(b ** 2 - 4.0 * a * (c - np.log10(0.5)))) / (2.0 * a) + x2 = (-b - np.sqrt(b ** 2 - 4.0 * a * (c - np.log10(0.5)))) / (2.0 * a) + fwhm = np.abs(x1 - x2) + noise = fwhm / 2.355 + + return bkg, noise + def sigmaclip(arr, lo, hi, reps=10): """ Perform sigma clipping on an array, ignoring non finite values. @@ -72,6 +130,16 @@ def sigmaclip(arr, lo, hi, reps=10): """ clipped = np.array(arr)[np.isfinite(arr)] + # with np.errstate(all='ignore'): + + # bkg, rms = rms_estimate(data=clipped) + + # # if np.isnan(rms): + # # print(rms) + # # print(clipped) + + # return bkg, rms + if len(clipped) < 1: return np.nan, np.nan @@ -260,10 +328,32 @@ def box(r, c): for i, row in enumerate(rows): for j, col in enumerate(cols): + attempt = 0 + enlarge = 0 + rms = np.nan + r_min, r_max, c_min, c_max = box(row, col) new = data[r_min:r_max, c_min:c_max] new = np.ravel(new) _, rms = sigmaclip(new, 3, 3) + + # while attempt < 10 and np.isnan(rms): + # r_min, r_max, c_min, c_max = box(row, col) + # r_min -= enlarge + # r_max += enlarge + # c_min -=enlarge + # c_max += enlarge + + # new = data[r_min:r_max, c_min:c_max] + # new = np.ravel(new) + # _, rms = sigmaclip(new, 3, 3) + + # attempt += 1 + # enlarge = 50 * attempt + + # if np.isnan(rms): + # logging.info(f"Enlarging the box {enlarge}") + vals[i, j] = rms logging.debug("Interpolating rms to sharemem") @@ -365,7 +455,7 @@ def filter_mc_sharemem(filename, step_size, box_size, cores, shape, exit = False try: global memory_id - memory_id = str(uuid.uuid4()) + memory_id = str(uuid.uuid4())[:15] nbytes = np.prod(shape) * np.float64(1).nbytes ibkg = SharedMemory(name=f'ibkg_{memory_id}', create=True, size=nbytes) irms = SharedMemory(name=f'irms_{memory_id}', create=True, size=nbytes) @@ -393,7 +483,10 @@ def filter_mc_sharemem(filename, step_size, box_size, cores, shape, bkg = np.ndarray(shape, buffer=ibkg.buf, dtype=np.float64).astype(np.float32) rms = np.ndarray(shape, buffer=irms.buf, - dtype=np.float64).astype(np.float32) + dtype=np.float64).astype(np.float32) + + except Exception as e: + print(e) finally: ibkg.close() ibkg.unlink() From 77f3251cbed2f6324eed752216880e70627654ae Mon Sep 17 00:00:00 2001 From: tgalvin Date: Thu, 9 May 2024 15:51:38 +0800 Subject: [PATCH 02/10] initial toying around with modes --- AegeanTools/BANE.py | 284 +++++++++++++++------------------------- AegeanTools/CLI/BANE.py | 11 +- AegeanTools/sigma.py | 159 ++++++++++++++++++++++ 3 files changed, 277 insertions(+), 177 deletions(-) create mode 100644 AegeanTools/sigma.py diff --git a/AegeanTools/BANE.py b/AegeanTools/BANE.py index 970e8fa8..271309f9 100755 --- a/AegeanTools/BANE.py +++ b/AegeanTools/BANE.py @@ -5,8 +5,7 @@ The function filter_image should be imported from elsewhere and run as is. """ -import warnings -from typing import Tuple +from typing import Tuple, Union, TypeAlias, Dict, Any import copy import logging @@ -21,12 +20,24 @@ from astropy.io import fits from scipy.interpolate import RegularGridInterpolator -from .fits_tools import compress +from .fits_tools import compress +from .sigma import ( + FittedSigmaClip, fit_bkg_rms_estimate, sigmaclip, fitted_sigma_clip, SigmaClip, FitBkgRmsEstimate +) __author__ = 'Paul Hancock' __version__ = 'v1.10.0' __date__ = '2022-08-17' +BANE_MODE_MAPPINGS = dict( + sigmaclip=SigmaClip, + fitrmsbkgestimate=FitBkgRmsEstimate, + fittedsigmaclip=FittedSigmaClip +) +AVAILABLE_MODES = tuple(BANE_MODE_MAPPINGS.keys()) +AVAILABLE_TYPES = tuple(BANE_MODE_MAPPINGS.values()) +ClippingModes: TypeAlias = Union[SigmaClip, FitBkgRmsEstimate, FittedSigmaClip] + # global barrier for multiprocessing barrier = None memory_id = None @@ -40,132 +51,6 @@ def init(b, mem): memory_id = mem -def median_clip(data): - return np.median(np.abs(data - np.median(data))) - -def rms_estimate( - data: np.ndarray, - mode: str = "mad", - clip_rounds: int = 2, - bin_perc: float = 0.25, - outlier_thres: float = 3.0, - nan_check: bool = True, -) -> Tuple[float,float]: - - if bin_perc > 1.0: - bin_perc /= 100.0 - - if mode == "std": - clipping_func = np.std - - elif mode == "mad": - clipping_func = median_clip - - else: - raise ValueError( - f"{mode} not supported as a clipping mode, available modes are `std` and `mad`. " - ) - - if nan_check: - data = data[np.isfinite(data)] - - cen_func = np.median - - for i in range(clip_rounds): - bkg = cen_func(data) - data = data[np.abs(data - bkg) < outlier_thres * clipping_func(data)] - - # Attempts to ensure a sane number of bins to fit against - mask_counts = 0 - loop = 1 - while mask_counts < 5 and loop < 5: - counts, binedges = np.histogram(data, bins=50 * loop) - binc = (binedges[:-1] + binedges[1:]) / 2 - - mask = counts >= bin_perc * np.max(counts) - mask_counts = np.sum(mask) - loop += 1 - - p = np.polyfit(binc[mask], np.log10(counts[mask] / np.max(counts)), 2) - a, b, c = p - - x1 = (-b + np.sqrt(b ** 2 - 4.0 * a * (c - np.log10(0.5)))) / (2.0 * a) - x2 = (-b - np.sqrt(b ** 2 - 4.0 * a * (c - np.log10(0.5)))) / (2.0 * a) - fwhm = np.abs(x1 - x2) - noise = fwhm / 2.355 - - return bkg, noise - -def sigmaclip(arr, lo, hi, reps=10): - """ - Perform sigma clipping on an array, ignoring non finite values. - - During each iteration return an array whose elements c obey: - mean -std*lo < c < mean + std*hi - - where mean/std are the mean std of the input array. - - Parameters - ---------- - arr : iterable - An iterable array of numeric types. - lo : float - The negative clipping level. - hi : float - The positive clipping level. - reps : int - The number of iterations to perform. Default = 3. - - Returns - ------- - mean : float - The mean of the array, possibly nan - std : float - The std of the array, possibly nan - - Notes - ----- - Scipy v0.16 now contains a comparable method that will ignore nan/inf - values. - """ - clipped = np.array(arr)[np.isfinite(arr)] - - # with np.errstate(all='ignore'): - - # bkg, rms = rms_estimate(data=clipped) - - # # if np.isnan(rms): - # # print(rms) - # # print(clipped) - - # return bkg, rms - - if len(clipped) < 1: - return np.nan, np.nan - - std = np.std(clipped) - mean = np.mean(clipped) - prev_valid = len(clipped) - for count in range(int(reps)): - mask = (clipped > mean-std*lo) & (clipped < mean+std*hi) - clipped = clipped[mask] - - curr_valid = len(clipped) - if curr_valid < 1: - break - # No change in statistics if no change is noted - if prev_valid == curr_valid: - break - std = np.std(clipped) - mean = np.mean(clipped) - prev_valid = curr_valid - else: - logging.debug( - "No stopping criteria was reached after {0} cycles".format(count)) - - return mean, std - - def _sf2(args): """ A shallow wrapper for sigma_filter. @@ -189,8 +74,60 @@ def _sf2(args): raise Exception("".join(traceback.format_exception(*sys.exc_info()))) +def box(r, c, data_shape, box_size): + """ + calculate the boundaries of the box centered at r,c + with size = box_size + """ + r_min = max(0, r - box_size[0] // 2) + r_max = min(data_shape[0] - 1, r + box_size[0] // 2) + c_min = max(0, c - box_size[1] // 2) + c_max = min(data_shape[1] - 1, c + box_size[1] // 2) + + return r_min, r_max, c_min, c_max + + +def adaptive_box_estimate(data, row, column, box_size, mode: ClippingModes): + + r_min, r_max, c_min, c_max = box( + row, column, data_shape=data.shape, box_size=box_size + ) + new = data[r_min:r_max, c_min:c_max] + new = new.flatten() + + if isinstance(mode, SigmaClip): + bkg, rms = sigmaclip(new, lo=mode.low, hi=mode.high) + elif isinstance(mode, FitBkgRmsEstimate): + bkg, rms = fit_bkg_rms_estimate( + data=new, clip_rounds=mode.clip_rounds, bin_perc=mode.bin_perc, outlier_thres=mode.outlier_thres + ) + elif isinstance(mode, FittedSigmaClip): + bkg, rms = fitted_sigma_clip(data=new, sigma=mode.sigma) + else: + raise TypeError(f"Unrecognised type, {mode=}") + + return float(bkg), float(rms) + + # while attempt < 10 and np.isnan(rms): + # r_min, r_max, c_min, c_max = box(row, col) + # r_min -= enlarge + # r_max += enlarge + # c_min -=enlarge + # c_max += enlarge + + # new = data[r_min:r_max, c_min:c_max] + # new = np.ravel(new) + # _, rms = sigmaclip(new, 3, 3) + + # attempt += 1 + # enlarge = 50 * attempt + + # if np.isnan(rms): + # logging.info(f"Enlarging the box {enlarge}") + + def sigma_filter(filename, region, step_size, box_size, shape, domask, - cube_index): + cube_index, mode): """ Calculate the background and rms for a sub region of an image. The results are written to shared memory - irms and ibkg. @@ -269,17 +206,7 @@ def sigma_filter(filename, region, step_size, box_size, shape, domask, logging.debug('data size is {0}'.format(data.shape)) logging.debug('data format is {0}'.format(data.dtype)) - def box(r, c): - """ - calculate the boundaries of the box centered at r,c - with size = box_size - """ - r_min = max(0, r - box_size[0] // 2) - r_max = min(data.shape[0] - 1, r + box_size[0] // 2) - c_min = max(0, c - box_size[1] // 2) - c_max = min(data.shape[1] - 1, c + box_size[1] // 2) - return r_min, r_max, c_min, c_max - + # set up a grid of rows/cols at which we will compute the bkg/rms rows = list(range(ymin-data_row_min, ymax-data_row_min, step_size[0])) rows.append(ymax-data_row_min) @@ -288,14 +215,16 @@ def box(r, c): # store the computed bkg/rms in this smaller array vals = np.zeros(shape=(len(rows), len(cols))) - + for i, row in enumerate(rows): for j, col in enumerate(cols): - r_min, r_max, c_min, c_max = box(row, col) - new = data[r_min:r_max, c_min:c_max] - new = np.ravel(new) - bkg, _ = sigmaclip(new, 3, 3) - vals[i, j] = bkg + + bkg, rms = adaptive_box_estimate( + data=data, row=row, column=col, box_size=box_size, mode=mode + ) + + if np.isfinite(bkg): + vals[i, j] = bkg # indices of all the pixels within our region gr, gc = np.mgrid[ymin-data_row_min:ymax-data_row_min, 0:shape[1]] @@ -328,33 +257,14 @@ def box(r, c): for i, row in enumerate(rows): for j, col in enumerate(cols): - attempt = 0 - enlarge = 0 rms = np.nan + + bkg, rms = adaptive_box_estimate( + data=data, row=row, column=col, box_size=box_size, mode=mode + ) - r_min, r_max, c_min, c_max = box(row, col) - new = data[r_min:r_max, c_min:c_max] - new = np.ravel(new) - _, rms = sigmaclip(new, 3, 3) - - # while attempt < 10 and np.isnan(rms): - # r_min, r_max, c_min, c_max = box(row, col) - # r_min -= enlarge - # r_max += enlarge - # c_min -=enlarge - # c_max += enlarge - - # new = data[r_min:r_max, c_min:c_max] - # new = np.ravel(new) - # _, rms = sigmaclip(new, 3, 3) - - # attempt += 1 - # enlarge = 50 * attempt - - # if np.isnan(rms): - # logging.info(f"Enlarging the box {enlarge}") - - vals[i, j] = rms + if np.isfinite(rms): + vals[i, j] = rms logging.debug("Interpolating rms to sharemem") ifunc = RegularGridInterpolator((rows, cols), vals) @@ -383,7 +293,7 @@ def box(r, c): def filter_mc_sharemem(filename, step_size, box_size, cores, shape, nslice=None, domask=True, - cube_index=0): + cube_index=0, mode: str='sigmaclip', mode_kwargs: Dict[str, Any]=None): """ Calculate the background and noise images corresponding to the input file. The calculation is done via a box-car approach and uses multiple cores and @@ -421,6 +331,7 @@ def filter_mc_sharemem(filename, step_size, box_size, cores, shape, bkg, rms : numpy.ndarray The interpolated background and noise images. """ + mode_kwargs = mode_kwargs if mode_kwargs else {} if cores is None: cores = multiprocessing.cpu_count() @@ -446,11 +357,30 @@ def filter_mc_sharemem(filename, step_size, box_size, cores, shape, logging.debug("ymins {0}".format(ymins)) logging.debug("ymaxs {0}".format(ymaxs)) + + + if mode.lower() not in AVAILABLE_MODES: + raise ValueError(f"Mode not recognised. Received {mode=}, available modes {AVAILABLE_MODES}") + + logging.info(f"Using bane {mode=}") + mode: ClippingModes = BANE_MODE_MAPPINGS[mode.lower()](**mode_kwargs) + + # if mode.lower() == 'sigmaclip': + # logging.info(f"Using original estimate mode, {mode=}") + # mode: ClippingModes = SigmaClip(**mode_kwargs) + # elif mode.lower() == 'fitrmsbkgestimate': + # logging.info(f"Using fitting mode, {mode=}") + # mode: ClippingModes = FitBkgRmsEstimate(**mode_kwargs) + # elif mode.lower() == 'fittedsigmaclip': + # logging.info(f"Using fitted sigma clip, {mode=}") + # mode: ClippingModes = FittedSigmaClip(**mode_kwargs) + # else: + args = [] for region in zip(ymins, ymaxs): args.append((filename, region, step_size, box_size, - shape, domask, cube_index)) + shape, domask, cube_index, mode)) exit = False try: @@ -477,6 +407,8 @@ def filter_mc_sharemem(filename, step_size, box_size, cores, shape, logging.error("Caught keyboard interrupt") pool.close() exit = True + except Exception as e: + logging.error(e) else: pool.close() pool.join() @@ -500,7 +432,7 @@ def filter_mc_sharemem(filename, step_size, box_size, cores, shape, def filter_image(im_name, out_base, step_size=None, box_size=None, twopass=False, # Deprecated cores=None, mask=True, compressed=False, nslice=None, - cube_index=None): + cube_index=None, mode='sigmaclip'): """ Create a background and noise image from an input image. Resulting images are written to `outbase_bkg.fits` and `outbase_rms.fits` @@ -585,7 +517,7 @@ def filter_image(im_name, out_base, step_size=None, box_size=None, bkg, rms = filter_mc_sharemem(im_name, step_size=step_size, box_size=box_size, cores=cores, shape=shape, nslice=nslice, - domask=mask, cube_index=cube_index) + domask=mask, cube_index=cube_index, mode=mode) logging.info("done") if out_base is not None: diff --git a/AegeanTools/CLI/BANE.py b/AegeanTools/CLI/BANE.py index f84ce96b..a82b5065 100755 --- a/AegeanTools/CLI/BANE.py +++ b/AegeanTools/CLI/BANE.py @@ -50,6 +50,14 @@ def main(argv=()): parser.set_defaults(out_base=None, step_size=None, box_size=None, twopass=True, cores=None, usescipy=False, debug=False) + group1.add_argument( + '--mode', + type=str, + choices=BANE.AVAILABLE_MODES, + default='sigmaclip', + help=f"Which bkg and rms estimation mode to use. Available are: {BANE.AVAILABLE_MODES}" + ) + options = parser.parse_args(args=argv) if options.cite: @@ -89,5 +97,6 @@ def main(argv=()): box_size=options.box_size, cores=options.cores, mask=options.mask, compressed=options.compress, nslice=options.stripes, - cube_index=options.cube_index) + cube_index=options.cube_index, + mode=options.mode) return 0 diff --git a/AegeanTools/sigma.py b/AegeanTools/sigma.py new file mode 100644 index 00000000..964b3610 --- /dev/null +++ b/AegeanTools/sigma.py @@ -0,0 +1,159 @@ +"""Tooling around the rms and background estimation""" + +from re import L +from typing import Tuple, NamedTuple +import logging + +import numpy as np +from scipy.stats import norm, normaltest +from astropy.stats import mad_std, sigma_clip + +class FittedSigmaClip(NamedTuple): + """Arguments for the fitted_sigma_clip""" + sigma: int = 3 + """Threshhold before clipped""" + +def fitted_mean(data: np.ndarray) -> float: + mean, _ = norm.fit(data) + return mean + + +def fitted_std(data: np.ndarray) -> float: + _, std = norm.fit(data) + return std + +def fitted_sigma_clip(data: np.ndarray, sigma: int=3) -> Tuple[float,float]: + + data = data[np.isfinite(data)] + + clipped_plane = sigma_clip( + data.flatten(), + sigma=3, + cenfunc=fitted_mean, + stdfunc=fitted_std, + maxiters=None + ) + bkg, rms = norm.fit(clipped_plane.compressed()) + + +class FitBkgRmsEstimate(NamedTuple): + """Options for the fitting approach method""" + clip_rounds: int = 3 + """Number of clipping rounds to perform""" + bin_perc: float = 0.25 + """Minimum fraction of the histogram bins, or something""" + outlier_thres: float = 3.0 + """Threshold that a data point should be at to be considered an outlier""" + +def mad(data, bkg=None): + bkg = bkg if bkg else np.median(data) + return np.median(np.abs(data - bkg)) + +def fit_bkg_rms_estimate( + data: np.ndarray, + clip_rounds: int = 2, + bin_perc: float = 0.25, + outlier_thres: float = 3.0, +) -> Tuple[float,float]: + + data = data[np.isfinite(data)] + + cen_func = np.median + + bkg = cen_func(data) + + for i in range(clip_rounds): + data = data[np.abs(data - bkg) < outlier_thres * mad(data, bkg=bkg)] + bkg = cen_func(data) + + # Attempts to ensure a sane number of bins to fit against + mask_counts = 0 + loop = 1 + while True: + counts, binedges = np.histogram(data, bins=50 * loop) + + mask = counts >= bin_perc * np.max(counts) + mask_counts = np.sum(mask) + loop += 1 + + if not (mask_counts < 5 and loop < 5): + break + + binc = (binedges[:-1] + binedges[1:]) / 2 + p = np.polyfit(binc[mask], np.log10(counts[mask] / np.max(counts)), 2) + a, b, c = p + + x1 = (-b + np.sqrt(b ** 2 - 4.0 * a * (c - np.log10(0.5)))) / (2.0 * a) + x2 = (-b - np.sqrt(b ** 2 - 4.0 * a * (c - np.log10(0.5)))) / (2.0 * a) + fwhm = np.abs(x1 - x2) + noise = fwhm / 2.355 + + return float(bkg), noise + + + +class SigmaClip(NamedTuple): + """Container for the original sigma clipping method""" + low: float = 3.0 + """Low sigma clip threshhold""" + high: float = 3.0 + """High sigma clip threshhold""" + +def sigmaclip(arr, lo, hi, reps=10): + """ + Perform sigma clipping on an array, ignoring non finite values. + + During each iteration return an array whose elements c obey: + mean -std*lo < c < mean + std*hi + + where mean/std are the mean std of the input array. + + Parameters + ---------- + arr : iterable + An iterable array of numeric types. + lo : float + The negative clipping level. + hi : float + The positive clipping level. + reps : int + The number of iterations to perform. Default = 3. + + Returns + ------- + mean : float + The mean of the array, possibly nan + std : float + The std of the array, possibly nan + + Notes + ----- + Scipy v0.16 now contains a comparable method that will ignore nan/inf + values. + """ + clipped = np.array(arr)[np.isfinite(arr)] + + if len(clipped) < 1: + return np.nan, np.nan + + std = np.std(clipped) + mean = np.mean(clipped) + prev_valid = len(clipped) + for count in range(int(reps)): + mask = (clipped > mean-std*lo) & (clipped < mean+std*hi) + clipped = clipped[mask] + + curr_valid = len(clipped) + if curr_valid < 1: + break + # No change in statistics if no change is noted + if prev_valid == curr_valid: + break + std = np.std(clipped) + mean = np.mean(clipped) + prev_valid = curr_valid + else: + logging.debug( + "No stopping criteria was reached after {0} cycles".format(count)) + + return mean, std \ No newline at end of file From d4d1d23acb062acc121632dc2790875b7df7ef28 Mon Sep 17 00:00:00 2001 From: tgalvin Date: Thu, 9 May 2024 16:43:44 +0800 Subject: [PATCH 03/10] added new mode / fix bugs --- AegeanTools/BANE.py | 3 +++ AegeanTools/sigma.py | 23 +++++++++++++++++------ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/AegeanTools/BANE.py b/AegeanTools/BANE.py index 271309f9..5e1d51fd 100755 --- a/AegeanTools/BANE.py +++ b/AegeanTools/BANE.py @@ -106,6 +106,9 @@ def adaptive_box_estimate(data, row, column, box_size, mode: ClippingModes): else: raise TypeError(f"Unrecognised type, {mode=}") + if not np.isfinite(bkg) and not np.isfinite(rms): + raise ValueError("Nasty nan") + return float(bkg), float(rms) # while attempt < 10 and np.isnan(rms): diff --git a/AegeanTools/sigma.py b/AegeanTools/sigma.py index 964b3610..23a187ea 100644 --- a/AegeanTools/sigma.py +++ b/AegeanTools/sigma.py @@ -1,25 +1,35 @@ """Tooling around the rms and background estimation""" from re import L -from typing import Tuple, NamedTuple +from typing import Tuple, NamedTuple, Optional import logging import numpy as np -from scipy.stats import norm, normaltest -from astropy.stats import mad_std, sigma_clip +from scipy.stats import norm +from astropy.stats import sigma_clip class FittedSigmaClip(NamedTuple): """Arguments for the fitted_sigma_clip""" sigma: int = 3 """Threshhold before clipped""" -def fitted_mean(data: np.ndarray) -> float: +def fitted_mean(data: np.ndarray, axis: Optional[int] =None) -> float: + if axis is not None: + # This is to make astropy sigma clip happy + raise NotImplementedError("Unexpected axis keyword. ") + mean, _ = norm.fit(data) + return mean -def fitted_std(data: np.ndarray) -> float: +def fitted_std(data: np.ndarray, axis: Optional[int]=None) -> float: + if axis is not None: + # This is to make astropy sigma clip happy + raise NotImplementedError("Unexpected axis keyword. ") + _, std = norm.fit(data) + return std def fitted_sigma_clip(data: np.ndarray, sigma: int=3) -> Tuple[float,float]: @@ -29,12 +39,13 @@ def fitted_sigma_clip(data: np.ndarray, sigma: int=3) -> Tuple[float,float]: clipped_plane = sigma_clip( data.flatten(), sigma=3, - cenfunc=fitted_mean, + cenfunc=np.median, stdfunc=fitted_std, maxiters=None ) bkg, rms = norm.fit(clipped_plane.compressed()) + return float(bkg), float(rms) class FitBkgRmsEstimate(NamedTuple): """Options for the fitting approach method""" From d4c1975902fe41f7c79b7d35434ced9b0cf18517 Mon Sep 17 00:00:00 2001 From: tgalvin Date: Fri, 10 May 2024 11:31:56 +0800 Subject: [PATCH 04/10] Added adaptive mode --- AegeanTools/BANE.py | 57 ++++++++++++++++++-------------------------- AegeanTools/sigma.py | 44 ++++++++++++++++++++++++++-------- 2 files changed, 57 insertions(+), 44 deletions(-) diff --git a/AegeanTools/BANE.py b/AegeanTools/BANE.py index 5e1d51fd..cd5c1a54 100755 --- a/AegeanTools/BANE.py +++ b/AegeanTools/BANE.py @@ -19,6 +19,7 @@ import numpy as np from astropy.io import fits from scipy.interpolate import RegularGridInterpolator +from scipy.stats import normaltest from .fits_tools import compress from .sigma import ( @@ -87,47 +88,35 @@ def box(r, c, data_shape, box_size): return r_min, r_max, c_min, c_max -def adaptive_box_estimate(data, row, column, box_size, mode: ClippingModes): +def adaptive_box_estimate( + data: np.ndarray, row: int, column: int, box_size, mode: ClippingModes +) -> Tuple[float,float]: - r_min, r_max, c_min, c_max = box( - row, column, data_shape=data.shape, box_size=box_size - ) - new = data[r_min:r_max, c_min:c_max] - new = new.flatten() - if isinstance(mode, SigmaClip): - bkg, rms = sigmaclip(new, lo=mode.low, hi=mode.high) - elif isinstance(mode, FitBkgRmsEstimate): - bkg, rms = fit_bkg_rms_estimate( - data=new, clip_rounds=mode.clip_rounds, bin_perc=mode.bin_perc, outlier_thres=mode.outlier_thres + original_box = box_size + original_pixels = np.prod(original_box) + loop = 0 + while True: + test_box_size = tuple([b+loop*100 for b in original_box]) + r_min, r_max, c_min, c_max = box( + row, column, data_shape=data.shape, box_size=test_box_size ) - elif isinstance(mode, FittedSigmaClip): - bkg, rms = fitted_sigma_clip(data=new, sigma=mode.sigma) - else: - raise TypeError(f"Unrecognised type, {mode=}") + new = data[r_min:r_max, c_min:c_max] + new = new.flatten() + + result = mode.perform(data=new) + + if result.valid_pixels > 0.8 * original_pixels or loop > 5: + break + + loop += 1 + - if not np.isfinite(bkg) and not np.isfinite(rms): - raise ValueError("Nasty nan") + rms = result.rms + bkg = result.bkg return float(bkg), float(rms) - # while attempt < 10 and np.isnan(rms): - # r_min, r_max, c_min, c_max = box(row, col) - # r_min -= enlarge - # r_max += enlarge - # c_min -=enlarge - # c_max += enlarge - - # new = data[r_min:r_max, c_min:c_max] - # new = np.ravel(new) - # _, rms = sigmaclip(new, 3, 3) - - # attempt += 1 - # enlarge = 50 * attempt - - # if np.isnan(rms): - # logging.info(f"Enlarging the box {enlarge}") - def sigma_filter(filename, region, step_size, box_size, shape, domask, cube_index, mode): diff --git a/AegeanTools/sigma.py b/AegeanTools/sigma.py index 23a187ea..dc63f007 100644 --- a/AegeanTools/sigma.py +++ b/AegeanTools/sigma.py @@ -8,11 +8,23 @@ from scipy.stats import norm from astropy.stats import sigma_clip +class BANEResult(NamedTuple): + """Container for function results""" + rms: float + """RMS constrained""" + bkg: float + """BKG constrained""" + valid_pixels: int + """Number of pixels constrained against""" + class FittedSigmaClip(NamedTuple): """Arguments for the fitted_sigma_clip""" sigma: int = 3 """Threshhold before clipped""" - + + def perform(self, data: np.ndarray) -> BANEResult: + return fitted_sigma_clip(data=data, sigma=self.sigma) + def fitted_mean(data: np.ndarray, axis: Optional[int] =None) -> float: if axis is not None: # This is to make astropy sigma clip happy @@ -32,20 +44,22 @@ def fitted_std(data: np.ndarray, axis: Optional[int]=None) -> float: return std -def fitted_sigma_clip(data: np.ndarray, sigma: int=3) -> Tuple[float,float]: +def fitted_sigma_clip(data: np.ndarray, sigma: int=3) -> BANEResult: data = data[np.isfinite(data)] clipped_plane = sigma_clip( data.flatten(), - sigma=3, - cenfunc=np.median, + sigma=sigma, + cenfunc=fitted_mean, stdfunc=fitted_std, maxiters=None ) bkg, rms = norm.fit(clipped_plane.compressed()) - return float(bkg), float(rms) + result = BANEResult(rms=float(rms), bkg=float(bkg), valid_pixels=len(clipped_plane.compressed())) + + return result class FitBkgRmsEstimate(NamedTuple): """Options for the fitting approach method""" @@ -56,6 +70,9 @@ class FitBkgRmsEstimate(NamedTuple): outlier_thres: float = 3.0 """Threshold that a data point should be at to be considered an outlier""" + def perform(self, data: np.ndarray) -> BANEResult: + return fit_bkg_rms_estimate(data=data, clip_rounds=self.clip_rounds, bin_perc=self.bin_perc, outlier_thres=self.outlier_thres) + def mad(data, bkg=None): bkg = bkg if bkg else np.median(data) return np.median(np.abs(data - bkg)) @@ -65,7 +82,7 @@ def fit_bkg_rms_estimate( clip_rounds: int = 2, bin_perc: float = 0.25, outlier_thres: float = 3.0, -) -> Tuple[float,float]: +) -> BANEResult: data = data[np.isfinite(data)] @@ -74,7 +91,7 @@ def fit_bkg_rms_estimate( bkg = cen_func(data) for i in range(clip_rounds): - data = data[np.abs(data - bkg) < outlier_thres * mad(data, bkg=bkg)] + data = data[np.abs(data - bkg) < outlier_thres * 1.4826 * mad(data, bkg=bkg)] bkg = cen_func(data) # Attempts to ensure a sane number of bins to fit against @@ -99,7 +116,9 @@ def fit_bkg_rms_estimate( fwhm = np.abs(x1 - x2) noise = fwhm / 2.355 - return float(bkg), noise + result = BANEResult(rms=float(noise), bkg=float(bkg), valid_pixels=len(data)) + + return result @@ -110,7 +129,10 @@ class SigmaClip(NamedTuple): high: float = 3.0 """High sigma clip threshhold""" -def sigmaclip(arr, lo, hi, reps=10): + def perform(self, data: np.ndarray) -> BANEResult: + return sigmaclip(arr=data, lo=self.low, hi=self.high) + +def sigmaclip(arr, lo, hi, reps=10) -> BANEResult: """ Perform sigma clipping on an array, ignoring non finite values. @@ -167,4 +189,6 @@ def sigmaclip(arr, lo, hi, reps=10): logging.debug( "No stopping criteria was reached after {0} cycles".format(count)) - return mean, std \ No newline at end of file + result = BANEResult(rms=float(std), bkg=float(mean), valid_pixels=len(clipped)) + + return result \ No newline at end of file From cd9ed1c417595eb83a1b76aa1aa34e66c59b2590 Mon Sep 17 00:00:00 2001 From: tgalvin Date: Fri, 10 May 2024 13:06:38 +0800 Subject: [PATCH 05/10] added adaptive box option --- AegeanTools/BANE.py | 58 ++++++++++++++++++++++++++--------------- AegeanTools/CLI/BANE.py | 9 ++++++- 2 files changed, 45 insertions(+), 22 deletions(-) diff --git a/AegeanTools/BANE.py b/AegeanTools/BANE.py index cd5c1a54..aca8e932 100755 --- a/AegeanTools/BANE.py +++ b/AegeanTools/BANE.py @@ -5,7 +5,7 @@ The function filter_image should be imported from elsewhere and run as is. """ -from typing import Tuple, Union, TypeAlias, Dict, Any +from typing import Tuple, Union, TypeAlias, Dict, Any, Optional import copy import logging @@ -89,7 +89,7 @@ def box(r, c, data_shape, box_size): def adaptive_box_estimate( - data: np.ndarray, row: int, column: int, box_size, mode: ClippingModes + data: np.ndarray, row: int, column: int, box_size, mode: ClippingModes, max_loop: int = 5 ) -> Tuple[float,float]: @@ -106,7 +106,7 @@ def adaptive_box_estimate( result = mode.perform(data=new) - if result.valid_pixels > 0.8 * original_pixels or loop > 5: + if result.valid_pixels > 0.8 * original_pixels or loop > max_loop: break loop += 1 @@ -119,7 +119,7 @@ def adaptive_box_estimate( def sigma_filter(filename, region, step_size, box_size, shape, domask, - cube_index, mode): + cube_index, mode, adaptive_loop=0): """ Calculate the background and rms for a sub region of an image. The results are written to shared memory - irms and ibkg. @@ -148,6 +148,13 @@ def sigma_filter(filename, region, step_size, box_size, shape, domask, cube_index : int The index into the 3rd dimension (if present) + mode : ClippingModes + Which of the clipping modes to use. + + adaptive_loop : int + The maximum number of resizes allowed should a box be resized. If 0 this is turned off. + Default = 0 + Returns ------- None @@ -212,7 +219,7 @@ def sigma_filter(filename, region, step_size, box_size, shape, domask, for j, col in enumerate(cols): bkg, rms = adaptive_box_estimate( - data=data, row=row, column=col, box_size=box_size, mode=mode + data=data, row=row, column=col, box_size=box_size, mode=mode, max_loop=adaptive_loop ) if np.isfinite(bkg): @@ -252,7 +259,7 @@ def sigma_filter(filename, region, step_size, box_size, shape, domask, rms = np.nan bkg, rms = adaptive_box_estimate( - data=data, row=row, column=col, box_size=box_size, mode=mode + data=data, row=row, column=col, box_size=box_size, mode=mode, max_loop=adaptive_loop ) if np.isfinite(rms): @@ -285,7 +292,8 @@ def sigma_filter(filename, region, step_size, box_size, shape, domask, def filter_mc_sharemem(filename, step_size, box_size, cores, shape, nslice=None, domask=True, - cube_index=0, mode: str='sigmaclip', mode_kwargs: Dict[str, Any]=None): + cube_index=0, mode: str='sigmaclip', mode_kwargs: Optional[Dict[str, Any]]=None, + adaptive_box: bool=False): """ Calculate the background and noise images corresponding to the input file. The calculation is done via a box-car approach and uses multiple cores and @@ -318,6 +326,15 @@ def filter_mc_sharemem(filename, step_size, box_size, cores, shape, cube_index : int For 3d data use this index into the third dimension. Default = 0 + mode : str + Which background and rms estimation mode to use. + Default = 'sigmaclip', which is the original BANE mode. + + adaptive_box : bool + Resize the box-car should a position be deemed to have unreliable + statistics. Default = False. + + Returns ------- bkg, rms : numpy.ndarray @@ -357,22 +374,12 @@ def filter_mc_sharemem(filename, step_size, box_size, cores, shape, logging.info(f"Using bane {mode=}") mode: ClippingModes = BANE_MODE_MAPPINGS[mode.lower()](**mode_kwargs) - # if mode.lower() == 'sigmaclip': - # logging.info(f"Using original estimate mode, {mode=}") - # mode: ClippingModes = SigmaClip(**mode_kwargs) - # elif mode.lower() == 'fitrmsbkgestimate': - # logging.info(f"Using fitting mode, {mode=}") - # mode: ClippingModes = FitBkgRmsEstimate(**mode_kwargs) - # elif mode.lower() == 'fittedsigmaclip': - # logging.info(f"Using fitted sigma clip, {mode=}") - # mode: ClippingModes = FittedSigmaClip(**mode_kwargs) - # else: - + adaptive_loop = 5 if adaptive_box else 0 args = [] for region in zip(ymins, ymaxs): args.append((filename, region, step_size, box_size, - shape, domask, cube_index, mode)) + shape, domask, cube_index, mode, adaptive_loop)) exit = False try: @@ -424,7 +431,7 @@ def filter_mc_sharemem(filename, step_size, box_size, cores, shape, def filter_image(im_name, out_base, step_size=None, box_size=None, twopass=False, # Deprecated cores=None, mask=True, compressed=False, nslice=None, - cube_index=None, mode='sigmaclip'): + cube_index=None, mode='sigmaclip', adaptive_box: bool=False): """ Create a background and noise image from an input image. Resulting images are written to `outbase_bkg.fits` and `outbase_rms.fits` @@ -467,6 +474,14 @@ def filter_image(im_name, out_base, step_size=None, box_size=None, If the input data is 3d, then use this index for the 3rd dimension. Default = None, use the first index. + mode : str + Which background and rms estimation mode to use. + Default = 'sigmaclip', which is the original BANE mode. + + adaptive_box : bool + Resize the box-car should a position be deemed to have unreliable + statistics. Default = False. + Returns ------- bkg, rms : `numpy.ndarray` @@ -509,7 +524,8 @@ def filter_image(im_name, out_base, step_size=None, box_size=None, bkg, rms = filter_mc_sharemem(im_name, step_size=step_size, box_size=box_size, cores=cores, shape=shape, nslice=nslice, - domask=mask, cube_index=cube_index, mode=mode) + domask=mask, cube_index=cube_index, mode=mode, + adaptive_box=adaptive_box) logging.info("done") if out_base is not None: diff --git a/AegeanTools/CLI/BANE.py b/AegeanTools/CLI/BANE.py index a82b5065..12e96638 100755 --- a/AegeanTools/CLI/BANE.py +++ b/AegeanTools/CLI/BANE.py @@ -57,6 +57,12 @@ def main(argv=()): default='sigmaclip', help=f"Which bkg and rms estimation mode to use. Available are: {BANE.AVAILABLE_MODES}" ) + group1.add_argument( + "--adaptive-box", + default=False, + action='store_true', + help="Allow the box-car to be resized should insufficent statistics be detected. " + ) options = parser.parse_args(args=argv) @@ -98,5 +104,6 @@ def main(argv=()): mask=options.mask, compressed=options.compress, nslice=options.stripes, cube_index=options.cube_index, - mode=options.mode) + mode=options.mode, + adapative_box=opts.adaptive_box) return 0 From 638f3063c16a606c429c35f2d9349ef62cc3c6c0 Mon Sep 17 00:00:00 2001 From: tgalvin Date: Fri, 10 May 2024 13:28:07 +0800 Subject: [PATCH 06/10] added docstrings / typo fix --- AegeanTools/BANE.py | 27 +++++++++++++++++++++++++ AegeanTools/CLI/BANE.py | 2 +- AegeanTools/sigma.py | 45 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 71 insertions(+), 3 deletions(-) diff --git a/AegeanTools/BANE.py b/AegeanTools/BANE.py index aca8e932..0c1a6e28 100755 --- a/AegeanTools/BANE.py +++ b/AegeanTools/BANE.py @@ -91,6 +91,33 @@ def box(r, c, data_shape, box_size): def adaptive_box_estimate( data: np.ndarray, row: int, column: int, box_size, mode: ClippingModes, max_loop: int = 5 ) -> Tuple[float,float]: + """Estimate the background and RMS level within a data extract. Multiple + clipping and noise estimation modes are supported. + + The adaptive behavour is only activated should the pixel statistics become + compromised. At the moment, this only is considered true if fewer than 80percent + of the pixels remain after clipping. + + Parameters + ---------- + data : np.ndarray + Data that will be the source of the box extraction + row : int + Row to center extracted region at + column : int + Column to center extracted region at + box_size : Tuple[int,int] + Base size of the box to extract + mode : ClippingModes + Instance of a clipping and noise estiamtion mode to use. Should return a BANEResult and ahve a .perform method. + max_loop : int, optional + Maximum number of adjustable boxes to use. Will increase in steps of 100 pixels. If 0 adative mode is disabled, by default 5 + + Returns + ------- + Tuple[float,float] + Backgrounf and noise estimation + """ original_box = box_size diff --git a/AegeanTools/CLI/BANE.py b/AegeanTools/CLI/BANE.py index 12e96638..22323eab 100755 --- a/AegeanTools/CLI/BANE.py +++ b/AegeanTools/CLI/BANE.py @@ -105,5 +105,5 @@ def main(argv=()): nslice=options.stripes, cube_index=options.cube_index, mode=options.mode, - adapative_box=opts.adaptive_box) + adaptive_box=options.adaptive_box) return 0 diff --git a/AegeanTools/sigma.py b/AegeanTools/sigma.py index dc63f007..8957823b 100644 --- a/AegeanTools/sigma.py +++ b/AegeanTools/sigma.py @@ -26,6 +26,7 @@ def perform(self, data: np.ndarray) -> BANEResult: return fitted_sigma_clip(data=data, sigma=self.sigma) def fitted_mean(data: np.ndarray, axis: Optional[int] =None) -> float: + """Internal function that returns the mean by fitting to pixel distribution data""" if axis is not None: # This is to make astropy sigma clip happy raise NotImplementedError("Unexpected axis keyword. ") @@ -36,6 +37,7 @@ def fitted_mean(data: np.ndarray, axis: Optional[int] =None) -> float: def fitted_std(data: np.ndarray, axis: Optional[int]=None) -> float: + """Internal function that retunrs the stf by fitting to the pixel distribution""" if axis is not None: # This is to make astropy sigma clip happy raise NotImplementedError("Unexpected axis keyword. ") @@ -45,7 +47,21 @@ def fitted_std(data: np.ndarray, axis: Optional[int]=None) -> float: return std def fitted_sigma_clip(data: np.ndarray, sigma: int=3) -> BANEResult: - + """Estimate the back ground and noise level by fitting to the pixel distribution. + Sigma clipping is performed using the fitted statistics. + + Parameters + ---------- + data : np.ndarray + Data that will be considered + sigma : int, optional + Threshold for a point to be flagged, by default 3 + + Returns + ------- + BANEResult + RMS and bkg ground statistics + """ data = data[np.isfinite(data)] clipped_plane = sigma_clip( @@ -74,6 +90,9 @@ def perform(self, data: np.ndarray) -> BANEResult: return fit_bkg_rms_estimate(data=data, clip_rounds=self.clip_rounds, bin_perc=self.bin_perc, outlier_thres=self.outlier_thres) def mad(data, bkg=None): + """Compute the median asbolute deviation. optionally provide a + precomuted background measure + """ bkg = bkg if bkg else np.median(data) return np.median(np.abs(data - bkg)) @@ -83,7 +102,29 @@ def fit_bkg_rms_estimate( bin_perc: float = 0.25, outlier_thres: float = 3.0, ) -> BANEResult: + """An over the top attempt at robustly characterising the + back ground and RMS. Data will first be flagged via the MAD, + then bin, then those bin counts are used to fit for a gaussian. + Only bins that are above 25 percent of the maximum bin count + are used in the fitting process. + + Parameters + ---------- + data : np.ndarray + Data to be estimated + clip_rounds : int, optional + Number of clipping rounds, by default 2 + bin_perc : float, optional + Minimum set of counts, relative to the max count bin, that any bin should have. Less than this and they are discarded, by default 0.25 + outlier_thres : float, optional + Clipping threshold to use, by default 3.0 + + Returns + ------- + BANEResult + Backgroun and noise estimation + """ data = data[np.isfinite(data)] cen_func = np.median @@ -132,7 +173,7 @@ class SigmaClip(NamedTuple): def perform(self, data: np.ndarray) -> BANEResult: return sigmaclip(arr=data, lo=self.low, hi=self.high) -def sigmaclip(arr, lo, hi, reps=10) -> BANEResult: +def sigmaclip(arr, lo, hi, reps=10) -> BANEResult: """ Perform sigma clipping on an array, ignoring non finite values. From f627448f4e52822de1dc6847a861e87215056b48 Mon Sep 17 00:00:00 2001 From: tgalvin Date: Fri, 10 May 2024 16:09:16 +0800 Subject: [PATCH 07/10] added finite check --- AegeanTools/BANE.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/AegeanTools/BANE.py b/AegeanTools/BANE.py index 0c1a6e28..40970139 100755 --- a/AegeanTools/BANE.py +++ b/AegeanTools/BANE.py @@ -133,10 +133,13 @@ def adaptive_box_estimate( result = mode.perform(data=new) - if result.valid_pixels > 0.8 * original_pixels or loop > max_loop: + if loop >= max_loop: + break + if all(np.isfinite(result)) and result.valid_pixels > 0.9 * len(new): break loop += 1 + print(f"increasing, {loop=} {row=} {column=}") rms = result.rms @@ -402,6 +405,7 @@ def filter_mc_sharemem(filename, step_size, box_size, cores, shape, mode: ClippingModes = BANE_MODE_MAPPINGS[mode.lower()](**mode_kwargs) adaptive_loop = 5 if adaptive_box else 0 + logging.info(f"Adaptive box resize loops: {adaptive_loop}") args = [] for region in zip(ymins, ymaxs): From 0fd986263816ac397cf8705f35c546553a30f15c Mon Sep 17 00:00:00 2001 From: tgalvin Date: Fri, 10 May 2024 16:21:59 +0800 Subject: [PATCH 08/10] removed bad tim --- AegeanTools/BANE.py | 1 - 1 file changed, 1 deletion(-) diff --git a/AegeanTools/BANE.py b/AegeanTools/BANE.py index 40970139..ad5656dc 100755 --- a/AegeanTools/BANE.py +++ b/AegeanTools/BANE.py @@ -121,7 +121,6 @@ def adaptive_box_estimate( original_box = box_size - original_pixels = np.prod(original_box) loop = 0 while True: test_box_size = tuple([b+loop*100 for b in original_box]) From f30237ae61c33ae2cbf875116a8918c49c82aa0f Mon Sep 17 00:00:00 2001 From: tgalvin Date: Wed, 15 May 2024 15:23:48 +0800 Subject: [PATCH 09/10] added git gaussian cdf --- AegeanTools/BANE.py | 9 +++-- AegeanTools/sigma.py | 94 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 4 deletions(-) diff --git a/AegeanTools/BANE.py b/AegeanTools/BANE.py index ad5656dc..7c1f2a9a 100755 --- a/AegeanTools/BANE.py +++ b/AegeanTools/BANE.py @@ -23,7 +23,7 @@ from .fits_tools import compress from .sigma import ( - FittedSigmaClip, fit_bkg_rms_estimate, sigmaclip, fitted_sigma_clip, SigmaClip, FitBkgRmsEstimate + FitGaussianCDF, FittedSigmaClip, SigmaClip, FitBkgRmsEstimate, FitGaussianCDF ) __author__ = 'Paul Hancock' @@ -33,11 +33,12 @@ BANE_MODE_MAPPINGS = dict( sigmaclip=SigmaClip, fitrmsbkgestimate=FitBkgRmsEstimate, - fittedsigmaclip=FittedSigmaClip + fittedsigmaclip=FittedSigmaClip, + fitgaussiancdf=FitGaussianCDF ) AVAILABLE_MODES = tuple(BANE_MODE_MAPPINGS.keys()) AVAILABLE_TYPES = tuple(BANE_MODE_MAPPINGS.values()) -ClippingModes: TypeAlias = Union[SigmaClip, FitBkgRmsEstimate, FittedSigmaClip] +ClippingModes: TypeAlias = Union[SigmaClip, FitBkgRmsEstimate, FittedSigmaClip, FitGaussianCDF] # global barrier for multiprocessing barrier = None @@ -144,6 +145,8 @@ def adaptive_box_estimate( rms = result.rms bkg = result.bkg + # print(f"{row} {column}") + return float(bkg), float(rms) diff --git a/AegeanTools/sigma.py b/AegeanTools/sigma.py index 8957823b..53bd0d09 100644 --- a/AegeanTools/sigma.py +++ b/AegeanTools/sigma.py @@ -1,11 +1,13 @@ """Tooling around the rms and background estimation""" from re import L -from typing import Tuple, NamedTuple, Optional +from typing import Tuple, NamedTuple, Optional, Callable import logging import numpy as np from scipy.stats import norm +from scipy.special import erf +from scipy.optimize import minimize from astropy.stats import sigma_clip class BANEResult(NamedTuple): @@ -17,6 +19,96 @@ class BANEResult(NamedTuple): valid_pixels: int """Number of pixels constrained against""" + +class FitGaussianCDF(NamedTuple): + """Options for the fitting approach method""" + linex_args: Tuple[float,float] = (0.1, 1.0) + + def perform(self, data: np.ndarray) -> BANEResult: + return fit_gaussian_cdf(data=data, linex_args=self.linex_args) + + +def softmax(x: np.ndarray, offset: float=0) -> np.ndarray: + """Activation function""" + return np.log(1+np.exp(x + offset)) + +def gaussian_cdf(x: np.ndarray, mu: float, sig: float) -> np.ndarray: + """Calculate teh cumulative distribution given a mu and sig across x-values""" + top = x - mu + bottom = 1.412 * sig + + return 0.5 * (1. + erf(top / bottom)) + + +def linear_squared(x: np.ndarray, a: float, b: float) -> np.ndarray: + """Non-linear loss function. Residuals below zero are scaled via the + linear gradient `a`. The squared-residual of residual values above + zero are scaled by factor `b`. + + Parameters + ---------- + x : np.ndarray + Residuals to scale via this linear-squared function + a : float + Gradient to apply to `x` values below zero + b : float + Scale to apply to the squared `x` values when `x` is above zero + + Returns + ------- + np.ndarray + Cost of each residual. This vector should be summed and treated as the loss when optimizing + """ + + mask = x < 0 + + return (a * -x + 1) * mask + (b * (1. + x))** 2. * ~mask + + x = x.copy() + + x[mask] = a * -x[mask] + 1 + x[~mask] = b * (1+x[~mask])**2 + + return x + +def make_fitness( + x: np.ndarray, y: np.ndarray, a: float, b: float +) -> Callable: + """Creates the cost function on the (x,y) values that will be used throughout minimisation. """ + + def loss_function(args): + mu, sig, scalar = args + + s = np.sum( + linear_squared( + gaussian_cdf(x, mu, sig) * softmax(scalar) - y, + a, + b, + ) + ) + + return s + return loss_function + +def fit_gaussian_cdf(data: np.ndarray, linex_args: Tuple[float,float] = (0.1, 1.0)) -> BANEResult: + + data = data[np.isfinite(data)].flatten() + + sort_data = np.sort(data) + cdf_data = np.arange(start=0, stop=1, step=1./len(sort_data)) + + func = make_fitness(sort_data, cdf_data, *linex_args) + + res = minimize( + func, (0, 0.0001, 0.8), + # tol=1e-7, + method='Nelder-Mead' + ) + + bane_result = BANEResult(rms=res.x[1], bkg=res.x[0], valid_pixels=int(res.x[2] * len(data))) + + return bane_result + class FittedSigmaClip(NamedTuple): """Arguments for the fitted_sigma_clip""" sigma: int = 3 From dcacdb3dc27af0cffb61431ad52ef2e6c179eb07 Mon Sep 17 00:00:00 2001 From: tgalvin Date: Wed, 15 May 2024 16:20:22 +0800 Subject: [PATCH 10/10] removed offset=0 from softmax --- AegeanTools/BANE.py | 2 +- AegeanTools/sigma.py | 11 ++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/AegeanTools/BANE.py b/AegeanTools/BANE.py index 7c1f2a9a..fc1db625 100755 --- a/AegeanTools/BANE.py +++ b/AegeanTools/BANE.py @@ -145,7 +145,7 @@ def adaptive_box_estimate( rms = result.rms bkg = result.bkg - # print(f"{row} {column}") + print(f"{row} {column}") return float(bkg), float(rms) diff --git a/AegeanTools/sigma.py b/AegeanTools/sigma.py index 53bd0d09..d432a12a 100644 --- a/AegeanTools/sigma.py +++ b/AegeanTools/sigma.py @@ -28,9 +28,9 @@ def perform(self, data: np.ndarray) -> BANEResult: return fit_gaussian_cdf(data=data, linex_args=self.linex_args) -def softmax(x: np.ndarray, offset: float=0) -> np.ndarray: +def softmax(x: np.ndarray) -> np.ndarray: """Activation function""" - return np.log(1+np.exp(x + offset)) + return np.log(1+np.exp(x)) def gaussian_cdf(x: np.ndarray, mu: float, sig: float) -> np.ndarray: """Calculate teh cumulative distribution given a mu and sig across x-values""" @@ -64,13 +64,6 @@ def linear_squared(x: np.ndarray, a: float, b: float) -> np.ndarray: return (a * -x + 1) * mask + (b * (1. + x))** 2. * ~mask - x = x.copy() - - x[mask] = a * -x[mask] + 1 - x[~mask] = b * (1+x[~mask])**2 - - return x - def make_fitness( x: np.ndarray, y: np.ndarray, a: float, b: float ) -> Callable: