From 96e17dafd62ce3ec6d3fa3677dfdecf659d73805 Mon Sep 17 00:00:00 2001 From: Dieter Werthmuller Date: Fri, 14 Feb 2025 10:25:59 +0100 Subject: [PATCH] Implement feedback Femke --- docs/paper.bib | 55 +++++++++++++++++++++++-- docs/paper.md | 106 +++++++++++++++++++++++++++---------------------- 2 files changed, 110 insertions(+), 51 deletions(-) diff --git a/docs/paper.bib b/docs/paper.bib index d016eb1..60e0362 100644 --- a/docs/paper.bib +++ b/docs/paper.bib @@ -5,7 +5,8 @@ @ARTICLE{esmda YEAR = {2013}, VOLUME = {55}, PAGES = {3-15}, - DOI = {10.1016/j.cageo.2012.03.011} + DOI = {10.1016/j.cageo.2012.03.011}, + URL = {https://doi.org/10.1016/j.cageo.2012.03.011}, } @ARTICLE{SciPy, @@ -28,8 +29,8 @@ @ARTICLE{SciPy YEAR = {2020}, VOLUME = {17}, PAGES = {261--272}, - ADSURL = {https://rdcu.be/b08Wh}, DOI = {10.1038/s41592-019-0686-2}, + URL = {https://doi.org/10.1038/s41592-019-0686-2}, } @ARTICLE{NumPy, @@ -52,7 +53,7 @@ @ARTICLE{NumPy PAGES = {357--362}, DOI = {10.1038/s41586-020-2649-2}, PUBLISHER = {Springer Science and Business Media {LLC}}, - URL = {https://doi.org/10.1038/s41586-020-2649-2} + URL = {https://doi.org/10.1038/s41586-020-2649-2}, } @ARTICLE{opendarts, @@ -80,5 +81,51 @@ @SOFTWARE{pyesmda PUBLISHER = {Zenodo}, VERSION = {v0.3.2}, DOI = {10.5281/zenodo.7425670}, - URL = {https://doi.org/10.5281/zenodo.7425670} + URL = {https://doi.org/10.5281/zenodo.7425670}, +} + +@ARTICLE{burgers, + AUTHOR = {Gerrit Burgers and Peter Jan van Leeuwen and Geir Evensen}, + TITLE = {Analysis Scheme in the Ensemble {K}alman Filter}, + JOURNAL = {Monthly Weather Review}, + YEAR = {1998}, + PUBLISHER = {American Meteorological Society}, + VOLUME = {126}, + NUMBER = {6}, + DOI = {10.1175/1520-0493(1998)126<1719:ASITEK>2.0.CO;2}, + PAGES = {1719--1724}, + URL = {https://doi.org/10.1175/1520-0493(1998)126<1719:ASITEK>2.0.CO;2}, +} + +@ARTICLE{saifullin, + TITLE = {Integrating geomechanical proxy models with data assimilation for energy transition applications}, + JOURNAL 
= {Geomechanics for Energy and the Environment}, + VOLUME = {40}, + PAGES = {100618}, + YEAR = {2024}, + ISSN = {2352-3808}, + DOI = {10.1016/j.gete.2024.100618}, + URL = {https://doi.org/10.1016/j.gete.2024.100618}, + AUTHOR = {Ilshat Saifullin and Gabriel Serrão Seabra and Anne Pluymakers and Femke C. Vossepoel and Denis Voskov}, +} + +@ARTICLE{seabra, + TITLE = {AI enhanced data assimilation and uncertainty quantification applied to Geological Carbon Storage}, + JOURNAL = {International Journal of Greenhouse Gas Control}, + VOLUME = {136}, + PAGES = {104190}, + YEAR = {2024}, + ISSN = {1750-5836}, + DOI = {10.1016/j.ijggc.2024.104190}, + URL = {https://doi.org/10.1016/j.ijggc.2024.104190}, + AUTHOR = {Gabriel Serrão Seabra and Nikolaj T. Mücke and Vinicius Luiz Santos Silva and Denis Voskov and Femke C. Vossepoel}, +} + +@BOOK{evensen, + AUTHOR = {Geir Evensen and Femke C. Vossepoel and Peter Jan van Leeuwen}, + TITLE = {Data Assimilation Fundamentals}, + PUBLISHER = {Springer}, + YEAR = {2022}, + DOI = {10.1007/978-3-030-96709-3}, + URL = {https://doi.org/10.1007/978-3-030-96709-3}, } diff --git a/docs/paper.md b/docs/paper.md index a591998..24df78e 100644 --- a/docs/paper.md +++ b/docs/paper.md @@ -23,38 +23,41 @@ affiliations: index: 2 - name: Petroleo Brasileiro S.A. (Petrobras), BR index: 3 -date: 31 December 2024 +date: 14 February 2025 bibliography: paper.bib --- # Summary -Data Assimilation (DA) combines computer models with real-world measurements to -improve predictions. The Python package `dageo` is a tool to apply DA in -geoscience applications. Currently, it implements the Ensemble Smoother with -Multiple Data Assimilation (ESMDA) method [@esmda] and provides tools for -reservoir engineering applications. The package includes localization for -refined updates, gaussian random field generation for realistic permeability -modeling, and integration capabilities with external simulators. 
- -An additional feature of `dageo` is an educational, two-dimensional -single-phase reservoir simulator that models pressure changes over time and -well behavior for both injection and production scenarios. This simulator is -particularly useful for educational purposes, providing a practical platform -for students and researchers to learn and experiment with DA concepts and -techniques. The software is well documented, with examples that guide users -through learning ESMDA concepts, testing new ideas, and applying methods to -real-world problems. +Data Assimilation combines computer models with real-world measurements to +improve estimates and forecasts of dynamical systems such as oceans, +atmosphere, and subsurface reservoirs. The Python package `dageo` is a tool to +apply data assimilation in geoscience applications. Currently, it encompasses +the Ensemble Smoother with Multiple Data Assimilation (ESMDA) method [@esmda] +and provides tools for reservoir engineering applications. The package includes +localization to help with relatively small ensembles, Gaussian random field +generation for realistic permeability modeling, and integration capabilities +with external simulators. + +An additional feature of `dageo` is a two-dimensional single-phase reservoir +simulator that models pressure changes over time and well behavior for both +injection and production scenarios. This simulator is particularly useful for +educational purposes, providing a practical platform for students and +researchers to learn and experiment with data assimilation concepts and +techniques. The software features online documentation, with examples that +guide users through learning ESMDA concepts, testing new ideas, and applying +methods to real-world problems. # ESMDA -ESMDA is the first implemented method, out of the current need of the authors. -However, `dageo` is general enough so that other DA methods can and will be -added easily at a later stage. 
While ESMDA is theoretically straightforward, -practical implementation requires careful handling of matrix operations, -ensemble management, and numerical stability. The algorithm works by -iteratively updating an ensemble of model parameters to match observed data, +ESMDA is the first data assimilation method implemented in `dageo`, out of the +current need of the authors. However, `dageo` is general enough so that other +data assimilation methods can and will be added easily at a later stage. While +ESMDA is theoretically straightforward, practical implementation requires +careful handling of matrix operations, ensemble management, and ensuring +numerical stability. The algorithm works by iteratively updating an ensemble of +model parameters to match observed data following $$ z_j^a = z_j^f + C_\text{ZD}^f \left(C_\text{DD}^f + \alpha C_\text{D} @@ -64,13 +67,21 @@ $$ where $z^a$ represents the updated (analysis) parameters, $z^f$ the prior (forecast) parameters, and the $C$ terms represent various covariance matrices for the data and the model parameters (subscripts D and Z, respectively). The -ESMDA coefficient (or inflation factor) is denoted by $\alpha$, the -predicted data vector is $d^f$ and $d_{\text{uc}}$ represents the perturbed observations for the j-th ensemble member, generated by adding random noise to the original observations for each iteration, as proposed in the original ESMDA method. The equation is evaluated for $i$ data assimilation steps, where -$i$ is typically a low number between 4 to 10. The $\alpha$ can change in each -step, as long as $\sum_i \frac{1}{\alpha_i} = 1$. Common are either constant -values or series of decreasing values. The algorithm's implementation in -`dageo` includes optimizations for computational efficiency and numerical -stability. 
+ESMDA coefficient (or inflation factor) is denoted by $\alpha$, the predicted +data vector is $d^f$ and $d_{\text{uc}}$ represents the perturbed observations +[@burgers] for the j-th ensemble member, generated by adding random noise to +the original observations for each iteration, as proposed in the original ESMDA +method. Note that we assume an identity observation operator that +translates the model state to the equivalent of the observations, so it is +omitted in the equation (for more details in this regard see @evensen). The +equation is evaluated for $i$ steps, where $i$ is typically a low number +between 4 and 10. The $\alpha$ can change in each step, as long as $\sum_i +\frac{1}{\alpha_i} = 1$. Common are either constant values or series of +decreasing values. Note that while this explanation describes the parameter +estimation problem, it could also be used for state estimation or +both. The algorithm's implementation in `dageo` includes optimizations for +efficient computation of the covariance matrix and allows easy parallelization of +the forward model. # Key Features and Applications @@ -78,28 +89,29 @@ stability. Existing implementations often lack documentation and informative examples, creating barriers for newcomers. These challenges are addressed in `dageo` through several key innovations: it provides a robust, tested ESMDA -implementation alongside a built-in, simple reservoir simulator, while -offering, as a key feature, integration capabilities with external simulators. -The gallery contains an example of this integration with the \emph{open Delft -Advanced Research Terra Simulator} `open-DARTS` [@opendarts], a -state-of-the-art, open-source reservoir simulation framework developed at TU -Delft. It demonstrates how `dageo` can be used with industry-standard -simulators while maintaining its user-friendly interface. 
The code itself is -light, building upon NumPy arrays [@NumPy] and sparse matrices provided by -SciPy [@SciPy], as only dependencies. +implementation alongside a built-in, simple reservoir simulator, while offering +and showcasing in the gallery, as a key feature, integration capabilities with +external simulators. The gallery contains an example of this integration with +the \emph{open Delft Advanced Research Terra Simulator} `open-DARTS` +[@opendarts], a state-of-the-art, open-source reservoir simulation framework +developed at TU Delft. It demonstrates how `dageo` can be used with +industry-standard simulators while maintaining its user-friendly interface. The +code itself is light, building upon NumPy arrays [@NumPy] and sparse matrices +provided by SciPy [@SciPy], as only dependencies. While other ESMDA implementations exist, e.g., `pyesmda` [@pyesmda], `dageo` distinguishes itself through comprehensive documentation and examples, the integration of a simple but practical reservoir simulator, the implementation of advanced features like localization techniques for parameter updates, -gaussian random field generation for realistic permeability modeling, and a -focus on educational applications. This makes `dageo` a unique and valuable -tool for both research and teaching. The software has been used in several -research projects, including reservoir characterization studies at TU Delft, -integration with the open-DARTS simulator for geothermal applications, and -educational workshops on data assimilation techniques. These applications -highlight the software's versatility and its ability to address a wide range of -challenges in reservoir engineering and geoscience. +Gaussian random field generation for realistic permeability modeling, and a +focus on ease of use, making it suitable for educational applications. This +makes `dageo` a unique and valuable tool for both research and teaching. 
The +software has been used in several research projects, including reservoir +characterization studies at TU Delft, integration with the open-DARTS simulator +for geothermal applications, and educational workshops on data assimilation +techniques [e.g., @saifullin; @seabra]. These applications highlight the +software's versatility and its ability to address a wide range of challenges in +reservoir engineering and geoscience. # Acknowledgements