From 8f5461192d69a11934ec9c81ab7a1a4bdcbacbd3 Mon Sep 17 00:00:00 2001 From: klbonne Date: Tue, 25 Oct 2022 13:11:49 -0400 Subject: [PATCH 01/27] Fixing issues in docstrings that cause warnings/errors in html build process. Renamed an image file with better extension (PNG -> png). Added __init__.py file in iv.models for module recognition. --- ...xtractor.PNG => diode_param_extractor.png} | Bin pvops/iv/extractor.py | 18 +- pvops/iv/models/__init__.py | 0 pvops/iv/physics_utils.py | 15 +- pvops/iv/preprocess.py | 16 + pvops/iv/simulator.py | 342 +++++++++--------- pvops/text/classify.py | 2 - pvops/text/defaults.py | 4 - pvops/text/nlp_utils.py | 17 +- pvops/text/preprocess.py | 8 - pvops/text/utils.py | 7 +- pvops/text/visualize.py | 29 +- pvops/text2time/preprocess.py | 14 - pvops/text2time/utils.py | 39 +- pvops/text2time/visualize.py | 8 - pvops/timeseries/models/AIT.py | 32 +- pvops/timeseries/models/linear.py | 2 - pvops/timeseries/preprocess.py | 12 +- 18 files changed, 258 insertions(+), 307 deletions(-) rename examples/assets/{diode_param_extractor.PNG => diode_param_extractor.png} (100%) create mode 100644 pvops/iv/models/__init__.py diff --git a/examples/assets/diode_param_extractor.PNG b/examples/assets/diode_param_extractor.png similarity index 100% rename from examples/assets/diode_param_extractor.PNG rename to examples/assets/diode_param_extractor.png diff --git a/pvops/iv/extractor.py b/pvops/iv/extractor.py index 219dcca..1c9b50f 100644 --- a/pvops/iv/extractor.py +++ b/pvops/iv/extractor.py @@ -14,7 +14,7 @@ class BruteForceExtractor(): '''Process measured IV curves - Requires a set of curves to create Isc vs Irr and Voc vs Temp vs Isc(Irr) + Requires a set of curves to create Isc vs Irr and Voc vs Temp vs Isc(Irr) ''' def __init__(self, input_df, current_col, voltage_col, irradiance_col, temperature_col, T_type, windspeed_col=None, @@ -235,16 +235,18 @@ def f_multiple_samples(self, params): return msse_tot def fit_params(self, cell_parameters, 
n_mods, bounds_func, user_func=None, verbose=0): - """Fit diode parameters from a set of IV curves. + """ + Fit diode parameters from a set of IV curves. Parameters - ---------- cell_parameters : dict - Cell-level parameters, usually extracted from the CEC database, which will be used as the + Cell-level parameters, usually extracted from the CEC + database, which will be used as the initial guesses in the optimization process. n_mods : int - if int, defines the number of modules in a string(1=simulate a single module) + if int, defines the number of modules in a + string(1=simulate a single module) bounds_func : function Function to establish the bounded search space See below for an example: @@ -259,8 +261,10 @@ def bounds_func(iph,io,rs,rsh,nnsvth,perc_adjust=0.5): (nnsvth - 10*nnsvth*perc_adjust, nnsvth + 10*nnsvth*perc_adjust)) user_func : function - Optional, a function similar to `self.create_string_object` which has the following inputs: - `self, iph, io, rs, rsh, nnsvth`. This can be used to extract unique failure parameterization. + Optional, a function similar to `self.create_string_object` + which has the following inputs: + `self, iph, io, rs, rsh, nnsvth`. This can be used to + extract unique failure parameterization. verbose : int if verbose >= 1, print information about fitting if verbose >= 2, plot information about each iteration diff --git a/pvops/iv/models/__init__.py b/pvops/iv/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pvops/iv/physics_utils.py b/pvops/iv/physics_utils.py index 3b1bf66..80e3c45 100644 --- a/pvops/iv/physics_utils.py +++ b/pvops/iv/physics_utils.py @@ -12,7 +12,6 @@ def calculate_IVparams(v, c): hardcoded regions. 
Parameters - ---------- x : numpy array X-axis data @@ -24,7 +23,6 @@ def calculate_IVparams(v, c): Optional, polyfit degree Returns - ------- Dictionary of IV curve parameters """ @@ -78,7 +76,6 @@ def smooth_curve(x, y, npts=50, deg=12): """Smooth curve using a polyfit Parameters - ---------- x : numpy array X-axis data @@ -90,7 +87,6 @@ def smooth_curve(x, y, npts=50, deg=12): Optional, polyfit degree Returns - ------- smoothed x array smoothed y array @@ -105,7 +101,6 @@ def iv_cutoff(Varr, Iarr, val): """Cut IV curve greater than voltage `val` (usually 0) Parameters - ---------- V: numpy array Voltage array @@ -115,7 +110,6 @@ def iv_cutoff(Varr, Iarr, val): Filter threshold Returns - ------- V_cutoff, I_cutoff """ @@ -128,7 +122,6 @@ def intersection(x1, y1, x2, y2): Adapted from https://stackoverflow.com/a/5462917 Parameters - ---------- x1: numpy array X-axis data for curve 1 @@ -140,7 +133,6 @@ def intersection(x1, y1, x2, y2): Y-axis data for curve 2 Returns - ------- intersection coordinates """ @@ -240,18 +232,17 @@ def T_to_tcell(POA, T, WS, T_type, a=-3.56, b=-0.0750, delTcnd=3): WS: numerical, Wind speed at height of 10 meters, in m/s a,b,delTcnd: numerical, - Page 12 in Ref. [1]_ + Page 12 in Ref. [Dierauf]_ T_type: string, Describe input temperature, either 'ambient' or 'module' Returns - ------- numerical Cell temperature, in Celcius - .. [1] Dierauf, Timothy, et al. Weather-corrected performance ratio. No. NREL/TP-5200-57991. + .. [Dierauf] Dierauf, Timothy, et al. Weather-corrected performance ratio. No. NREL/TP-5200-57991. National Renewable Energy Lab.(NREL), Golden, CO (United States), 2013. https://www.nrel.gov/docs/fy13osti/57991.pdf ''' @@ -383,7 +374,6 @@ def gt_correction(v, i, gact, tact, cecparams, n_units=1, option=3): Three correction options are provided, two of which are from an IEC standard. 
Parameters - ---------- v : numpy array Voltage array @@ -401,7 +391,6 @@ def gt_correction(v, i, gact, tact, cecparams, n_units=1, option=3): Correction method choice. See method for specifics. Returns - ------- vref Corrected voltage array diff --git a/pvops/iv/preprocess.py b/pvops/iv/preprocess.py index 276152a..ba66959 100644 --- a/pvops/iv/preprocess.py +++ b/pvops/iv/preprocess.py @@ -7,6 +7,22 @@ def preprocess(input_df, resmpl_resolution, iv_col_dict, resmpl_cutoff=0.03, correct_gt=False, normalize_y=True, CECmodule_parameters=None, n_mods=None, gt_correct_option=3): """IV processing function which supports irradiance & temperature correction + + Parameters + ---------- + input_df : DataFrame + resmpl_resolution : + iv_col_dict : + resmpl_cutoff : float + correct_gt : bool + normalize_y : bool + CECmodule_parameters : + n_mods : int + gt_correct_option : int + + Returns + ------- + df : DataFrame """ current_col = iv_col_dict["current"] diff --git a/pvops/iv/simulator.py b/pvops/iv/simulator.py index 7e97220..4801e17 100644 --- a/pvops/iv/simulator.py +++ b/pvops/iv/simulator.py @@ -10,14 +10,13 @@ import random from tqdm import tqdm import pvlib -from utils import get_CEC_params -from physics_utils import voltage_pts, add_series, bypass -from physics_utils import intersection, iv_cutoff, gt_correction +from pvops.iv.utils import get_CEC_params +from pvops.iv.physics_utils import voltage_pts, add_series, bypass,\ + intersection, iv_cutoff, gt_correction class Simulator(): - """ - An object which simulates Photovoltaic (PV) current-voltage (IV) curves wth failures + """An object which simulates Photovoltaic (PV) current-voltage (IV) curves with failures Parameters ---------- @@ -26,63 +25,59 @@ class Simulator(): in the the CEC database. The `key` in this dictionary is the name of the module in the CEC database. The `values` are `ncols`, which is the number of columns in the module, and `nsubstrings`, which is the number of substrings. 
- { - key: Module name in CEC database - { - 'ncols': int - 'nsubstrings': int - } - } pristine_condition : dict - Define the pristine condition - A full condition is defined as: - {'identifier': IDENTIFIER_NAME, - 'E': IRRADIANCE, - 'Tc': CELL_TEMPERATURE, - 'Rsh_mult': RSH_MULTIPLIER, - 'Rs_mult': RS_MULTIPLIER, - 'Io_mult': IO_MULTIPLIER, - 'Il_mult': IL_MULTIPLIER, - 'nnsvth_mult': NNSVTH_MULTIPLIER, - 'modname': MODULE_NAME_IN_CECDB - } - - DICTIONARY PARAMETERS: - ---------------------- - IDENTIFIER_NAME: str, - Name used to define condition - IRRADIANCE: numerical, - Value of irradiance (Watts per meter-squared) - CELL_TEMPERATURE: numerical, - Value of cell temperature (Celcius) - RSH_MULTIPLIER: numerical, - Multiplier usually less than 1 to simulate a drop in Rsh - RS_MULTIPLIER: numerical, - Multiplier usually greater than 1 to simulate increase in Rs - IO_MULTIPLIER: numerical, - Multiplier usually less than 1 to simulate a drop in IO - IL_MULTIPLIER: numerical, - Multiplier usually less than 1 to simulate a drop in IL - NNSVTH_MULTIPLIER: numerical, - Multiplier usually less than 1 to simulate a drop in NNSVTH, and therefore a_ref - MODULE_NAME_IN_CECDB: str - Module name in CEC database (e.g. Jinko_Solar_Co___Ltd_JKMS260P_60) + Defines the pristine condition. 
+ A full condition is defined as a dictionary with the + following key/value pairs: + + - 'identifier': IDENTIFIER_NAME, + - 'E': IRRADIANCE, + - 'Tc': CELL_TEMPERATURE, + - 'Rsh_mult': RSH_MULTIPLIER, + - 'Rs_mult': RS_MULTIPLIER, + - 'Io_mult': IO_MULTIPLIER, + - 'Il_mult': IL_MULTIPLIER, + - 'nnsvth_mult': NNSVTH_MULTIPLIER, + - 'modname': MODULE_NAME_IN_CECDB + + IDENTIFIER_NAME: str, + Name used to define condition + IRRADIANCE: numerical, + Value of irradiance (Watts per meter-squared) + CELL_TEMPERATURE: numerical, + Value of cell temperature (Celsius) + RSH_MULTIPLIER: numerical, + Multiplier usually less than 1 to simulate a drop in Rsh + RS_MULTIPLIER: numerical, + Multiplier usually greater than 1 to simulate increase in Rs + IO_MULTIPLIER: numerical, + Multiplier usually less than 1 to simulate a drop in IO + IL_MULTIPLIER: numerical, + Multiplier usually less than 1 to simulate a drop in IL + NNSVTH_MULTIPLIER: numerical, + Multiplier usually less than 1 to simulate a drop in NNSVTH, and therefore a_ref + MODULE_NAME_IN_CECDB: str + Module name in CEC database (e.g. Jinko_Solar_Co___Ltd_JKMS260P_60) replacement_5params : dict Optional, replace the definitions of the five electrical parameters, which normally are extracted from the CEC database. These parameters can be determined by the :py:class:`IVProcessor` class - replacement_5params = {'I_L_ref': None, - 'I_o_ref': None, - 'R_s': None, - 'R_sh_ref': None, - 'a_ref': None} + Key/value pairs: + + - 'I_L_ref': None + - 'I_o_ref': None + - 'R_s': None + - 'R_sh_ref': None + - 'a_ref': None simulation_method : int Module simulation method (1 or 2) - 1 : Avalanche breakdown model, as hypothesized in Ref. [1]_ - 2 : Add-on to method 1, includes a rebalancing of the $I_sc$ prior to adding in series + + 1) Avalanche breakdown model, as hypothesized in Ref. [1]_ + + 2) Add-on to method 1, includes a rebalancing of the $I_sc$ prior to adding in series + .. 
[1] "Computer simulation of the effects of electrical mismatches in photovoltaic cell interconnection circuits" JW Bishop, Solar Cell (1988) DOI: 10.1016/0379-6787(88)90059-2 @@ -90,13 +85,15 @@ class Simulator(): Attributes ---------- multilevel_ivdata : dict - Dictionary containing the simulated IV curves - For nth-definition of string curves, - multilevel_ivdata['string']['STRING IDENTIFIER'][n] - For nth-definition of module curves, - multilevel_ivdata['module']['MODULE IDENTIFIER'][n] - For nth-definition of substring (substr_id= 1,2,3,...) curves, - multilevel_ivdata['module']['MODULE IDENTIFIER']['substr{sbstr_id}'][n] + Dictionary containing the simulated IV curves + + - For nth-definition of string curves, + multilevel_ivdata['string']['STRING IDENTIFIER'][n] + - For nth-definition of module curves, + multilevel_ivdata['module']['MODULE IDENTIFIER'][n] + - For nth-definition of substring (substr_id = 1,2,3,...) curves, + multilevel_ivdata['module']['MODULE IDENTIFIER']['substr{sbstr_id}'][n] + pristine_condition : dict Dictionary of conditions defining the pristine case module_parameters : dict @@ -106,90 +103,110 @@ Methods ------- - add_preset_conditions(fault_name, fault_condition, save_name = None, **kwargs) - Define a failure condition using a preset condition. See :py:mod:`add_preset_conditions` + + add_preset_conditions(fault_name, fault_condition, save_name = None, kwargs) + Define a failure condition using a preset condition. See :py:meth:`add_preset_conditions` + add_manual_conditions(modcell, condition_dict) - Define a failure by passing in modcell and cell condition definitons manually. See :py:mod:`add_manual_conditions` + Define a failure by passing in modcell and cell condition definitions manually. 
See :py:meth:`add_manual_conditions` + generate_many_samples(identifier, N, distributions = None, default_sample = None) Generate `N` more definitions of the same failure cell condition by defining parameter `distributions` + build_strings(config_dict) - Define a string as a list of modules which were defined in :py:mod:`add_preset_conditions` or :py:mod:`add_manual_conditions` + Define a string as a list of modules which were defined in :py:meth:`add_preset_conditions` or :py:meth:`add_manual_conditions` + simulate(sample_limit) Simulate cell, substring, module, and string-level definitions + print_info() Display the number of definitions on the cell, module, and string levels + visualize(lim = False) Visualize the definitions and render parameter distributions Process ------- A `pristine` condition is created automatically - 1. Specify failure conditions either by + + - Specify failure conditions either by + 1) add a preset configuration - add_preset_conditions('complete', fault_condition) - add_preset_conditions('landscape', fault_condition, rows_aff = 2) - add_preset_conditions('portrait', fault_condition, cols_aff = 2) - add_preset_conditions('pole', fault_condition, width = 2, pos = None) - add_preset_conditions('bird_droppings', fault_condition, n_droppings = None) + + - ``add_preset_conditions('complete', fault_condition)`` + - ``add_preset_conditions('landscape', fault_condition, rows_aff = 2)`` + - ``add_preset_conditions('portrait', fault_condition, cols_aff = 2)`` + - ``add_preset_conditions('pole', fault_condition, width = 2, pos = None)`` + - ``add_preset_conditions('bird_droppings', fault_condition, n_droppings = None)`` 2) add a manual configuration - add_manual_conditions(modcell, condition_dict) + + - add_manual_conditions(modcell, condition_dict) + 3) both - 2. (Optional) Generate many definitions of a cell condition - generate_many_samples(identifier, N, distributions = None, default_sample = None) - 3. 
(Optional) Define a string as a list of modules - build_strings(config_dict) - 4. Simulate all levels of the designed PV system - simulate(sample_limit) - 5. (Optional) Display information about the system - print_info() - visualize(lim = False) - 6. Access simulations for your intended use + + - (Optional) Generate many definitions of a cell condition + ``generate_many_samples(identifier, N, distributions = None, default_sample = None)`` + + - (Optional) Define a string as a list of modules + ``build_strings(config_dict)`` + + - Simulate all levels of the designed PV system + ``simulate(sample_limit)`` + + - (Optional) Display information about the system + ``print_info()`` + ``visualize(lim = False)`` + + - Access simulations for your intended use + 1) Export simulations as dataframe, which has columns: - df = sims_to_df(cutoff=False) + ``df = sims_to_df(cutoff=False)`` + 2) Access simulations manually - Inspect `Simulator().multilevel_ivdata` - See `Attributes` above for information on multilevel_ivdata. + Inspect ``Simulator().multilevel_ivdata`` + See `Attributes` above for information on multilevel_ivdata. Example ------- - sim = Simulator( - mod_specs = { - 'Jinko_Solar_Co___Ltd_JKM270PP_60': {'ncols': 6, - 'nsubstrings': 3 - } - }, - pristine_condition = { - 'identifier': 'pristine', - 'E': 1000, - 'Tc': 50, - 'Rsh_mult': 1, - 'Rs_mult': 1, - 'Io_mult': 1, - 'Il_mult': 1, - 'nnsvth_mult': 1, - }, - # Optional, Determined by IVProcessor() - replacement_5params = {'I_L_ref': 9.06157444e+00, - 'I_o_ref': 1.67727320e-10, # 0.3e-10, - 'R_s': 5.35574950e-03, - 'R_sh_ref': 3.03330425e+00, - 'a_ref': 2.54553421e-02} - ) - - condition = {'identifier':'light_shade','E':925} - sim.add_preset_conditions('complete', condition, save_name = f'Complete_lightshading') - - sim.build_strings({'Partial_lightshading': ['pristine']*6 + ['Complete_lightshading']*6}) - - sim.simulate() - - sim.print_info() - - # Look at a result! 
- Vsim = sim.multilevel_ivdata['string']['Partial_lightshading']['V'][0] - Isim = sim.multilevel_ivdata['string']['Partial_lightshading']['I'][0] - + .. code-block:: python + + sim = Simulator( + mod_specs = { + 'Jinko_Solar_Co___Ltd_JKM270PP_60': {'ncols': 6, + 'nsubstrings': 3 + } + }, + pristine_condition = { + 'identifier': 'pristine', + 'E': 1000, + 'Tc': 50, + 'Rsh_mult': 1, + 'Rs_mult': 1, + 'Io_mult': 1, + 'Il_mult': 1, + 'nnsvth_mult': 1, + }, + # Optional, Determined by IVProcessor() + replacement_5params = {'I_L_ref': 9.06157444e+00, + 'I_o_ref': 1.67727320e-10, # 0.3e-10, + 'R_s': 5.35574950e-03, + 'R_sh_ref': 3.03330425e+00, + 'a_ref': 2.54553421e-02} + ) + + condition = {'identifier':'light_shade','E':925} + sim.add_preset_conditions('complete', condition, save_name = f'Complete_lightshading') + + sim.build_strings({'Partial_lightshading': ['pristine']*6 + ['Complete_lightshading']*6}) + + sim.simulate() + + sim.print_info() + + # Look at a result! + Vsim = sim.multilevel_ivdata['string']['Partial_lightshading']['V'][0] + Isim = sim.multilevel_ivdata['string']['Partial_lightshading']['I'][0] """ def __init__(self, @@ -278,30 +295,36 @@ def add_preset_conditions(self, fault_name, fault_condition, save_name=None, **k ---------- fault_name: str Options: + - 'complete': entire module has fault_condition (e.g. Full module shading) - Requires no other specifications - e.g. add_preset_conditions('complete', fault_condition) + Requires no other specifications + e.g. add_preset_conditions('complete', fault_condition) - 'landscape': entire rows are affected by fault_condition (e.g. interrow shading) - Requires specification of rows_aff - e.g. add_preset_conditions('landscape', fault_condition, rows_aff = 2) + Requires specification of rows_aff + e.g. add_preset_conditions('landscape', fault_condition, rows_aff = 2) - 'portrait': entire columns are affected by fault_condition (e.g. vegetation growth shading) - Requires specification of cols_aff - e.g. 
add_preset_conditions('portrait', fault_condition, cols_aff = 2) + Requires specification of cols_aff + + - e.g. add_preset_conditions('portrait', fault_condition, cols_aff = 2) - 'pole': Place pole shadow over module - Requires specification of width (integer), which designates the width of main shadow and \\ - requires light_shading fault_condition specification which specifies less intense shading \\ - on edges of shadow - Optional: pos = (left, right) designates the start and end of the pole shading, - where left is number in the first column and right is number in last column - if pos not specified, the positions are chosen randomly - e.g. add_preset_conditions('pole', fault_condition, light_shading = light_fault_condition, width = 2, pos = (5, 56)) + Requires specification of width (integer), which designates the width of main shadow and \\ + requires light_shading fault_condition specification which specifies less intense shading \\ + on edges of shadow + + - Optional: pos = (left, right) designates the start and end of the pole shading, + where left is number in the first column and right is number in last column + if pos not specified, the positions are chosen randomly + e.g. add_preset_conditions('pole', fault_condition, light_shading = light_fault_condition, width = 2, pos = (5, 56)) - 'bird_droppings': Random positions are chosen for bird_dropping simulations - Optional specification is n_droppings. If not specified, chosen as random number between + + - Optional specification is n_droppings. If not specified, chosen as random number between 1 and the number of cells in a column e.g. add_preset_conditions('bird_droppings', fault_condition, n_droppings = 3) + fault_location: dict Same dict as one shown in __init__. 
- **kwargs: variables defined by which fault_name you choose, see above + + kwargs: variables dependent on which fault_name you choose, see above Tip: ---- @@ -376,17 +399,22 @@ def add_manual_conditions(self, modcell, condition_dict): modcell: dict Key: name of the condition Value: list, - 1D list: Give a single situation for this condition - 2D list: Give multiple situations for this condition - A list where each value signifies a cell's condition - See below for example + - 1D list: Give a single situation for this condition + - 2D list: Give multiple situations for this condition + - A list where each value signifies a cell's condition. + + See below for example. + If key is same as an existing key, the list is appended to list of scenarios \\ which that key owns condition_dict: dict Define the numerical value written in modcell - ** If the variable is not defined, values will default to those specified \\ - in the pristine condition, defined in __init__. + .. note:: + + If the variable is not defined, values will default to those specified \\ + in the pristine condition, defined in __init__. + A full condition is defined as: .. 
code-block:: python @@ -401,8 +429,7 @@ def add_manual_conditions(self, modcell, condition_dict): 'nnsvth_mult': NNSVTH_MULTIPLIER } - condition_dict PARAMETERS: - -------------------------- + Key/value pairs: ID: int, Value corresponding to those in modcell @@ -467,14 +494,12 @@ def _add_val_to_dict(self, d, k, v): """Utility function to conglomerate a dictionary with 2d lists as value Parameters - ---------- d : dict k : a key in `d` dictionary v : value to update at key Returns - ------- Dictionary with updated v value """ @@ -489,13 +514,11 @@ def _get_key_set(self, d, key): Dictionary[k] = key value Parameters - ---------- d : dict key : a key in `d` dictionary Returns - ------- A reformatted dictionary """ @@ -734,7 +757,6 @@ def sims_to_df(self, focus=['string', 'module'], cutoff=False): """Return the failure definitions as a dataframe. Parameters - ---------- focus : list of string Subset the definitions to a level of the system @@ -743,7 +765,6 @@ def sims_to_df(self, focus=['string', 'module'], cutoff=False): Cutoff curves to only return on positive voltage domain Returns - ------- Dataframe with columns: 'current': IV trace current @@ -840,7 +861,6 @@ def simulate(self, sample_limit=None): """Simulate the cell, substring, module, and string-level IV curves using the defined conditions Parameters - ---------- sample_limit : int Optional, used when want to restrict number of combinations of failures at the string level. @@ -882,7 +902,6 @@ def simulate_module(self, mod_key): """Wrapper method which simulates a module depending on the defined simulation_method. Parameters - ---------- mod_key : str Module name as defined in condiction_dict and modcells @@ -1436,8 +1455,17 @@ def build_strings(self, config_dict): """Pass a dictionary into object memory e.g. 
For 6 modules faulted with modcell specification 'complete' - config_dict = {'faulting_bottom_mods': ['pristine', 'pristine', 'pristine', 'pristine', 'pristine', 'pristine', - 'complete', 'complete', 'complete', 'complete', 'complete', 'complete']} + + .. code-block:: python + + config_dict = { + 'faulting_bottom_mods': [ + 'pristine', 'pristine', 'pristine', + 'pristine', 'pristine', 'pristine', + 'complete', 'complete', 'complete', + 'complete', 'complete', 'complete' + ] + } """ # print(config_dict) @@ -1618,7 +1646,6 @@ def visualize_specific_iv(self, ax=None, string_identifier=None, module_identifi If the object has multiple definitions, all definitions will be plotted Parameters - ---------- ax : matplotlib axes Optional, pass an axes to add visualization @@ -1635,7 +1662,6 @@ def visualize_specific_iv(self, ax=None, string_identifier=None, module_identifi Here, cutoff must also be True. Returns - ------- matplotlib axes """ @@ -1708,7 +1734,6 @@ def visualize_multiple_cells_traces(self, list_cell_identifiers, cutoff=True): """Visualize multiple cell traces Parameters - ---------- list_cell_identifiers : list list of cell identifiers. call `self.print_info()` for full list. @@ -1716,7 +1741,6 @@ def visualize_multiple_cells_traces(self, list_cell_identifiers, cutoff=True): If True, only visualize IV curves in positive voltage domain Returns - ------- matplotlib axes """ @@ -1780,7 +1804,6 @@ def visualize_cell_level_traces(self, cell_identifier, cutoff=True, table=True, """Visualize IV curves for cell_identifier and tabulate the definitions. Parameters - ---------- cell_identifier : str Cell identifier. Call `self.print_info()` for full list. @@ -1792,7 +1815,6 @@ def visualize_cell_level_traces(self, cell_identifier, cutoff=True, table=True, Matplotli subplots axes Returns - ------- matplotlib axes @@ -1901,7 +1923,6 @@ def visualize_module_configurations(self, module_identifier, title=None, n_plots """Visualize failure locations on a module. 
Parameters - ---------- module_identifier : int Module identifier. Call `self.print_info()` for full list. @@ -1911,7 +1932,6 @@ def visualize_module_configurations(self, module_identifier, title=None, n_plots Number of plots to render in a single figure. Returns - ------- matplotlib axes diff --git a/pvops/text/classify.py b/pvops/text/classify.py index 085d1c7..6b2f366 100644 --- a/pvops/text/classify.py +++ b/pvops/text/classify.py @@ -27,7 +27,6 @@ def classification_deployer( To see an example of this method's application, see ``examples//text_class_example.py`` Parameters - ---------- X : list of str List of documents (str). The documents will be passed through the pipeline_steps, where they will be transformed into vectors. @@ -114,7 +113,6 @@ def classification_deployer( The frequency of the messages increase with the verbosity level. Returns - ------- DataFrame Summarization of results from all of the classifiers diff --git a/pvops/text/defaults.py b/pvops/text/defaults.py index 6bbbd88..45733cd 100644 --- a/pvops/text/defaults.py +++ b/pvops/text/defaults.py @@ -34,7 +34,6 @@ def supervised_classifier_defs(settings_flag): non-specific to the natural language processing application Parameters - ---------- settings_flag : str Either 'light', 'normal' or 'detailed'; a setting which @@ -44,7 +43,6 @@ def supervised_classifier_defs(settings_flag): setting but for days on 'detailed'. Returns - ------- search_space: dict Hyperparameter instances for each clusterer @@ -261,7 +259,6 @@ def unsupervised_classifier_defs(setting_flag, n_clusters): the natural language processing application Parameters - ---------- setting_flag : str Either 'normal' or 'detailed'; a setting which determines @@ -274,7 +271,6 @@ def unsupervised_classifier_defs(setting_flag, n_clusters): set to the number of unique categories within data. 
Returns - ------- search_space: dict Hyperparameter instances for each clusterer diff --git a/pvops/text/nlp_utils.py b/pvops/text/nlp_utils.py index 3d56584..7cfc34d 100644 --- a/pvops/text/nlp_utils.py +++ b/pvops/text/nlp_utils.py @@ -94,20 +94,19 @@ class DataDensifier(BaseEstimator): This process is usually incorporated in this library when doing unsupervised machine learning. This class is built specifically to work inside a sklearn pipeline. Therefore, it uses the default ``transform``, ``fit``, ``fit_transform`` method structure. + """ def transform(self, X, y=None): """Return a dense array if the input array is sparse. Parameters - ---------- X : array Input data of numerical values. For this package, these values could represent embedded representations of documents. Returns - ------- dense array """ @@ -120,14 +119,12 @@ def fit(self, X, y=None): """Placeholder method to conform to the sklearn class structure. Parameters - ---------- X : array Input data y : Not utilized. Returns - ------- DataDensifier object """ @@ -138,14 +135,12 @@ def fit_transform(self, X, y=None): which returns a dense array when the input is sparse. Parameters - ---------- X : array Input data y : Not utilized. Returns - ------- dense array """ @@ -156,8 +151,7 @@ def create_stopwords(lst_langs=["english"], lst_add_words=[], lst_keep_words=[]) """Concatenate a list of stopwords using both words grabbed from nltk and user-specified words. Parameters - - --------- + ---------- lst_langs: list List of strings designating the languages for a nltk.corpus.stopwords.words query. If empty list is passed, no stopwords will be queried from nltk. lst_add_words: list @@ -166,7 +160,6 @@ def create_stopwords(lst_langs=["english"], lst_add_words=[], lst_keep_words=[]) List of words(e.g., "before" or "until") to remove from stopwords list. This is usually used to modify default stop words that might be of interest to PV. 
Returns - ------- List List of alphabetized stopwords @@ -189,16 +182,14 @@ def summarize_text_data(om_df, colname): in total. Parameters - - --------- + ---------- om_df : DataFrame A pandas dataframe containing O&M data, which contains at least the colname of interest colname : str Column name of column with text Returns - - ------ + ------- None """ df = om_df.copy() diff --git a/pvops/text/preprocess.py b/pvops/text/preprocess.py index 64dce44..3b1a1ab 100644 --- a/pvops/text/preprocess.py +++ b/pvops/text/preprocess.py @@ -14,7 +14,6 @@ def preprocessor( """Preprocessing function which processes the raw text data into processed text data and extracts dates Parameters - ---------- om_df : DataFrame A pandas dataframe containing O&M data, which contains at least the columns within col_dict. @@ -33,7 +32,6 @@ def preprocessor( If False, return with preprocessed text and extracted dates Returns - ------- DataFrame Contains the original columns as well as the processed data, located in columns defined by the inputs @@ -146,7 +144,6 @@ def get_dates( extract_dates_only = True. Parameters - ---------- document : str String representation of a document @@ -168,7 +165,6 @@ def get_dates( Recommendation: set True if you frequently publish documents and your dataframe is ordered chronologically Returns - ------- list List of dates found in text @@ -286,7 +282,6 @@ def text_remove_nondate_nums(document, PRINT_INFO=False): as a date by the date extractor. 
Parameters - ---------- document : str String representation of a document @@ -295,7 +290,6 @@ def text_remove_nondate_nums(document, PRINT_INFO=False): progress Returns - ------- string string of processed document @@ -421,7 +415,6 @@ def text_remove_numbers_stopwords(document, lst_stopwords): """Conduct final processing steps after date extraction Parameters - ---------- document : str String representation of a document @@ -429,7 +422,6 @@ def text_remove_numbers_stopwords(document, lst_stopwords): List of stop words which will be filtered in final preprocessing step Returns - ------- string string of processed document diff --git a/pvops/text/utils.py b/pvops/text/utils.py index 2b7da4c..01bb39f 100644 --- a/pvops/text/utils.py +++ b/pvops/text/utils.py @@ -8,15 +8,15 @@ def remap_attributes(om_df, remapping_df, remapping_col_dict, within remapping_df. Parameters - - --------- + ---------- om_df : DataFrame A pandas dataframe containing O&M data, which needs to be remapped. remapping_df : dataframe Holds columns that define the remappings - remapping_col_dict: dict of {str : str} + remapping_col_dict : dict of {str : str} A dictionary that contains the column names that describes how remapping is going to be done + - **attribute_col** (*string*), should be assigned to associated column name in om_df which will be remapped - **remapping_col_from** (*string*), should be assigned @@ -34,7 +34,6 @@ def remap_attributes(om_df, remapping_df, remapping_col_dict, If True, print information about remapping. 
Returns - ------- DataFrame dataframe with remapped columns populated diff --git a/pvops/text/visualize.py b/pvops/text/visualize.py index 24d31ce..0b8195c 100644 --- a/pvops/text/visualize.py +++ b/pvops/text/visualize.py @@ -31,7 +31,6 @@ def visualize_attribute_connectivity( ``ATTRIBUTE1_COL`` and ``ATTRIBUTE2_COL`` Parameters - ---------- om_df : DataFrame A pandas dataframe containing O&M data, which contains columns specified in om_col_dict @@ -49,19 +48,19 @@ def visualize_attribute_connectivity( graph_aargs Optional, arguments passed to networkx graph drawer. Suggested attributes to pass: - with_labels=True - font_weight='bold' - node_size=19000 - font_size=35 - node_color='darkred' - font_color='red' + + - with_labels=True + - font_weight='bold' + - node_size=19000 + - font_size=35 + - node_color='darkred' + - font_color='red' Returns - ------- Matplotlib figure instance, networkx EdgeView object - i.e. [('A', 'X'), ('X', 'B'), ('C', 'Y'), ('C', 'Z')] + i.e. [('A', 'X'), ('X', 'B'), ('C', 'Y'), ('C', 'Z')] """ df = om_df.copy() ATTRIBUTE1_COL = om_col_dict["attribute1_col"] @@ -110,14 +109,15 @@ def visualize_attribute_timeseries( for each label within the label column Parameters - ---------- om_df : DataFrame A pandas dataframe of O&M data, which contains columns in om_col_dict om_col_dict: dict of {str : str} A dictionary that contains the column names relevant for the get_dates fn + - **label** (*string*), should be assigned to associated column name for the label/attribute of interest in om_df - **date** (*string*), should be assigned to associated column name for the dates relating to the documents in om_df + date_structure : str Controls the resolution of the bar chart's timeseries Default: "%Y-%m". 
Can change to include finer resolutions (e.g., by including day, "%Y-%m-%d") @@ -128,7 +128,6 @@ def visualize_attribute_timeseries( Optional, color map name in matplotlib Returns - ------- Matplotlib figure instance """ @@ -197,7 +196,6 @@ def visualize_cluster_entropy( """Visualize entropy of embedding space parition. Currently only supports doc2vec embedding. Parameters - ---------- doc2vec : Doc2Vec model instance Instance of gensim.models.doc2vec.Doc2Vec @@ -222,7 +220,6 @@ def eval_kmeans(X,k): Optional, color map Returns - ------- Matplotlib figure instance """ @@ -283,7 +280,6 @@ def visualize_document_clusters(cluster_tokens, min_frequency=20): the results of an unsupervised partitioning of documents. Parameters - ---------- cluster_tokens : list List of tokenized documents @@ -291,7 +287,6 @@ def visualize_document_clusters(cluster_tokens, min_frequency=20): Minimum number of occurrences that a word must have in a cluster for it to be visualized Returns - ------- Matplotlib figure instance """ @@ -362,7 +357,6 @@ def visualize_word_frequency_plot( """Visualize the frequency distribution of words within a set of documents Parameters - ---------- tokenized_words : list List of tokenized words @@ -370,11 +364,10 @@ def visualize_word_frequency_plot( Optional, title of plot font_size : int Optional, font size - **aargs : + aargs : Optional, other parameters passed to nltk.FreqDist.plot() Returns - ------- Matplotlib figure instance """ diff --git a/pvops/text2time/preprocess.py b/pvops/text2time/preprocess.py index 322e57a..f628265 100644 --- a/pvops/text2time/preprocess.py +++ b/pvops/text2time/preprocess.py @@ -12,7 +12,6 @@ def data_site_na(pom_df, df_col_dict): or O&M data. 
Parameters - ---------- pom_df: DataFrame A data frame corresponding to either the production or O&M @@ -28,7 +27,6 @@ def data_site_na(pom_df, df_col_dict): user's site-ID Returns - ------- pom_df: DataFrame An updated version of the input data frame, where rows with @@ -56,9 +54,7 @@ def om_date_convert(om_df, om_col_dict, toffset=0.0): Converts dates from string format to date time object in O&M dataframe. - Parameters - ---------- om_df: DataFrame A data frame corresponding to O&M data. @@ -78,7 +74,6 @@ def om_date_convert(om_df, om_col_dict, toffset=0.0): don't align as they should Returns - ------- DataFrame An updated version of the input dataframe, but with @@ -111,9 +106,7 @@ def om_datelogic_check(om_df, om_col_dict, om_dflag="swap"): either dropped or the dates are swapped, depending on the user's preference. - Parameters - ---------- om_df: DataFrame A data frame corresponding to O&M data. @@ -134,7 +127,6 @@ def om_datelogic_check(om_df, om_col_dict, om_dflag="swap"): that row. Returns - ------- om_df: DataFrame An updated version of the input dataframe, but with O&M data @@ -177,7 +169,6 @@ def om_nadate_process(om_df, om_col_dict, om_dendflag="drop"): preference. Parameters - ---------- om_df: DataFrame A data frame corresponding to O&M data. @@ -199,7 +190,6 @@ def om_nadate_process(om_df, om_col_dict, om_dendflag="drop"): rows untouched. Returns - ------- om_df: DataFrame An updated version of the input dataframe, but with no @@ -247,7 +237,6 @@ def prod_date_convert(prod_df, prod_col_dict, toffset=0.0): Parameters - ---------- prod_df: DataFrame A data frame corresponding to production data. @@ -265,7 +254,6 @@ def prod_date_convert(prod_df, prod_col_dict, toffset=0.0): and O&M data don't align as they should. 
Returns - ------- DataFrame An updated version of the input dataframe, but with @@ -295,7 +283,6 @@ def prod_nadate_process(prod_df, prod_col_dict, pnadrop=False): Parameters - ---------- prod_df: DataFrame A data frame corresponding to production data. @@ -315,7 +302,6 @@ def prod_nadate_process(prod_df, prod_col_dict, pnadrop=False): will output the same input data frame with no modifications. Returns - ------- prod_df: DataFrame The output data frame. If pflag = 'drop', an updated version diff --git a/pvops/text2time/utils.py b/pvops/text2time/utils.py index b1dd686..652b954 100644 --- a/pvops/text2time/utils.py +++ b/pvops/text2time/utils.py @@ -11,9 +11,7 @@ def interpolate_data(prod_df, om_df, prod_col_dict, om_col_dict, om_cols_to_tran """ Provides general overview of the overlapping production and O&M data. - Parameters - ---------- prod_df: DataFrame A data frame corresponding to the production @@ -50,7 +48,6 @@ def interpolate_data(prod_df, om_df, prod_col_dict, om_col_dict, om_cols_to_tran List of om_col_dict keys to translate into prod_df Returns - ------- prod_output: DataFrame A data frame that includes statistics for the production data per site in the data frame. @@ -58,7 +55,7 @@ def interpolate_data(prod_df, om_df, prod_col_dict, om_col_dict, om_cols_to_tran - **Actual # Time Stamps** (*datetime.datetime*), total number of overlapping production time-stamps - - **Max # Time Stamps** (*datetime.datetime), maximum number of production time-stamps, + - **Max # Time Stamps** (*datetime.datetime*), maximum number of production time-stamps, including NANs om_out: DataFrame @@ -67,7 +64,7 @@ def interpolate_data(prod_df, om_df, prod_col_dict, om_col_dict, om_cols_to_tran - **Earliest Event Start** (*datetime.datetime*), column that specifies timestamp of earliest start of all events per site. 
- - **Latest Event End** (*datetime.datetime), column that specifies timestamp for
+ - **Latest Event End** (*datetime.datetime*), column that specifies timestamp for
 latest conclusion of all events per site.

 - **Total Events** (*int*), column that specifies total number of events per site

@@ -114,9 +111,7 @@ def summarize_overlaps(prod_df, om_df, prod_col_dict, om_col_dict):
     """
     Provides general overview of the overlapping production and O&M data.

-
     Parameters
-
     ----------
     prod_df: DataFrame
         A data frame corresponding to the production
@@ -149,7 +144,6 @@ def summarize_overlaps(prod_df, om_df, prod_col_dict, om_col_dict):
         column name in om_df

     Returns
-
     -------
     prod_output: DataFrame
         A data frame that includes statistics for the production data per site in the data frame.
@@ -166,7 +160,7 @@ def summarize_overlaps(prod_df, om_df, prod_col_dict, om_col_dict):

         - **Earliest Event Start** (*datetime.datetime*), column that specifies timestamp of
           earliest start of all events per site.
-        - **Latest Event End** (*datetime.datetime), column that specifies timestamp for
+        - **Latest Event End** (*datetime.datetime*), column that specifies timestamp for
           latest conclusion of all events per site.
         - **Total Events** (*int*), column that specifies total number of events per site

@@ -214,9 +208,7 @@ def om_summary_stats(om_df, meta_df, om_col_dict, meta_col_dict):
    Latter is calculated by using corresponding site commissioning date
    within the metadata dataframe.
- Parameters - ---------- om_df: DataFrame A data frame corresponding to the O&M data after having been pre-processed @@ -249,17 +241,13 @@ def om_summary_stats(om_df, meta_df, om_col_dict, meta_col_dict): - **COD** (*string*), should be asigned to column name corresponding to associated commisioning dates for all sites captured in om_df - Returns - ------- om_df: DataFrame An updated version of the input dataframe, but with three new columns added for visualizations: event duration, month of event occurrence, and age of system at time of event occurrence. See om_col_dict for mapping of expected variables to user-defined variables. - - """ # assigning dictionary items to local variables for cleaner code @@ -326,9 +314,7 @@ def overlapping_data(prod_df, om_df, prod_col_dict, om_col_dict): for any given site. The outputs are a truncated version of the input data frames, that contains only data with overlapping dates between the two DFs. - Parameters - ---------- prod_df: DataFrame A data frame corresponding to the production @@ -364,7 +350,6 @@ def overlapping_data(prod_df, om_df, prod_col_dict, om_col_dict): column name in om_df Returns - ------- prod_df: DataFrame Production data frame similar to the input data frame, but truncated @@ -465,9 +450,7 @@ def prod_anomalies(prod_df, prod_col_dict, minval=1.0, repval=np.nan, ffill=True is set to 'True' in the input argument, a forward-fill method is used to replace the unexpected drops. - Parameters - ---------- prod_df: DataFrame A data frame corresponding to production data were production is logged on @@ -485,15 +468,14 @@ def prod_anomalies(prod_df, prod_col_dict, minval=1.0, repval=np.nan, ffill=True values below minval will be addressed by this function. Default minval is 1.0 repval: float - Value that should replace the anomalies in a cumulative production data format. - Default value is numpy's NAN. + Value that should replace the anomalies in a cumulative production data format. + Default value is numpy's NAN. 
ffill: boolean Boolean flag that determines whether NANs in production column in prod_df should be filled using a forward-fill method. Returns - ------- prod_df: DataFrame An updated version of the input dataframe, but with zero production values @@ -523,9 +505,7 @@ def prod_quant(prod_df, prod_col_dict, comp_type, ecumu=True): """ Compares performance of observed production data in relation to an expected baseline - Parameters - ---------- prod_df: DataFrame A data frame corresponding to the production data after having been @@ -554,13 +534,12 @@ def prod_quant(prod_df, prod_col_dict, comp_type, ecumu=True): (observed/baseline) ecumu: bool - Boolean flag that specifies whether the production (energy output) + Boolean flag that specifies whether the production (energy output) data is input as cumulative information ("True") or on a per time-step basis ("False"). - Returns - - ------- - DataFrame + Returns + ------- + DataFrame A data frame similar to the input, with an added column for the performance comparisons """ diff --git a/pvops/text2time/visualize.py b/pvops/text2time/visualize.py index 855390c..16855d8 100644 --- a/pvops/text2time/visualize.py +++ b/pvops/text2time/visualize.py @@ -14,7 +14,6 @@ def visualize_counts(om_df, om_col_dict, count_var, fig_sets): Parameters - ---------- om_df: DataFrame A data frame corresponding to the O&M data after having been pre-processed @@ -39,7 +38,6 @@ def visualize_counts(om_df, om_col_dict, count_var, fig_sets): - **fontsize** (*int*), which is the desired font-size for the figure Returns - ------- None @@ -98,9 +96,7 @@ def visualize_categorical_scatter(om_df, om_col_dict, cat_varx, cat_vary, fig_se Produces a seaborn categorical scatter plot to show the relationship between an O&M numerical column and a categorical column using sns.catplot() - Parameters - ---------- om_df: DataFrame A data frame corresponding to the O&M data after having been pre-processed @@ -129,7 +125,6 @@ def 
visualize_categorical_scatter(om_df, om_col_dict, cat_varx, cat_vary, fig_se - **fontsize** (*int*), which is the desired font-size for the figure Returns - ------- None @@ -196,9 +191,7 @@ def visualize_om_prod_overlap( Creates Plotly figures of performance data overlaid with coinciding O&M tickets. A separate figure for each site in the production data frame (prod_df) is generated. - Parameters - ---------- prod_df: DataFrame A data frame corresponding to the performance data after (ideally) having been @@ -265,7 +258,6 @@ def visualize_om_prod_overlap( 'False' will not. Returns - ------- list List of Plotly figure handles generated by function for each site within prod_df. diff --git a/pvops/timeseries/models/AIT.py b/pvops/timeseries/models/AIT.py index 0cf2810..e2f21b4 100644 --- a/pvops/timeseries/models/AIT.py +++ b/pvops/timeseries/models/AIT.py @@ -4,6 +4,9 @@ class Predictor: + """ + Predictor class + """ def __init__(self): super(Predictor, self).__init__() @@ -19,10 +22,12 @@ def apply_additive_polynomial_model(self, model_terms, Xs): model_terms : list of tuples Contain model coefficients and powers. For example, - [(0.29359785963294494, [1, 0]), - (0.754806343190528, [0, 1]), - (0.396833207207238, [1, 1]), - (-0.0588375219110795, [0, 0])] + .. 
code-block:: python + + [(0.29359785963294494, [1, 0]), + (0.754806343190528, [0, 1]), + (0.396833207207238, [1, 1]), + (-0.0588375219110795, [0, 0])] prod_col_dict : dict Dictionary mapping nicknamed parameters to @@ -178,7 +183,6 @@ def AIT_calc(prod_df, prod_col_dict): based on trained regression model from field data Parameters - ---------- prod_df: DataFrame A data frame corresponding to the production data @@ -200,20 +204,20 @@ def AIT_calc(prod_df, prod_col_dict): in prod_df Example - ------- - production_col_dict = {'irradiance': 'irrad_poa_Wm2', - 'ambient_temperature': 'temp_amb_C', - 'dcsize': 'capacity_DC_kW', - 'energyprod': 'energy_generated_kWh', - 'baseline': 'predicted' - } - data = AIT_calc(data, production_col_dict) + .. code-block:: python + production_col_dict = {'irradiance': 'irrad_poa_Wm2', + 'ambient_temperature': 'temp_amb_C', + 'dcsize': 'capacity_DC_kW', + 'energyprod': 'energy_generated_kWh', + 'baseline': 'predicted' + } + data = AIT_calc(data, production_col_dict) - Returns + Returns ------- DataFrame A data frame for production data with a new column, diff --git a/pvops/timeseries/models/linear.py b/pvops/timeseries/models/linear.py index ebada96..47ea1df 100644 --- a/pvops/timeseries/models/linear.py +++ b/pvops/timeseries/models/linear.py @@ -434,7 +434,6 @@ def modeller(prod_col_dict, and `train_df` Parameters - ---------- prod_col_dict: dict of {str : str} A dictionary that contains the column names relevant @@ -533,7 +532,6 @@ def modeller(prod_col_dict, execution. Returns - ------- `model`, which is a `pvops.timeseries.models.linear.Model` object, has a useful attribute `estimators`, which allows access to model performance and data splitting information. 
diff --git a/pvops/timeseries/preprocess.py b/pvops/timeseries/preprocess.py index 71b8a55..03a6bdf 100644 --- a/pvops/timeseries/preprocess.py +++ b/pvops/timeseries/preprocess.py @@ -12,7 +12,6 @@ def establish_solar_loc(prod_df, prod_col_dict, meta_df, meta_col_dict): sites simultaneously. Parameters - ---------- prod_df: DataFrame A data frame corresponding to production data containing a datetime index. @@ -35,7 +34,6 @@ def establish_solar_loc(prod_df, prod_col_dict, meta_df, meta_col_dict): - **latitude** (*string*), should be assigned to site's latitude Returns - ------- Original dataframe (copied) with new timeseries solar position data using the same column name definitions provided in pvLib. @@ -73,7 +71,6 @@ def normalize_production_by_capacity(prod_df, sites simultaneously. Parameters - ---------- prod_df: DataFrame A data frame corresponding to production data. @@ -86,6 +83,7 @@ def normalize_production_by_capacity(prod_df, - **siteid** (*string*), should be assigned to site-ID column name in prod_df - **capacity_normalized_power** (*string*), should be assigned to a column name where the normalized output signal will be stored + meta_df: DataFrame A data frame corresponding to site metadata. At the least, the columns in meta_col_dict be present. @@ -128,7 +126,6 @@ def prod_irradiance_filter(prod_df, prod_col_dict, meta_df, meta_col_dict, THIS METHOD IS CURRENTLY IN DEVELOPMENT. Parameters - ---------- prod_df: DataFrame A data frame corresponding to production data. 
@@ -138,7 +135,7 @@ def prod_irradiance_filter(prod_df, prod_col_dict, meta_df, meta_col_dict, which consist of at least: - **timestamp** (*string*), should be assigned to associated time-stamp - column name in prod_df + column name in prod_df - **siteid** (*string*), should be assigned to site-ID column name in prod_df - **irradiance** (*string*), should be assigned to associated irradiance column name in prod_df - **clearsky_irr** (*string*), should be assigned to clearsky irradiance column name in prod_df @@ -163,7 +160,6 @@ def prod_irradiance_filter(prod_df, prod_col_dict, meta_df, meta_col_dict, A pvanalytics parameter of maximum ratio of measured to clearsky (clearsky index). Returns - ------- prod_df: DataFrame A dataframe with new **clearsky_irr** column. If drop=True, a filtered prod_df according to clearsky. @@ -260,7 +256,6 @@ def prod_inverter_clipping_filter(prod_df, prod_col_dict, meta_df, meta_col_dict """Filter rows of production data frame according to performance and data quality Parameters - ---------- prod_df: DataFrame A data frame corresponding to production data. @@ -270,7 +265,7 @@ def prod_inverter_clipping_filter(prod_df, prod_col_dict, meta_df, meta_col_dict which consist of at least: - **timestamp** (*string*), should be assigned to associated time-stamp - column name in prod_df + column name in prod_df - **siteid** (*string*), should be assigned to site-ID column name in prod_df - **powerprod** (*string*), should be assigned to associated power production column name in prod_df @@ -293,7 +288,6 @@ def prod_inverter_clipping_filter(prod_df, prod_col_dict, meta_df, meta_col_dict Extra parameters passed to the relevant pvanalytics model. If none passed, defaults are used. Returns - ------- prod_df: DataFrame If drop=True, a filtered dataframe with clipping periods removed is returned. 
From a9da704ac07b05c44bb66397e9caf02fb871d6c0 Mon Sep 17 00:00:00 2001 From: klbonne Date: Tue, 25 Oct 2022 14:55:27 -0400 Subject: [PATCH 02/27] Added comments to explain extensions --- docs/conf.py | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 8cd77b5..eda9311 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,31 +13,15 @@ import os import sys -import mock -MODULES = ['numpy', 'nltk', 'sklearn.pipeline', 'sklearn.model_selection', - 'scipy.sparse', 'pandas', 'scipy', 'sklearn.base', - 'gensim.models.doc2vec', 'nltk.tokenize', 'datefinder', - 'text_remove_nondate_nums', 'text_remove_numbers_stopwords', - 'get_dates', 'gensim.models', 'sklearn.svm', 'sklearn.tree', - 'sklearn.neural_network', 'sklearn.linear_model', - 'sklearn.ensemble', "sklearn.cluster", "networkx", "matplotlib", - "matplotlib.pyplot", "gensim.models.doc2vec", - "sklearn.feature_extraction.text", "nltk.tokenize", - "plotly.graph_objects", "scipy.signal", 'matplotlib.colors', - 'seaborn', 'matplotlib.ticker', 'scipy.signal.find_peaks', - 'pvlib', 'pvanalytics', 'timezonefinder', 'sklearn', "pyDOE", - "sklearn.metrics", "scipy.interpolate", "keras", "keras.layers", - "sklearn.utils", "sklearn.preprocessing", "keras.models", - "keras.utils"] - -for module in MODULES: - sys.modules[module] = mock.Mock() sys.path.insert(0, os.path.abspath("../pvops")) sys.path.insert(0, os.path.abspath("../pvops/text2time")) sys.path.insert(0, os.path.abspath("../pvops/text")) sys.path.insert(0, os.path.abspath("../pvops/timeseries")) +sys.path.insert(0, os.path.abspath("../pvops/timeseries/models")) sys.path.insert(0, os.path.abspath("../pvops/iv")) +sys.path.insert(0, os.path.abspath("../pvops/iv/models")) + # -- Project information ----------------------------------------------------- @@ -55,10 +39,19 @@ # Add any Sphinx extension module names here, as strings. 
They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["sphinx.ext.autodoc", - "nbsphinx", - "nbsphinx_link", - "sphinx_copybutton"] +extensions = [ + "sphinx.ext.autodoc", #pull in documentation from docstrings in a semi-automatic way. + "nbsphinx", # nbsphinx is a Sphinx extension that provides a source parser for *.ipynb files + "nbsphinx_link", # A sphinx extension for including notebook files from outside the sphinx source root. + "sphinx_copybutton", # adds copy button to code blocks + "sphinx.ext.coverage", # `make coverage` summarizes what has docstrings + 'sphinx.ext.doctest', # allows for testing of code snippets + 'sphinx.ext.viewcode', # add links to highlighted source code + 'sphinx.ext.napoleon' # add parsing for google/numpy style docs + ] + + +coverage_show_missing_items = True # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] From f80797b8ee745b27e446b370bd6ee1d9aa4e9e2e Mon Sep 17 00:00:00 2001 From: klbonne Date: Tue, 25 Oct 2022 16:33:09 -0400 Subject: [PATCH 03/27] Make internal imports absolute with respect to root of source files. 
This removes the need to do things like # sys.path.append('..') # IV_path = os.path.join('..', 'pvops', 'iv') # sys.path.append(IV_path) --- pvops/iv/extractor.py | 6 +++--- pvops/iv/preprocess.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pvops/iv/extractor.py b/pvops/iv/extractor.py index 1c9b50f..f3bd79e 100644 --- a/pvops/iv/extractor.py +++ b/pvops/iv/extractor.py @@ -4,12 +4,12 @@ import numpy as np import matplotlib.pyplot as plt -from physics_utils import calculate_IVparams, smooth_curve import scipy import sklearn -from simulator import Simulator +from pvops.iv.simulator import Simulator import time -from physics_utils import iv_cutoff, T_to_tcell +from pvops.iv.physics_utils import iv_cutoff, T_to_tcell, \ + calculate_IVparams, smooth_curve class BruteForceExtractor(): diff --git a/pvops/iv/preprocess.py b/pvops/iv/preprocess.py index ba66959..d2fb27a 100644 --- a/pvops/iv/preprocess.py +++ b/pvops/iv/preprocess.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from physics_utils import gt_correction +from pvops.iv.physics_utils import gt_correction def preprocess(input_df, resmpl_resolution, iv_col_dict, resmpl_cutoff=0.03, From 75ccf24f37e8a6f3210ef109b7fbe6feea0ff71d Mon Sep 17 00:00:00 2001 From: klbonne Date: Thu, 27 Oct 2022 14:27:51 -0400 Subject: [PATCH 04/27] added comment in conf.py and fixed more formatting in docstrings --- docs/conf.py | 2 +- pvops/iv/models/nn.py | 11 ----------- pvops/text2time/visualize.py | 16 +++++++++------- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index eda9311..1386d07 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,7 +40,7 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - "sphinx.ext.autodoc", #pull in documentation from docstrings in a semi-automatic way. + "sphinx.ext.autodoc", # pull in documentation from docstrings in a semi-automatic way. 
"nbsphinx", # nbsphinx is a Sphinx extension that provides a source parser for *.ipynb files "nbsphinx_link", # A sphinx extension for including notebook files from outside the sphinx source root. "sphinx_copybutton", # adds copy button to code blocks diff --git a/pvops/iv/models/nn.py b/pvops/iv/models/nn.py index 26bd4f7..03fb1cc 100644 --- a/pvops/iv/models/nn.py +++ b/pvops/iv/models/nn.py @@ -22,7 +22,6 @@ def get_diff_array(sample_V, sample_I, pristine_V, pristine_I, debug=False): """Generate IV current differential between sample and pristine. Parameters - ---------- sample_V : array Voltage array for a sample's IV curve @@ -34,7 +33,6 @@ def get_diff_array(sample_V, sample_I, pristine_V, pristine_I, debug=False): Current array for a pristine IV curve Returns - ------- all_V : array Combined voltage array @@ -82,7 +80,6 @@ def feature_generation(bigdf, iv_col_dict, slope of the cuve. Parameters - ---------- bigdf : dataframe Dataframe holding columns from `iv_col_dict`, except for the @@ -105,7 +102,6 @@ def feature_generation(bigdf, iv_col_dict, lowest temperature definitions is chosen. Returns - ------- all_V : array Combined voltage array @@ -154,7 +150,6 @@ def balance_df(df, iv_col_dict, balance_tactic='truncate'): unique `ycol` definition. Parameters - ---------- bigdf : dataframe Dataframe containing the `ycol` column. @@ -170,7 +165,6 @@ def balance_df(df, iv_col_dict, balance_tactic='truncate'): towards a central target. Returns - ------- dataframe, balanced according to the `balance_tactic`. """ @@ -275,7 +269,6 @@ def plot_profiles(df, colx, coly, iv_col_dict, cmap_name='brg'): in definitions. Parameters - ---------- df : dataframe Dataframe containing the `colx`, `coly`, and iv_col_dict['mode'] column @@ -292,7 +285,6 @@ def plot_profiles(df, colx, coly, iv_col_dict, cmap_name='brg'): Matplotlib colormap. 
Returns - ------- matplotlib figure """ @@ -394,7 +386,6 @@ def classify_curves(df, iv_col_dict, nn_config): """Build and evaluate an IV trace failure `mode` classifier. Parameters - ---------- df : dataframe Data with columns in `iv_col_dict` @@ -464,7 +455,6 @@ def structure(self, train, test): """Structure the data according to the chosen network model's input structure. Parameters - ---------- train : dataframe Train data containing IV data and associated features @@ -605,7 +595,6 @@ def predict(self, batch_size=8): """Predict using the trained model. Parameters - ---------- batch_size : int Number of samples per gradient update diff --git a/pvops/text2time/visualize.py b/pvops/text2time/visualize.py index 16855d8..82cf403 100644 --- a/pvops/text2time/visualize.py +++ b/pvops/text2time/visualize.py @@ -12,7 +12,6 @@ def visualize_counts(om_df, om_col_dict, count_var, fig_sets): """ Produces a seaborn countplot of an O&M categorical column using sns.countplot() - Parameters ---------- om_df: DataFrame @@ -93,15 +92,18 @@ def visualize_counts(om_df, om_col_dict, count_var, fig_sets): def visualize_categorical_scatter(om_df, om_col_dict, cat_varx, cat_vary, fig_sets): """ - Produces a seaborn categorical scatter plot to show the relationship between - an O&M numerical column and a categorical column using sns.catplot() + Produces a seaborn categorical scatter plot to show + the relationship between an O&M numerical column and + a categorical column using sns.catplot() Parameters ---------- om_df: DataFrame - A data frame corresponding to the O&M data after having been pre-processed - to address NANs and date consistency, and after applying the ``om_summary_stats`` function. - This data frame needs at least the columns specified in om_col_dict. + A data frame corresponding to the O&M data after having been + pre-processed to address NANs and date consistency, and after + applying the ``om_summary_stats`` function. 
+ This data frame needs at least the columns specified + in om_col_dict. om_col_dict: dict of {str : str} A dictionary that contains the column names relevant for the O&M data @@ -220,7 +222,7 @@ def visualize_om_prod_overlap( - **siteid** (*string*), should be assigned to column name for user's site-ID - **datestart** (*string*), should be assigned to column name for user's - O&M event start-date + O&M event start-date - **dateend** (*string*), should be assigned to column name for user's O&M event end-date - **workID** (*string*), should be assigned to column name for user's O&M unique event ID - **worktype** (*string*), should be assigned to column name for user's From d7fd8d575b24fee7a9136a1e8076b18fc8733dbd Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 28 Oct 2022 11:19:14 -0400 Subject: [PATCH 05/27] Adding dependencies to setup.py. Changed tensorflow and keras to >= rather than ==. Note that the protobuf version is specified as per the issue discussed here: https://stackoverflow.com/questions/72441758/typeerror-descriptors-cannot-not-be-created-directly --- setup.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d2ef915..a2eba30 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,11 @@ 'networkx', 'pvlib', 'pvanalytics', - 'timezonefinder' + 'timezonefinder', + 'pyDOE', + 'keras>=2.3.0', + 'tensorflow>=2.2.0', + 'protobuf==3.20.*' ] DOCS_REQUIRE = [ From 35d0bd1f22762359db9ea34f2a30145463ecb780 Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 28 Oct 2022 11:19:37 -0400 Subject: [PATCH 06/27] adding sphinx rtd theme to docs requirements --- docs/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 8b5c6d4..41097b2 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,5 @@ ipykernel nbsphinx nbsphinx-link -sphinx-copybutton \ No newline at end of file +sphinx-copybutton +sphinx_rtd_theme \ No newline at 
end of file From 1997f578f0c44339579628050e13993168fb4d7f Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 28 Oct 2022 11:29:18 -0400 Subject: [PATCH 07/27] updating req.txts (are these necessary given setup.py?) --- requirements-min.txt | 5 +++-- requirements.txt | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/requirements-min.txt b/requirements-min.txt index 51ae38a..661ea83 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -13,5 +13,6 @@ pvlib pvanalytics timezonefinder pyDOE -keras==2.3.0 -tensorflow==2.2.0rc4 \ No newline at end of file +keras>=2.3.0 +tensorflow>=2.2.0rc4 +protobuf==3.20.* \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 51ae38a..661ea83 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,5 +13,6 @@ pvlib pvanalytics timezonefinder pyDOE -keras==2.3.0 -tensorflow==2.2.0rc4 \ No newline at end of file +keras>=2.3.0 +tensorflow>=2.2.0rc4 +protobuf==3.20.* \ No newline at end of file From 0edeedc166df053bf8360fc7d521b893aab209ee Mon Sep 17 00:00:00 2001 From: klbonne Date: Tue, 1 Nov 2022 11:22:52 -0400 Subject: [PATCH 08/27] Added changes to conf.py and added new css theme to improve doc formatting. Minor changes in many files to fix formatting issues and make things consistent. 
Moved many pieces out of Simulator class docstring - plan to make a new page dedicated to these explanations/examples --- docs/_static/css/my_style.css | 5 ++ docs/conf.py | 8 ++ pvops/iv/simulator.py | 153 ++-------------------------------- pvops/text/classify.py | 32 +++---- pvops/text/defaults.py | 10 +-- pvops/text/nlp_utils.py | 11 +-- pvops/text/preprocess.py | 22 +++-- pvops/text/utils.py | 8 +- pvops/text/visualize.py | 21 +++-- 9 files changed, 75 insertions(+), 195 deletions(-) create mode 100644 docs/_static/css/my_style.css diff --git a/docs/_static/css/my_style.css b/docs/_static/css/my_style.css new file mode 100644 index 0000000..3f224ea --- /dev/null +++ b/docs/_static/css/my_style.css @@ -0,0 +1,5 @@ +@import url("theme.css"); + +.wy-nav-content { + max-width: 1000px !important; +} \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 1386d07..afe19f0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -52,6 +52,13 @@ coverage_show_missing_items = True +napoleon_numpy_docstring = True +napoleon_google_docstring = False +napoleon_use_rtype = False +numpydoc_show_class_members = True +numpydoc_show_inherited_class_members = False +numpydoc_class_members_toctree = False + # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -73,3 +80,4 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
html_static_path = ["_static"] +html_style = 'css/my_style.css' diff --git a/pvops/iv/simulator.py b/pvops/iv/simulator.py index 4801e17..389b899 100644 --- a/pvops/iv/simulator.py +++ b/pvops/iv/simulator.py @@ -100,113 +100,6 @@ class Simulator(): Dictionary of module-level parameters cell_parameters : dict Dictionary of cell-level parameters - - Methods - ------- - - add_preset_conditions(fault_name, fault_condition, save_name = None, kwargs) - Define a failure condition using a preset condition. See :py:meth:`add_preset_conditions` - - add_manual_conditions(modcell, condition_dict) - Define a failure by passing in modcell and cell condition definitons manually. See :py:meth:`add_manual_conditions` - - generate_many_samples(identifier, N, distributions = None, default_sample = None) - Generate `N` more definitions of the same failure cell condition by defining parameter `distributions` - - build_strings(config_dict) - Define a string as a list of modules which were defined in :py:meth:`add_preset_conditions` or :py:meth:`add_manual_conditions` - - simulate(sample_limit) - Simulate cell, substring, module, and string-level definitions - - print_info() - Display the number of definitions on the cell, module, and string levels - - visualize(lim = False) - Visualize the definitions and render parameter distributions - - Process - ------- - A `pristine` condition is created automatically - - - Specify failure conditions either by - - 1) add a preset configuration - - - ``add_preset_conditions('complete', fault_condition)`` - - ``add_preset_conditions('landscape', fault_condition, rows_aff = 2)`` - - ``add_preset_conditions('portrait', fault_condition, cols_aff = 2)`` - - ``add_preset_conditions('pole', fault_condition, width = 2, pos = None)`` - - ``add_preset_conditions('bird_droppings', fault_condition, n_droppings = None)`` - - 2) add a manual configuration - - - add_manual_conditions(modcell, condition_dict) - - 3) both - - - (Optional) Generate many definitions 
of a cell condition - ``generate_many_samples(identifier, N, distributions = None, default_sample = None)`` - - - (Optional) Define a string as a list of modules - ``build_strings(config_dict)`` - - - Simulate all levels of the designed PV system - ``simulate(sample_limit)`` - - - (Optional) Display information about the system - ``print_info()`` - ``visualize(lim = False)`` - - - Access simulations for your intended use - - 1) Export simulations as dataframe, which has columns: - ``df = sims_to_df(cutoff=False)`` - - 2) Access simulations manually - Inspect ``Simulator().multilevel_ivdata`` - See `Attributes` above for information on multilevel_ivdata. - - Example - ------- - .. code-block:: python - - sim = Simulator( - mod_specs = { - 'Jinko_Solar_Co___Ltd_JKM270PP_60': {'ncols': 6, - 'nsubstrings': 3 - } - }, - pristine_condition = { - 'identifier': 'pristine', - 'E': 1000, - 'Tc': 50, - 'Rsh_mult': 1, - 'Rs_mult': 1, - 'Io_mult': 1, - 'Il_mult': 1, - 'nnsvth_mult': 1, - }, - # Optional, Determined by IVProcessor() - replacement_5params = {'I_L_ref': 9.06157444e+00, - 'I_o_ref': 1.67727320e-10, # 0.3e-10, - 'R_s': 5.35574950e-03, - 'R_sh_ref': 3.03330425e+00, - 'a_ref': 2.54553421e-02} - ) - - condition = {'identifier':'light_shade','E':925} - sim.add_preset_conditions('complete', condition, save_name = f'Complete_lightshading') - - sim.build_strings({'Partial_lightshading': ['pristine']*6 + ['Complete_lightshading']*6}) - - sim.simulate() - - sim.print_info() - - # Look at a result! - Vsim = sim.multilevel_ivdata['string']['Partial_lightshading']['V'][0] - Isim = sim.multilevel_ivdata['string']['Partial_lightshading']['I'][0] """ def __init__(self, @@ -306,6 +199,7 @@ def add_preset_conditions(self, fault_name, fault_condition, save_name=None, **k Requires specification of cols_aff - e.g. 
add_preset_conditions('portrait', fault_condition, cols_aff = 2) + - 'pole': Place pole shadow over module Requires specification of width (integer), which designates the width of main shadow and \\ requires light_shading fault_condition specification which specifies less intense shading \\ @@ -315,6 +209,7 @@ def add_preset_conditions(self, fault_name, fault_condition, save_name=None, **k where left is number in the first column and right is number in last column if pos not specified, the positions are chosen randomly e.g. add_preset_conditions('pole', fault_condition, light_shading = light_fault_condition, width = 2, pos = (5, 56)) + - 'bird_droppings': Random positions are chosen for bird_dropping simulations - Optional specification is n_droppings. If not specified, chosen as random number between @@ -394,9 +289,9 @@ def _simulate_soiling_cases(self, case, vardict): def add_manual_conditions(self, modcell, condition_dict): """Create cell-level fault conditions manually - Parameters: - ----------- - modcell: dict + Parameters + ---------- + modcell : dict Key: name of the condition Value: list, @@ -449,44 +344,6 @@ def add_manual_conditions(self, modcell, condition_dict): Multiplier usually less than 1 to simulate a drop in IL NNSVTH_MULTIPLIER: numerical, Multiplier usually less than 1 to simulate a drop in NNSVTH, and therefore a_ref - - Example: - -------- - - .. 
code-block:: python - - modcells = {'unique_shading': [0,0,0,0,0,0,0,0,0,0, # Using 1D list - 1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1, - 0,0,0,0,0,0,0,0,0,0], - 'another_example': [[0,0,0,0,0,0,0,0,0,0, # Using 2D list (aka, multiple conditions as input) - 1,1,1,1,1,1,1,1,1,1, - 1,1,1,0,0,0,0,1,1,1, - 1,1,1,0,0,0,0,1,1,1, - 1,1,1,0,0,0,0,1,1,1, - 0,0,0,0,0,0,0,0,0,0], - [0,1,0,0,0,0,0,0,0,0, - 1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1, - 0,0,0,1,1,1,0,0,0,0, - 0,0,0,1,1,1,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0]] - } - # All numbers used in modcells must be defined here - # If defining a pristine condition, pass a blank dictionary - # If making edits to a pristine condition (e.g. dropping irradiance to 400) \\ - # you only need to a) specify the change made, and b) name an identifier string (for future reference) - # The pristine condition can be changed when first creating the class object - # To define a pristine, you can either pass an empty dictionary or pass {'identifier':'pristine'} - condition_dict = {0: {}, - 1: {'identifier': 'shading_cond1', - 'E': 400, - } - } - add_manual_conditions(modcell, condition_dict) - """ self._add_conditions(modcell, condition_dict) diff --git a/pvops/text/classify.py b/pvops/text/classify.py index 6b2f366..066600e 100644 --- a/pvops/text/classify.py +++ b/pvops/text/classify.py @@ -42,9 +42,9 @@ def classification_deployer( .. code-block:: python classifiers = { - 'LinearSVC': LinearSVC(), - 'AdaBoostClassifier': AdaBoostClassifier(), - 'RidgeClassifier': RidgeClassifier() + 'LinearSVC' : LinearSVC(), + 'AdaBoostClassifier' : AdaBoostClassifier(), + 'RidgeClassifier' : RidgeClassifier() } See ``supervised_classifier_defs.py`` or ``unsupervised_classifier_defs.py`` for this package's defaults. @@ -54,18 +54,18 @@ def classification_deployer( .. 
code-block:: python search_space = { - 'LinearSVC': { - 'clf__C': [1e-2,1e-1], + 'LinearSVC' : { + 'clf__C' : [1e-2,1e-1], 'clf__max_iter':[800,1000], }, - 'AdaBoostClassifier': { - 'clf__n_estimators': [50,100], + 'AdaBoostClassifier' : { + 'clf__n_estimators' : [50,100], 'clf__learning_rate':[1.,0.9,0.8], - 'clf__algorithm': ['SAMME.R'] + 'clf__algorithm' : ['SAMME.R'] }, - 'RidgeClassifier': { - 'clf__alpha': [0.,1e-3,1.], - 'clf__normalize': [False,True] + 'RidgeClassifier' : { + 'clf__alpha' : [0.,1e-3,1.], + 'clf__normalize' : [False,True] } } @@ -163,11 +163,11 @@ def classification_deployer( for param, score, time in zip(params, all_scores, r2): param["mean_fit_time"] = time d = { - "estimator": key, - "min_score": min(score), - "max_score": max(score), - "mean_score": np.mean(score), - "std_score": np.std(score), + "estimator" : key, + "min_score" : min(score), + "max_score" : max(score), + "mean_score" : np.mean(score), + "std_score" : np.std(score), } rows.append((pd.Series({**param, **d}))) diff --git a/pvops/text/defaults.py b/pvops/text/defaults.py index 45733cd..a606802 100644 --- a/pvops/text/defaults.py +++ b/pvops/text/defaults.py @@ -29,7 +29,7 @@ def supervised_classifier_defs(settings_flag): - """Esablish supervised classifier definitions + """Establish supervised classifier definitions which are non-specific to embeddor, and therefore, non-specific to the natural language processing application @@ -44,9 +44,9 @@ def supervised_classifier_defs(settings_flag): Returns ------- - search_space: dict + search_space : dict Hyperparameter instances for each clusterer - classifiers: dict + classifiers : dict Contains sklearn classifiers instances """ if settings_flag == "light": @@ -272,9 +272,9 @@ def unsupervised_classifier_defs(setting_flag, n_clusters): Returns ------- - search_space: dict + search_space : dict Hyperparameter instances for each clusterer - clusterers: dict + clusterers : dict Contains sklearn cluster instances """ diff --git 
a/pvops/text/nlp_utils.py b/pvops/text/nlp_utils.py index 7cfc34d..88121d1 100644 --- a/pvops/text/nlp_utils.py +++ b/pvops/text/nlp_utils.py @@ -152,16 +152,16 @@ def create_stopwords(lst_langs=["english"], lst_add_words=[], lst_keep_words=[]) Parameters ---------- - lst_langs: list + lst_langs : list List of strings designating the languages for a nltk.corpus.stopwords.words query. If empty list is passed, no stopwords will be queried from nltk. - lst_add_words: list + lst_add_words : list List of words(e.g., "road" or "street") to add to stopwords list. If these words are already included in the nltk query, a duplicate will not be added. - lst_keep_words: list + lst_keep_words : list List of words(e.g., "before" or "until") to remove from stopwords list. This is usually used to modify default stop words that might be of interest to PV. Returns ------- - List + list List of alphabetized stopwords """ lst_stopwords = set() @@ -190,7 +190,8 @@ def summarize_text_data(om_df, colname): Returns ------- - None + dict + dictionary containing printed summary data """ df = om_df.copy() text = df[colname].tolist() diff --git a/pvops/text/preprocess.py b/pvops/text/preprocess.py index 3b1a1ab..3c7fed0 100644 --- a/pvops/text/preprocess.py +++ b/pvops/text/preprocess.py @@ -19,12 +19,14 @@ def preprocessor( A pandas dataframe containing O&M data, which contains at least the columns within col_dict. 
lst_stopwords : list List of stop words which will be filtered in final preprocessing step - col_dict: dict of {str : str} + col_dict : dict of {str : str} A dictionary that contains the column names relevant for the get_dates fn - - **data** (*string*), should be assigned to associated column which stores the text logs - - **eventstart** (*string*), should be assigned to associated column which stores the log submission datetime - - **save_data_column** (*string*), should be assigned to associated column where the processed text should be stored - - **save_date_column** (*string*), should be assigned to associated column where the extracted dates from the text should be stored + + - data : string, should be assigned to associated column which stores the text logs + - eventstart : string, should be assigned to associated column which stores the log submission datetime + - save_data_column : string, should be assigned to associated column where the processed text should be stored + - save_date_column : string, should be assigned to associated column where the extracted dates from the text should be stored + print_info : bool Flag indicating whether to print information about the preprocessing progress extract_dates_only : bool @@ -33,7 +35,7 @@ def preprocessor( Returns ------- - DataFrame + df : DataFrame Contains the original columns as well as the processed data, located in columns defined by the inputs """ @@ -153,10 +155,12 @@ def get_dates( Designates the row of the dataframe which is currently being observed. This is required because if the current row does not have a valid date in the `eventstart`, then an iterative search is conducted by first starting at the nearest rows. 
- col_dict: dict of {str : str} + col_dict : dict of {str : str} A dictionary that contains the column names relevant for the get_dates fn - - **data** (*string*), should be assigned to associated column which stores the text logs - - **eventstart** (*string*), should be assigned to associated column which stores the log submission datetime + + - data : string, should be assigned to associated column which stores the text logs + - eventstart : string, should be assigned to associated column which stores the log submission datetime + print_info : bool Flag indicating whether to print information about the preprocessing progress infer_date_surrounding_rows : bool diff --git a/pvops/text/utils.py b/pvops/text/utils.py index 01bb39f..ae494ab 100644 --- a/pvops/text/utils.py +++ b/pvops/text/utils.py @@ -11,18 +11,18 @@ def remap_attributes(om_df, remapping_df, remapping_col_dict, ---------- om_df : DataFrame A pandas dataframe containing O&M data, which needs to be remapped. - remapping_df : dataframe + remapping_df : DataFrame Holds columns that define the remappings remapping_col_dict : dict of {str : str} A dictionary that contains the column names that describes how remapping is going to be done - - **attribute_col** (*string*), should be assigned to associated + - attribute_col : string, should be assigned to associated column name in om_df which will be remapped - - **remapping_col_from** (*string*), should be assigned + - remapping_col_from : string, should be assigned to associated column name in remapping_df that matches original attribute of interest in om_df - - **remapping_col_to** (*string*), should be assigned to + - remapping_col_to : string, should be assigned to associated column name in remapping_df that contains the final mapped entries allow_missing_mappings : bool diff --git a/pvops/text/visualize.py b/pvops/text/visualize.py index 0b8195c..062ecd5 100644 --- a/pvops/text/visualize.py +++ b/pvops/text/visualize.py @@ -34,10 +34,15 @@ def 
visualize_attribute_connectivity( ---------- om_df : DataFrame A pandas dataframe containing O&M data, which contains columns specified in om_col_dict - om_col_dict: dict of {str : str} - A dictionary that contains the column names that describes how remapping is going to be done - - **attribute1_col** (*string*), should be assigned to associated column name for first attribute of interest in om_df - - **attribute2_col** (*string*), should be assigned to associated column name for second attribute of interest in om_df + om_col_dict : dict of {str : str} + A dictionary that contains the column names to be used in + visualization:: + + { + 'attribute1_col' : string, + 'attribute2_col' : string + } + figsize : tuple Figure size attribute_colors : list @@ -45,10 +50,10 @@ def visualize_attribute_connectivity( edge_width_scalar : numeric Weight utilized to cause dynamic widths based on number of connections between Attribute 1 and Attribute 2. - graph_aargs + graph_aargs : dict Optional, arguments passed to networkx graph drawer. Suggested attributes to pass: - + - with_labels=True - font_weight='bold' - node_size=19000 @@ -112,7 +117,7 @@ def visualize_attribute_timeseries( ---------- om_df : DataFrame A pandas dataframe of O&M data, which contains columns in om_col_dict - om_col_dict: dict of {str : str} + om_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the get_dates fn - **label** (*string*), should be assigned to associated column name for the label/attribute of interest in om_df @@ -120,7 +125,7 @@ def visualize_attribute_timeseries( date_structure : str Controls the resolution of the bar chart's timeseries - Default: "%Y-%m". Can change to include finer resolutions (e.g., by including day, "%Y-%m-%d") + Default : "%Y-%m". 
Can change to include finer resolutions (e.g., by including day, "%Y-%m-%d") or coarser resolutions (e.g., by year, "%Y") figsize : tuple Optional, figure size From 881ba78e1053088101da1b3766b1b3b056ed09eb Mon Sep 17 00:00:00 2001 From: klbonne Date: Tue, 1 Nov 2022 12:43:15 -0400 Subject: [PATCH 09/27] more docstring formatting --- docs/conf.py | 9 +- pvops/iv/extractor.py | 34 ++++---- pvops/iv/models/nn.py | 42 +-------- pvops/iv/physics_utils.py | 4 +- pvops/iv/preprocess.py | 8 +- pvops/iv/simulator.py | 177 +++++++++++++++++++------------------- 6 files changed, 116 insertions(+), 158 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index afe19f0..1c6de69 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -52,13 +52,14 @@ coverage_show_missing_items = True -napoleon_numpy_docstring = True -napoleon_google_docstring = False -napoleon_use_rtype = False +napoleon_numpy_docstring = True # use numpy style +napoleon_google_docstring = False # not google style +napoleon_use_rtype = False # option for return section formatting numpydoc_show_class_members = True numpydoc_show_inherited_class_members = False numpydoc_class_members_toctree = False - +napoleon_use_ivar = True # option for attribute section formatting +napoleon_use_param = False # option for parameter section formatting # Add any paths that contain templates here, relative to this directory. 
templates_path = ["_templates"] diff --git a/pvops/iv/extractor.py b/pvops/iv/extractor.py index f3bd79e..d6c15a6 100644 --- a/pvops/iv/extractor.py +++ b/pvops/iv/extractor.py @@ -15,26 +15,25 @@ class BruteForceExtractor(): '''Process measured IV curves Requires a set of curves to create Isc vs Irr and Voc vs Temp vs Isc(Irr) + + Parameters + ---------- + input_df, df + Contains IV curves with a datetime index + current_col, str + Indicates column where current values in IV curve are located; each cell is an array of current values in a single IV curve + voltage_col, str + Indicates column where voltage values in IV curve are located; each cell is an array of voltage values in a single IV curve + irradiance_col, str + Indicates column where irradiance value (W/m2) + temperature_col, str + Indicates column where temperature value (C) + T_type: string, + Describe input temperature, either 'ambient' or 'module' or 'cell' ''' def __init__(self, input_df, current_col, voltage_col, irradiance_col, temperature_col, T_type, windspeed_col=None, Simulator_mod_specs=None, Simulator_pristine_condition=None): - ''' - Parameters - ---------- - input_df, df - Contains IV curves with a datetime index - current_col, str - Indicates column where current values in IV curve are located; each cell is an array of current values in a single IV curve - voltage_col, str - Indicates column where voltage values in IV curve are located; each cell is an array of voltage values in a single IV curve - irradiance_col, str - Indicates column where irradiance value (W/m2) - temperature_col, str - Indicates column where temperature value (C) - T_type: string, - Describe input temperature, either 'ambient' or 'module' or 'cell' - ''' self.Simulator_mod_specs = Simulator_mod_specs self.Simulator_pristine_condition = Simulator_pristine_condition @@ -76,6 +75,7 @@ def __init__(self, input_df, current_col, voltage_col, irradiance_col, temperatu self.params = {} def create_string_object(self, iph, io, 
rs, rsh, nnsvth): + #TODO write docstring kwargs = {} if self.Simulator_mod_specs is not None: kwargs.update({'mod_specs': self.Simulator_mod_specs}) @@ -127,7 +127,7 @@ def create_string_object(self, iph, io, rs, rsh, nnsvth): return sim def f_multiple_samples(self, params): - + #TODO write docstring iph, io, rs, rsh, nnsvth = params if self.user_func is None: diff --git a/pvops/iv/models/nn.py b/pvops/iv/models/nn.py index 03fb1cc..aa1cc9f 100644 --- a/pvops/iv/models/nn.py +++ b/pvops/iv/models/nn.py @@ -461,47 +461,7 @@ def structure(self, train, test): test : dataframe Test data containing IV data and associated features nn_config : dict - Parameters used for the IV trace classifier. These parameters are - disseminated into four categories. - - * Neural network parameters - - - **model_choice** (*str*), model choice, either "1DCNN" or - "LSTM_multihead" - - **params** (*list of str*), column names in train & test - dataframes, used in neural network. Each value in this column - must be a list. - - **dropout_pct** (*float*), rate at which to set input units - to zero. - - **verbose** (*int*), control the specificity of the prints. - - * Training parameters - - - **train_size** (*float*), split of training data used for - training - - **shuffle_split** (*bool*), shuffle data during test-train - split - - **balance_tactic** (*str*), mode balancing tactic, either - "truncate" or "gravitate". Truncate will utilize the exact - same number of samples for each category. Gravitate will sway - the original number of samples towards the same number. - Default= truncate. - - **n_split** (*int*), number of splits in the stratified KFold - cross validation. - - **batch_size** (*int*), number of samples per gradient update. - - **max_epochs** (*int*), maximum number of passes through the - training process. 
- - * LSTM parameters - - - **use_attention_lstm** (*bool*), if True, - use attention in LSTM network - - **units** (*int*), number of neurons in initial NN layer - - * 1DCNN parameters - - - **nfilters** (*int*), number of filters in the convolution. - - **kernel_size** (*int*), length of the convolution window. + Parameters used for the IV trace classifier. """ num_params = len(self.params) diff --git a/pvops/iv/physics_utils.py b/pvops/iv/physics_utils.py index 80e3c45..d562316 100644 --- a/pvops/iv/physics_utils.py +++ b/pvops/iv/physics_utils.py @@ -304,8 +304,8 @@ def add_series(voltage_1, current_1, voltage_2=None, current_2=None, v_bypass=No current : numeric Current for combined IV curve [V] - Notes - ----- + Note + ---- Current for the combined IV curve is the sorted union of the current of the two input IV curves. At current values in the other IV curve, voltage is determined by linear interpolation. Voltage at current values outside an diff --git a/pvops/iv/preprocess.py b/pvops/iv/preprocess.py index d2fb27a..d5ee627 100644 --- a/pvops/iv/preprocess.py +++ b/pvops/iv/preprocess.py @@ -11,16 +11,16 @@ def preprocess(input_df, resmpl_resolution, iv_col_dict, resmpl_cutoff=0.03, Parameters ---------- input_df : DataFrame - resmpl_resolution : - iv_col_dict : + resmpl_resolution : float + iv_col_dict : dict resmpl_cutoff : float correct_gt : bool normalize_y : bool - CECmodule_parameters : + CECmodule_parameters : None n_mods : int gt_correct_option : int - Results + Returns ------- df : DataFrame """ diff --git a/pvops/iv/simulator.py b/pvops/iv/simulator.py index 389b899..4fd0e5f 100644 --- a/pvops/iv/simulator.py +++ b/pvops/iv/simulator.py @@ -30,34 +30,28 @@ class Simulator(): A full condition is defined as a dictionary with the following key/value pairs: - - 'identifier': IDENTIFIER_NAME, - - 'E': IRRADIANCE, - - 'Tc': CELL_TEMPERATURE, - - 'Rsh_mult': RSH_MULTIPLIER, - - 'Rs_mult': RS_MULTIPLIER, - - 'Io_mult': IO_MULTIPLIER, - - 'Il_mult': 
IL_MULTIPLIER, - - 'nnsvth_mult': NNSVTH_MULTIPLIER, - - 'modname': MODULE_NAME_IN_CECDB - - IDENTIFIER_NAME: str, - Name used to define condition - IRRADIANCE: numerical, - Value of irradiance (Watts per meter-squared) - CELL_TEMPERATURE: numerical, - Value of cell temperature (Celcius) - RSH_MULTIPLIER: numerical, - Multiplier usually less than 1 to simulate a drop in Rsh - RS_MULTIPLIER: numerical, - Multiplier usually greater than 1 to simulate increase in Rs - IO_MULTIPLIER: numerical, - Multiplier usually less than 1 to simulate a drop in IO - IL_MULTIPLIER: numerical, - Multiplier usually less than 1 to simulate a drop in IL - NNSVTH_MULTIPLIER: numerical, - Multiplier usually less than 1 to simulate a drop in NNSVTH, and therefore a_ref - MODULE_NAME_IN_CECDB: str - Module name in CEC database (e.g. Jinko_Solar_Co___Ltd_JKMS260P_60) + .. code-block:: python + + { + 'identifier': IDENTIFIER_NAME, # (str) Name used to define condition + 'E': IRRADIANCE, # (numeric) Value of irradiance (Watts per meter-squared) + 'Tc': CELL_TEMPERATURE, # (numeric) Value of cell temperature + # (Celsius) + 'Rsh_mult': RSH_MULTIPLIER, # (numeric) Multiplier usually less than 1 + # to simulate a drop in RSH + 'Rs_mult': RS_MULTIPLIER, # (numeric) Multiplier usually greater than 1 + # to simulate an increase in RS + 'Io_mult': IO_MULTIPLIER, # (numeric) Multiplier usually less than 1 + # to simulate a drop in IO + 'Il_mult': IL_MULTIPLIER, # (numeric) Multiplier usually less than 1 + # to simulate a drop in IL + 'nnsvth_mult': NNSVTH_MULTIPLIER, # (numeric) Multiplier usually less + # than 1 to simulate a drop in NNSVTH, and therefore a_ref + 'modname': MODULE_NAME_IN_CECDB # (str) Module name in CEC database + # (e.g. 
Jinko_Solar_Co___Ltd_JKMS260P_60) + } + + replacement_5params : dict Optional, replace the definitions of the five electrical parameters, which normally @@ -66,11 +60,15 @@ class Simulator(): Key/value pairs: - - 'I_L_ref': None - - 'I_o_ref': None - - 'R_s': None - - 'R_sh_ref': None - - 'a_ref': None + .. code-block:: python + + { + 'I_L_ref': None, + 'I_o_ref': None, + 'R_s': None, + 'R_sh_ref': None, + 'a_ref': None + } simulation_method : int Module simulation method (1 or 2) @@ -88,11 +86,11 @@ class Simulator(): Dictionary containing the simulated IV curves - For nth-definition of string curves, - multilevel_ivdata['string']['STRING IDENTIFIER'][n] + ``multilevel_ivdata['string']['STRING IDENTIFIER'][n]`` - For nth-definition of module curves, - multilevel_ivdata['module']['MODULE IDENTIFIER'][n] + ``multilevel_ivdata['module']['MODULE IDENTIFIER'][n]`` - For nth-definition of substring (substr_id = 1,2,3,...) curves, - multilevel_ivdata['module']['MODULE IDENTIFIER']['substr{sbstr_id}'][n] + ``multilevel_ivdata['module']['MODULE IDENTIFIER']['substr{sbstr_id}'][n]`` pristine_condition : dict Dictionary of conditions defining the pristine case @@ -221,8 +219,8 @@ def add_preset_conditions(self, fault_name, fault_condition, save_name=None, **k kwargs: variables dependent on which fault_name you choose, see above - Tip: - ---- + Tip + --- For a wider spectrum of cases, run all of these multiple times. Each time it's run, the case is saved """ acceptible_fault_names = [ @@ -298,13 +296,12 @@ def add_manual_conditions(self, modcell, condition_dict): - 1D list: Give a single situation for this condition - 2D list: Give multiple situations for this condition - A list where each value signifies a cell's condition. - - See below for example. - + If key is same as an existing key, the list is appended to list of scenarios \\ which that key owns condition_dict: dict Define the numerical value written in modcell + .. 
note:: If the variable is not defined, values will default to those specified \\ @@ -314,36 +311,21 @@ def add_manual_conditions(self, modcell, condition_dict): .. code-block:: python - {ID: {'identifier': IDENTIFIER_NAME, - 'E': IRRADIANCE, - 'Tc': CELL_TEMPERATURE, - 'Rsh_mult': RSH_MULTIPLIER, - 'Rs_mult': RS_MULTIPLIER, - 'Io_mult': IO_MULTIPLIER, - 'Il_mult': IL_MULTIPLIER, - 'nnsvth_mult': NNSVTH_MULTIPLIER - } - - Key/value pairs: - - ID: int, - Value corresponding to those in modcell - IDENTIFIER_NAME: str, - Name used to define condition - IRRADIANCE: numerical, - Value of irradiance (Watts per meter-squared) - CELL_TEMPERATURE: numerical, - Value of cell temperature (Celcius) - RSH_MULTIPLIER: numerical, - Multiplier usually less than 1 to simulate a drop in Rsh - RS_MULTIPLIER: numerical, - Multiplier usually greater than 1 to simulate increase in Rs - IO_MULTIPLIER: numerical, - Multiplier usually less than 1 to simulate a drop in IO - IL_MULTIPLIER: numerical, - Multiplier usually less than 1 to simulate a drop in IL - NNSVTH_MULTIPLIER: numerical, - Multiplier usually less than 1 to simulate a drop in NNSVTH, and therefore a_ref + {ID: {'identifier': IDENTIFIER_NAME, # (str) Name used to define condition + 'E': IRRADIANCE, # (numeric) Value of irradiance (Watts per meter-squared) + 'Tc': CELL_TEMPERATURE, # (numeric) Value of cell temperature (Celsius) + 'Rsh_mult': RSH_MULTIPLIER, # (numeric) Multiplier usually less than 1 + # to simulate a drop in Rsh + 'Rs_mult': RS_MULTIPLIER, # (numeric) Multiplier usually greater than 1 + # to simulate increase in Rs + 'Io_mult': IO_MULTIPLIER, # (numeric) Multiplier usually less than 1 + # to simulate a drop in IO + 'Il_mult': IL_MULTIPLIER, # (numeric) Multiplier usually less than 1 + # to simulate a drop in IL + 'nnsvth_mult': NNSVTH_MULTIPLIER # (numeric) Multiplier usually less than 1 to + # simulate a drop in NNSVTH, and therefore a_ref + } + } """ self._add_conditions(modcell, condition_dict) @@ -624,12 
+606,13 @@ def sims_to_df(self, focus=['string', 'module'], cutoff=False): Returns ------- Dataframe with columns: - 'current': IV trace current - 'voltage': IV trace voltage - 'E': Average irradiance for all samples used to build this array - 'T': Average cell temperature for all samples used to build this array - 'mode': failure name - 'level': level of system (i.e. module, string), as defined by the input `focus` parameter + + - 'current': IV trace current + - 'voltage': IV trace voltage + - 'E': Average irradiance for all samples used to build this array + - 'T': Average cell temperature for all samples used to build this array + - 'mode': failure name + - 'level': level of system (i.e. module, string), as defined by the input `focus` parameter #TODO: create focus for cell. For now, one can do it manually themselves. """ @@ -1125,20 +1108,20 @@ def generate_many_samples(self, identifier, N, distributions=None, default_sampl One does not need to define distributions for all parameters, only those that you want altered. - distributions = { - 'Rsh_mult':{'mean':None, - 'std': None, - 'low': None, - 'upp': None}, - 'Rs_mult': {'mean':None, - 'std': None, - 'low': None, - 'upp': None}, - - ... + .. code-block:: python - All keys in self.acceptible_keys - } + distributions = { + 'Rsh_mult':{'mean':None, + 'std': None, + 'low': None, + 'upp': None}, + 'Rs_mult': {'mean':None, + 'std': None, + 'low': None, + 'upp': None}, + ... + # All keys in self.acceptible_keys + } default_sample : If provided, use this sample to replace the parameters which do not have distributions specified. 
Else, uses @@ -1959,6 +1942,20 @@ def _simulate_landscape(self, rows_aff): def create_df(Varr, Iarr, POA, T, mode): + """Builds a dataframe from the given parameters + + Parameters + ---------- + Varr + Iarr + POA + T + mode + + Returns + ------- + df : DataFrame + """ df = pd.DataFrame() df['voltage'] = Varr df['current'] = Iarr From 1984461625e41f249e6478d53d523e1f233f8a60 Mon Sep 17 00:00:00 2001 From: klbonne Date: Tue, 1 Nov 2022 14:13:59 -0400 Subject: [PATCH 10/27] Fixing imports on tests --- pvops/tests/test_iv.py | 21 +++++++++------------ pvops/tests/test_text.py | 5 +---- pvops/tests/test_text2time.py | 8 +------- pvops/tests/test_timeseries.py | 9 +++------ 4 files changed, 14 insertions(+), 29 deletions(-) diff --git a/pvops/tests/test_iv.py b/pvops/tests/test_iv.py index d10fb98..2d622db 100644 --- a/pvops/tests/test_iv.py +++ b/pvops/tests/test_iv.py @@ -3,13 +3,10 @@ import sys import pandas as pd import numpy as np - -iv_directory = os.path.join("pvops", "iv") -sys.path.append(iv_directory) -import timeseries_simulator -import preprocess -import simulator -from models import nn +import pvops.iv.timeseries_simulator +import pvops.iv.preprocess +import pvops.iv.simulator +from pvops.iv.models import nn datadir = os.path.join('examples', 'example_data') example_prodpath = os.path.join( @@ -19,7 +16,7 @@ def test_simulation(): random.seed(0) - sim = simulator.Simulator() + sim = pvops.iv.simulator.Simulator() # test adding presets heavy_shading = {'identifier': 'heavy_shade', @@ -104,7 +101,7 @@ def test_simulation(): def test_classification(): - sim = simulator.Simulator() + sim = pvops.iv.simulator.Simulator() condition = {'identifier': 'shade', 'Il_mult': 0.6} sim.add_preset_conditions('complete', condition, @@ -140,7 +137,7 @@ def test_classification(): } # Irradiance & Temperature correction, and normalize axes - prep_df = preprocess.preprocess(df, 0.05, iv_col_dict, + prep_df = pvops.iv.preprocess.preprocess(df, 0.05, iv_col_dict, 
resmpl_cutoff=0.03, correct_gt=True, normalize_y=False, CECmodule_parameters=sim.module_parameters, @@ -197,7 +194,7 @@ def test_timeseries_simulator(): # Reduce number of simulations for test env_df = env_df.iloc[0:100] - failureA = timeseries_simulator.TimeseriesFailure() + failureA = pvops.iv.timeseries_simulator.TimeseriesFailure() longterm_fcn_dict = { 'Rs_mult': "degrade" } @@ -215,7 +212,7 @@ def test_timeseries_simulator(): env_df['identifier'] = env_df.index.strftime("%Y-%m-%d %H:%M:%S") - time_simulator = timeseries_simulator.IVTimeseriesGenerator() + time_simulator = pvops.iv.timeseries_simulator.IVTimeseriesGenerator() time_simulator.generate( env_df, [failureA], iv_col_dict, 'identifier', plot_trends=False) diff --git a/pvops/tests/test_text.py b/pvops/tests/test_text.py index 32e9ccd..31935e1 100644 --- a/pvops/tests/test_text.py +++ b/pvops/tests/test_text.py @@ -1,10 +1,7 @@ import os import sys -pvops_path = os.path.join("pvops") -sys.path.append(pvops_path) - -from text import visualize, preprocess, nlp_utils +from pvops.text import visualize, preprocess, nlp_utils import pandas as pd import numpy as np diff --git a/pvops/tests/test_text2time.py b/pvops/tests/test_text2time.py index 590393d..f151a63 100755 --- a/pvops/tests/test_text2time.py +++ b/pvops/tests/test_text2time.py @@ -5,13 +5,7 @@ import sys import os import pandas.api.types as ptypes - -# Set sytem paths -pvops_path = os.path.join('pvops') -# T2time_path = os.path.join('.', 'text2time') - -sys.path.append(pvops_path) -from text2time import preprocess, utils +from pvops.text2time import preprocess, utils # from om_data_convert import om_data_convert # Import modules diff --git a/pvops/tests/test_timeseries.py b/pvops/tests/test_timeseries.py index 1fe52f9..ddd42b5 100644 --- a/pvops/tests/test_timeseries.py +++ b/pvops/tests/test_timeseries.py @@ -2,12 +2,9 @@ import sys import pandas as pd import numpy as np - -pvops_directory = os.path.join("pvops") -sys.path.append(pvops_directory) 
-from timeseries.models import linear -from timeseries import preprocess as tprep -from text2time import preprocess as t2tprep +from pvops.timeseries.models import linear +from pvops.timeseries import preprocess as tprep +from pvops.text2time import preprocess as t2tprep # Define csv paths datadir = os.path.join('examples', 'example_data') From fca24dec037c08d99a59e5cb0852c612b02acdd5 Mon Sep 17 00:00:00 2001 From: klbonne Date: Tue, 1 Nov 2022 14:43:08 -0400 Subject: [PATCH 11/27] Last big push before PR. This commit builds docs with no warnings coming from the docstrings or rst files (there are some from ipynb though). --- docs/whatsnew/alpha.rst | 2 - docs/whatsnew/beta.rst | 2 - pvops/iv/extractor.py | 12 ++--- pvops/iv/models/nn.py | 43 +++------------- pvops/text2time/preprocess.py | 60 ++++++++++------------ pvops/text2time/utils.py | 82 +++++++++++++------------------ pvops/text2time/visualize.py | 50 +++++++------------ pvops/timeseries/models/AIT.py | 4 +- pvops/timeseries/models/iec.py | 17 +++---- pvops/timeseries/models/linear.py | 32 ++++++------ pvops/timeseries/preprocess.py | 39 ++++++++------- 11 files changed, 137 insertions(+), 206 deletions(-) diff --git a/docs/whatsnew/alpha.rst b/docs/whatsnew/alpha.rst index b580387..2bcbc49 100644 --- a/docs/whatsnew/alpha.rst +++ b/docs/whatsnew/alpha.rst @@ -1,5 +1,3 @@ -.. _whatsnew_alpha: - First push ----------------------- diff --git a/docs/whatsnew/beta.rst b/docs/whatsnew/beta.rst index 3555114..0179c6d 100644 --- a/docs/whatsnew/beta.rst +++ b/docs/whatsnew/beta.rst @@ -1,5 +1,3 @@ -.. 
_whatsnew_beta: - beta ----------------------- diff --git a/pvops/iv/extractor.py b/pvops/iv/extractor.py index d6c15a6..c0f7b52 100644 --- a/pvops/iv/extractor.py +++ b/pvops/iv/extractor.py @@ -18,17 +18,17 @@ class BruteForceExtractor(): Parameters ---------- - input_df, df + input_df : DataFrame Contains IV curves with a datetime index - current_col, str + current_col : string Indicates column where current values in IV curve are located; each cell is an array of current values in a single IV curve - voltage_col, str + voltage_col : string Indicates column where voltage values in IV curve are located; each cell is an array of voltage values in a single IV curve - irradiance_col, str + irradiance_col : string Indicates column where irradiance value (W/m2) - temperature_col, str + temperature_col : string Indicates column where temperature value (C) - T_type: string, + T_type : string Describe input temperature, either 'ambient' or 'module' or 'cell' ''' diff --git a/pvops/iv/models/nn.py b/pvops/iv/models/nn.py index aa1cc9f..10caa0b 100644 --- a/pvops/iv/models/nn.py +++ b/pvops/iv/models/nn.py @@ -86,6 +86,7 @@ def feature_generation(bigdf, iv_col_dict, `derivative` and `current_diff` which are calculated here. iv_col_dict : dict Dictionary containing definitions for the column names in `df` + - **current** (*str*): column name for IV current arrays. - **voltage** (*str*): column name for IV voltage arrays. - **mode** (*str*): column name for failure mode identifier. @@ -95,10 +96,11 @@ def feature_generation(bigdf, iv_col_dict, calculated in this function. - **current_diff** (*str*): column name for current differential, as calculated in `get_diff_array`. + pristine_mode_identifier : str Pristine array identifier. The pristine curve is utilized in - `get_diff_array`. If multiple rows exist at this - `pristine_mode_identifier`, the one with the highest irradiance and + ``get_diff_array``. 
If multiple rows exist at this + ``pristine_mode_identifier``, the one with the highest irradiance and lowest temperature definitions is chosen. Returns @@ -166,7 +168,8 @@ def balance_df(df, iv_col_dict, balance_tactic='truncate'): Returns ------- - dataframe, balanced according to the `balance_tactic`. + balanced_df : DataFrame + balanced according to the `balance_tactic`. """ ycol = iv_col_dict['mode'] @@ -393,39 +396,7 @@ def classify_curves(df, iv_col_dict, nn_config): Dictionary containing definitions for the column names in `df` **mode** (*str*): column name for failure mode identifier nn_config : dict - Parameters used for the IV trace classifier. These parameters are - disseminated into four categories. - - * Neural network parameters - - - **model_choice** (*str*), model choice, either "1DCNN" or - "LSTM_multihead" - - **params** (*list of str*), column names in train & test - dataframes, used in neural network. Each value in this column - must be a list. - - **dropout_pct** (*float*), rate at which to set input units - to zero. - - **verbose** (*int*), control the specificity of the prints. - - * Training parameters - - - **train_size** (*float*), split of training data used for training - - **shuffle_split** (*bool*), shuffle data during test-train split - - **balance_tactic** (*str*), mode balancing tactic, either "truncate" - or "gravitate". Truncate will utilize the exact same number of samples - for each category. Gravitate will sway the original number of samples - towards the same number. Default= truncate. - - * LSTM parameters - - - **use_attention_lstm** (*bool*), if True, - use attention in LSTM network - - **units** (*int*), number of neurons in initial NN layer - - * 1DCNN parameters - - - **nfilters** (*int*), number of filters in the convolution. - - **kernel_size** (*int*), length of the convolution window. + Parameters used for the IV trace classifier. 
""" # Balance ys bal_df = balance_df( diff --git a/pvops/text2time/preprocess.py b/pvops/text2time/preprocess.py index f628265..36fd6eb 100644 --- a/pvops/text2time/preprocess.py +++ b/pvops/text2time/preprocess.py @@ -13,26 +13,23 @@ def data_site_na(pom_df, df_col_dict): Parameters ---------- - pom_df: DataFrame + pom_df : DataFrame A data frame corresponding to either the production or O&M data. - - df_col_dict: dict of {str : str} + df_col_dict : dict of {str : str} A dictionary that contains the column names associated with the input `pom_df` and contains at least: - **siteid** (*string*), should be assigned to column name - for - user's site-ID + for user's site-ID Returns ------- - pom_df: DataFrame + pom_df : DataFrame An updated version of the input data frame, where rows with site-IDs of NAN are dropped. - - addressed: DataFrame + addressed : DataFrame A data frame showing rows from the input that were removed by this function. """ @@ -56,10 +53,9 @@ def om_date_convert(om_df, om_col_dict, toffset=0.0): Parameters ---------- - om_df: DataFrame + om_df : DataFrame A data frame corresponding to O&M data. - - om_col_dict: dict of {str : str} + om_col_dict : dict of {str : str} A dictionary that contains the column names associated with the O&M data, which consist of at least: @@ -68,7 +64,7 @@ def om_date_convert(om_df, om_col_dict, toffset=0.0): - **dateend** (*string*), should be assigned to column name for O&M event end date in om_df - toffset: float + toffset : float Value that specifies how many hours the O&M data should be shifted by in case time-stamps in production data and O&M data don't align as they should @@ -108,10 +104,9 @@ def om_datelogic_check(om_df, om_col_dict, om_dflag="swap"): Parameters ---------- - om_df: DataFrame + om_df : DataFrame A data frame corresponding to O&M data. 
- - om_col_dict: dict of {str : str} + om_col_dict : dict of {str : str} A dictionary that contains the column names associated with the O&M data, which consist of at least: @@ -120,7 +115,7 @@ def om_datelogic_check(om_df, om_col_dict, om_dflag="swap"): - **dateend** (*string*), should be assigned to column name for associated O&M event end date in om_df - om_dflag: str + om_dflag : str A flag that specifies how to address rows where the start of an event occurs after its conclusion. A flag of 'drop' will drop those rows, and a flag of 'swap' swap the two dates for @@ -128,12 +123,11 @@ def om_datelogic_check(om_df, om_col_dict, om_dflag="swap"): Returns ------- - om_df: DataFrame + om_df : DataFrame An updated version of the input dataframe, but with O&M data quality issues addressed to ensure the start of an event precedes the event end date. - - addressed: DataFrame + addressed : DataFrame A data frame showing rows from the input that were addressed by this function. """ @@ -162,7 +156,7 @@ def om_datelogic_check(om_df, om_col_dict, om_dflag="swap"): def om_nadate_process(om_df, om_col_dict, om_dendflag="drop"): """ Addresses issues with O&M dataframe where dates are missing - (NAN). Two operations are performed: 1) rows are dropped + (NAN). Two operations are performed : 1) rows are dropped where start of an event is missing and (2) rows where the conclusion of an event is NAN can either be dropped or marked with the time at which program is run, depending on the user's @@ -170,10 +164,10 @@ def om_nadate_process(om_df, om_col_dict, om_dendflag="drop"): Parameters ---------- - om_df: DataFrame + om_df : DataFrame A data frame corresponding to O&M data. 
- om_col_dict: dict of {str : str} + om_col_dict : dict of {str : str} A dictionary that contains the column names associated with the O&M data, which consist of at least: @@ -182,7 +176,7 @@ def om_nadate_process(om_df, om_col_dict, om_dendflag="drop"): - **dateend** (*string*), should be assigned to column name for user's O&M event end-date - om_dendflag: str + om_dendflag : str A flag that specifies how to address rows where the conclusion of an event is missing (NAN). A flag of 'drop' will drop those rows, and a flag of 'today' will replace the NAN with the time @@ -191,11 +185,11 @@ def om_nadate_process(om_df, om_col_dict, om_dendflag="drop"): Returns ------- - om_df: DataFrame + om_df : DataFrame An updated version of the input dataframe, but with no missing time-stamps in the O&M data. - addressed: DataFrame + addressed : DataFrame A data frame showing rows from the input that were addressed by this function. """ @@ -238,17 +232,17 @@ def prod_date_convert(prod_df, prod_col_dict, toffset=0.0): Parameters ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to production data. - prod_col_dict: dict of {str : str} + prod_col_dict : dict of {str : str} A dictionary that contains the column names associated with the production data, which consist of at least: - **timestamp** (*string*), should be assigned to user's time-stamp column name - toffset: float + toffset : float Value that specifies how many hours the production data should be shifted by in case time-stamps in production data and O&M data don't align as they should. @@ -284,17 +278,17 @@ def prod_nadate_process(prod_df, prod_col_dict, pnadrop=False): Parameters ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to production data. 
- prod_df_col_dict: dict of {str : str} + prod_df_col_dict : dict of {str : str} A dictionary that contains the column names associated with the production data, which consist of at least: - **timestamp** (*string*), should be assigned to associated time-stamp column name in prod_df - pnadrop: bool + pnadrop : bool Boolean flag that determines what to do with rows where time-stamp is missing. A value of `True` will drop these rows. Leaving the default value of `False` will identify @@ -303,13 +297,13 @@ def prod_nadate_process(prod_df, prod_col_dict, pnadrop=False): Returns ------- - prod_df: DataFrame + prod_df : DataFrame The output data frame. If pflag = 'drop', an updated version of the input data frame is output, but rows with missing time-stamps are removed. If default value is maintained, the input data frame is output with no modifications. - addressed: DataFrame + addressed : DataFrame A data frame showing rows from the input that were addressed or identified by this function. """ diff --git a/pvops/text2time/utils.py b/pvops/text2time/utils.py index 652b954..84b58ca 100644 --- a/pvops/text2time/utils.py +++ b/pvops/text2time/utils.py @@ -13,17 +13,15 @@ def interpolate_data(prod_df, om_df, prod_col_dict, om_col_dict, om_cols_to_tran Parameters ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to the production data after having been processed by the perf_om_NA_qc function. This data frame needs the columns specified in prod_col_dict. - - om_df: DataFrame + om_df : DataFrame A data frame corresponding to the O&M data after having been processed by the perf_om_NA_qc function. This data frame needs the columns specified in om_col_dict. 
- - prod_col_dict: dict of {str : str} + prod_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the production data - **siteid** (*string*), should be assigned to associated site-ID column name in prod_df @@ -34,7 +32,7 @@ def interpolate_data(prod_df, om_df, prod_col_dict, om_col_dict, om_cols_to_tran - **irradiance** (*string*), should be assigned to associated irradiance column name in prod_df - om_col_dict: dict of {str : str} + om_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the O&M data - **siteid** (*string*), should be assigned to associated site-ID column name in om_df @@ -49,7 +47,7 @@ def interpolate_data(prod_df, om_df, prod_col_dict, om_col_dict, om_cols_to_tran Returns ------- - prod_output: DataFrame + prod_output : DataFrame A data frame that includes statistics for the production data per site in the data frame. Two statistical parameters are calculated and assigned to separate columns: @@ -58,7 +56,7 @@ def interpolate_data(prod_df, om_df, prod_col_dict, om_col_dict, om_cols_to_tran - **Max # Time Stamps** (*datetime.datetime*), maximum number of production time-stamps, including NANs - om_out: DataFrame + om_out : DataFrame A data frame that includes statistics for the O&M data per site in the data frame. Three statistical parameters are calculated and assigned to separate columns: @@ -113,17 +111,16 @@ def summarize_overlaps(prod_df, om_df, prod_col_dict, om_col_dict): Parameters ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to the production data after having been processed by the perf_om_NA_qc function. This data frame needs the columns specified in prod_col_dict. - - om_df: DataFrame + om_df : DataFrame A data frame corresponding to the O&M data after having been processed by the perf_om_NA_qc function. This data frame needs the columns specified in om_col_dict. 
- prod_col_dict: dict of {str : str} + prod_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the production data - **siteid** (*string*), should be assigned to associated site-ID column name in prod_df @@ -134,7 +131,7 @@ def summarize_overlaps(prod_df, om_df, prod_col_dict, om_col_dict): - **irradiance** (*string*), should be assigned to associated irradiance column name in prod_df - om_col_dict: dict of {str : str} + om_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the O&M data - **siteid** (*string*), should be assigned to associated site-ID column name in om_df @@ -145,16 +142,16 @@ def summarize_overlaps(prod_df, om_df, prod_col_dict, om_col_dict): Returns ------- - prod_output: DataFrame + prod_output : DataFrame A data frame that includes statistics for the production data per site in the data frame. Two statistical parameters are calculated and assigned to separate columns: - **Actual # Time Stamps** (*datetime.datetime*), total number of overlapping production time-stamps - - **Max # Time Stamps** (*datetime.datetime), maximum number of production time-stamps, + - **Max # Time Stamps** (*datetime.datetime*), maximum number of production time-stamps, including NANs - om_out: DataFrame + om_out : DataFrame A data frame that includes statistics for the O&M data per site in the data frame. Three statistical parameters are calculated and assigned to separate columns: @@ -210,15 +207,15 @@ def om_summary_stats(om_df, meta_df, om_col_dict, meta_col_dict): Parameters ---------- - om_df: DataFrame + om_df : DataFrame A data frame corresponding to the O&M data after having been pre-processed by the QC and overlappingDFs functions. This data frame needs to have the columns specified in om_col_dict. - meta_df: DataFrame + meta_df : DataFrame A data frame corresponding to the metadata that contains columns specified in meta_col_dict. 
- om_col_dict: dict of {str : str} + om_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the O&M data which consist of at least: @@ -234,7 +231,7 @@ def om_summary_stats(om_df, meta_df, om_col_dict, meta_col_dict): - **agedatestart** (*string*), should be assigned to column name desired for calculated age of site when event started (calculated here, in days) - meta_col_dict: dict + meta_col_dict : dict A dictionary that contains the column names relevant for the meta-data - **siteid** (*string*), should be assigned to associated site-ID column name in meta_df @@ -243,7 +240,7 @@ def om_summary_stats(om_df, meta_df, om_col_dict, meta_col_dict): Returns ------- - om_df: DataFrame + om_df : DataFrame An updated version of the input dataframe, but with three new columns added for visualizations: event duration, month of event occurrence, and age of system at time of event occurrence. See om_col_dict for mapping @@ -316,20 +313,18 @@ def overlapping_data(prod_df, om_df, prod_col_dict, om_col_dict): Parameters ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to the production data after having been processed by the perf_om_NA_qc function. This data frame needs the columns specified in prod_col_dict. The time-stamp column should not have any NANs for proper operation of this function. - - om_df: DataFrame + om_df : DataFrame A data frame corresponding to the O&M data after having been processed by the perf_om_NA_qc function. This data frame needs the columns specified in om_col_dict. The time-stamp columns should not have any NANs for proper operation of this function. 
- - prod_col_dict: dict of {str : str} + prod_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the production data - **siteid** (*string*), should be assigned to associated site-ID column name in prod_df @@ -340,7 +335,7 @@ def overlapping_data(prod_df, om_df, prod_col_dict, om_col_dict): - **irradiance** (*string*), should be assigned to associated irradiance column name in prod_df - om_col_dict: dict of {str : str} + om_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the O&M data - **siteid** (*string*), should be assigned to associated site-ID column name in om_df @@ -351,11 +346,10 @@ def overlapping_data(prod_df, om_df, prod_col_dict, om_col_dict): Returns ------- - prod_df: DataFrame + prod_df : DataFrame Production data frame similar to the input data frame, but truncated to only contain data that overlaps in time with the O&M data. - - om_df: DataFrame + om_df : DataFrame O&M data frame similar to the input data frame, but truncated to only contain data that overlaps in time with the production data. @@ -452,36 +446,32 @@ def prod_anomalies(prod_df, prod_col_dict, minval=1.0, repval=np.nan, ffill=True Parameters ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to production data were production is logged on a cumulative basis. - - prod_col_dict: dict of {str : str} + prod_col_dict : dict of {str : str} A dictionary that contains the column names associated with the production data, which consist of at least: - **energyprod** (*string*), should be assigned to the associated cumulative production column name in prod_df - minval: float + minval : float Cutoff value for production data that determines where anomalies are defined. Any production values below minval will be addressed by this function. Default minval is 1.0 - - repval: float + repval : float Value that should replace the anomalies in a cumulative production data format. 
Default value is numpy's NAN. - - ffill: boolean + ffill : boolean Boolean flag that determines whether NANs in production column in prod_df should be filled using a forward-fill method. Returns ------- - prod_df: DataFrame + prod_df : DataFrame An updated version of the input dataframe, but with zero production values converted to user's preference. - - addressed: DataFrame + addressed : DataFrame A data frame showing rows from the input that were addressed by this function. """ @@ -507,12 +497,11 @@ def prod_quant(prod_df, prod_col_dict, comp_type, ecumu=True): Parameters ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to the production data after having been processed by the QC and overlappingDFs functions. This data frame needs at least the columns specified in prod_col_dict. - - prod_col_dict: dict of {str : str} + prod_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the production data - **siteid** (*string*), should be assigned to associated site-ID column name in prod_df @@ -527,13 +516,12 @@ def prod_quant(prod_df, prod_col_dict, comp_type, ecumu=True): - **energy_pstep** (*string*), should be assigned to column name desired for energy per time-step (calculated here) - comp_type: str + comp_type : str Flag that specifies how the energy production should be compared to the expected baseline. A flag of 'diff' shows the subtracted difference between the two (baseline - observed). A flag of 'norm' shows the ratio of the two (observed/baseline) - - ecumu: bool + ecumu : bool Boolean flag that specifies whether the production (energy output) data is input as cumulative information ("True") or on a per time-step basis ("False"). 
diff --git a/pvops/text2time/visualize.py b/pvops/text2time/visualize.py index 82cf403..521e5e4 100644 --- a/pvops/text2time/visualize.py +++ b/pvops/text2time/visualize.py @@ -14,12 +14,11 @@ def visualize_counts(om_df, om_col_dict, count_var, fig_sets): Parameters ---------- - om_df: DataFrame + om_df : DataFrame A data frame corresponding to the O&M data after having been pre-processed to address NANs and date consistency, and after applying the ``om_summary_stats`` function. This data frame needs at least the columns specified in om_col_dict. - - om_col_dict: dict of {str : str} + om_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the O&M data - **siteid** (*string*), should be assigned to column name for associated site-ID in om_df. @@ -28,8 +27,7 @@ def visualize_counts(om_df, om_col_dict, count_var, fig_sets): count_var:str Column name that contains categorical variable to be plotted - - fig_sets: dict + fig_sets : dict A dictionary that contains the settings to be used for the figure to be generated, and those settings should include: @@ -98,14 +96,13 @@ def visualize_categorical_scatter(om_df, om_col_dict, cat_varx, cat_vary, fig_se Parameters ---------- - om_df: DataFrame + om_df : DataFrame A data frame corresponding to the O&M data after having been pre-processed to address NANs and date consistency, and after applying the ``om_summary_stats`` function. This data frame needs at least the columns specified in om_col_dict. - - om_col_dict: dict of {str : str} + om_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the O&M data - **eventdur** (*string*), should be assigned to column name desired for repair duration. @@ -113,13 +110,11 @@ def visualize_categorical_scatter(om_df, om_col_dict, cat_varx, cat_vary, fig_se - **agedatestart** (*string*), should be assigned to column name desired for age of site when event started. 
This column is calculated by ``om_summary_stats`` - cat_varx: str + cat_varx : str Column name that contains categorical variable to be plotted - - cat_vary: str + cat_vary : str Column name that contains numerical variable to be plotted - - fig_sets: dict + fig_sets : dict A dictionary that contains the settings to be used for the figure to be generated, and those settings should include: @@ -129,7 +124,6 @@ def visualize_categorical_scatter(om_df, om_col_dict, cat_varx, cat_vary, fig_se Returns ------- None - """ # assigning dictionary items to local variables for cleaner code om_rep_dur = om_col_dict["eventdur"] @@ -195,17 +189,15 @@ def visualize_om_prod_overlap( Parameters ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to the performance data after (ideally) having been processed by the perf_om_NA_qc and overlappingDFs functions. This data frame needs to contain the columns specified in prod_col_dict. - - om_df: DataFrame + om_df : DataFrame A data frame corresponding to the O&M data after (ideally) having been processed by the perf_om_NA_qc and overlappingDFs functions. This data frame needs to contain the columns specified in om_col_dict. - - prod_col_dict: dict of {str : str} + prod_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the production data - **siteid** (*string*), should be assigned to associated site-ID column name in @@ -217,7 +209,7 @@ def visualize_om_prod_overlap( - **irradiance** (*string*), should be assigned to associated irradiance column name in prod_df. Data should be in [W/m^2]. 
- om_col_dict: dict of {str : str} + om_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the O&M data - **siteid** (*string*), should be assigned to column name for user's site-ID @@ -230,29 +222,24 @@ def visualize_om_prod_overlap( - **asset** (*string*), should be assigned to column name for affected asset in user's O&M ticket - prod_fldr: str + prod_fldr : str Path to directory where plots should be saved. - - e_cumu: bool + e_cumu : bool Boolean flag that specifies whether the production (energy output) data is input as cumulative information ("True") or on a per time-step basis ("False"). - - be_cumu: bool + be_cumu : bool Boolean that specifies whether the baseline production data is input as cumulative information ("True") or on a per time-step basis ("False"). - - samp_freq: str + samp_freq : str Specifies how the performance data should be resampled. String value is any frequency that is valid for pandas.DataFrame.resample(). For example, a value of 'D' will resample on a daily basis, and a value of 'H' will resample on an hourly basis. - - pshift: float + pshift : float Value that specifies how many hours the performance data should be shifted by to help align performance data with O&M data. Mostly necessary when resampling frequencies are larger than an hour - - baselineflag: bool + baselineflag : bool Boolean that specifies whether or not to display the baseline (i.e., expected production profile) as calculated with the irradiance data using the baseline production data. A value of 'True' will display the @@ -263,7 +250,6 @@ def visualize_om_prod_overlap( ------- list List of Plotly figure handles generated by function for each site within prod_df. 
- """ # assigning dictionary items to local variables for cleaner code diff --git a/pvops/timeseries/models/AIT.py b/pvops/timeseries/models/AIT.py index e2f21b4..cc3b502 100644 --- a/pvops/timeseries/models/AIT.py +++ b/pvops/timeseries/models/AIT.py @@ -184,10 +184,10 @@ def AIT_calc(prod_df, prod_col_dict): Parameters ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to the production data - prod_col_dict: dict of {str : str} + prod_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the production data diff --git a/pvops/timeseries/models/iec.py b/pvops/timeseries/models/iec.py index 4b8b37e..5ce436c 100644 --- a/pvops/timeseries/models/iec.py +++ b/pvops/timeseries/models/iec.py @@ -2,21 +2,18 @@ def iec_calc(prod_df, prod_col_dict, meta_df, meta_col_dict, gi_ref=1000.0): - """ - Calculates expected energy using measured irradiance + """Calculates expected energy using measured irradiance based on IEC calculations - Parameters - ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to the production data after having been processed by the perf_om_NA_qc and overlappingDFs functions. This data frame needs at least the columns specified in prod_col_dict. - prod_col_dict: dict of {str : str} + prod_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the production data @@ -33,12 +30,12 @@ def iec_calc(prod_df, prod_col_dict, meta_df, meta_col_dict, - **dcsize**, (*string*), should be assigned to preferred column name for site capacity in prod_df - meta_df: DataFrame + meta_df : DataFrame A data frame corresponding to site metadata. At the least, the columns in meta_col_dict be present. 
- meta_col_dict: dict of {str : str} + meta_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the meta-data @@ -48,14 +45,12 @@ def iec_calc(prod_df, prod_col_dict, meta_df, meta_col_dict, column name corresponding to site capacity, where data is in [kW] - gi_ref: float + gi_ref : float reference plane of array irradiance in W/m^2 at which a site capacity is determined (default value is 1000 [W/m^2]) - Returns - ------- DataFrame A data frame for production data with a new column, diff --git a/pvops/timeseries/models/linear.py b/pvops/timeseries/models/linear.py index 47ea1df..61c2e59 100644 --- a/pvops/timeseries/models/linear.py +++ b/pvops/timeseries/models/linear.py @@ -425,13 +425,11 @@ def modeller(prod_col_dict, """Wrapper method to conduct the modelling of the timeseries data. To input the data, there are two options. - - Option 1: include full production data in `prod_df` - parameter and `test_split` so that the test split is conducted - - Option 2: conduct the test-train split prior to calling - the function and pass in data under `test_df` - and `train_df` + + - Option 1: include full production data in `prod_df` + parameter and `test_split` so that the test split is conducted + - Option 2: conduct the test-train split prior to calling + the function and pass in data under `test_df` and `train_df` Parameters ---------- @@ -439,7 +437,7 @@ def modeller(prod_col_dict, A dictionary that contains the column names relevant for the production data - - **siteid** (*string*), should be assigned to + - siteid (*string*), should be assigned to site-ID column name in prod_df - **timestamp** (*string*), should be assigned to time-stamp column name in prod_df @@ -519,12 +517,12 @@ def modeller(prod_col_dict, A list of parameter definitions (defined as lists) to be excluded in the model. 
For example, if want to exclude a parameter in a 4-covariate model that uses 1 degree on first covariate, 2 degrees on second covariate, and no degrees for 3rd and 4th covariates, you would specify a - exclude_params as `[ [1,2,0,0] ]`. Multiple definitions can be added to list depending + exclude_params as ``[ [1,2,0,0] ]``. Multiple definitions can be added to list depending on how many terms need to be excluded. If a time_weighted parameter is selected, a time weighted definition will need to be appended to *each* exclusion definition. Continuing the example above, if one wants to exclude "hour 0" for the - same term, then the exclude_params must be `[ [1,2,0,0,0] ]`, where the last 0 represents the + same term, then the exclude_params must be ``[ [1,2,0,0,0] ]``, where the last 0 represents the time-weighted partition setting. verbose : int @@ -533,12 +531,14 @@ def modeller(prod_col_dict, Returns ------- - `model`, which is a `pvops.timeseries.models.linear.Model` object, has a useful attribute - `estimators`, which allows access to model performance and data splitting information. - - `train_df`, which is the training split of prod_df - - `test_df`, which is the testing split of prod_df + model + which is a ``pvops.timeseries.models.linear.Model`` object, has a useful attribute + estimators + which allows access to model performance and data splitting information. 
+ train_df + which is the training split of prod_df + test_df + which is the testing split of prod_df """ estimators = estimators or {'OLS': {'estimator': LinearRegression()}, 'RANSAC': {'estimator': RANSACRegressor()}} diff --git a/pvops/timeseries/preprocess.py b/pvops/timeseries/preprocess.py index 03a6bdf..43abc13 100644 --- a/pvops/timeseries/preprocess.py +++ b/pvops/timeseries/preprocess.py @@ -13,21 +13,21 @@ def establish_solar_loc(prod_df, prod_col_dict, meta_df, meta_col_dict): Parameters ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to production data containing a datetime index. - prod_col_dict: dict of {str : str} + prod_col_dict : dict of {str : str} A dictionary that contains the column names associated with the production data, which consist of at least: - **siteid** (*string*), should be assigned to site-ID column name in prod_df - meta_df: DataFrame + meta_df : DataFrame A data frame corresponding to site metadata. At the least, the columns in meta_col_dict be present. The index must contain the site IDs used in prod_df. - meta_col_dict: dict of {str : str} + meta_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the meta-data - **longitude** (*string*), should be assigned to site's longitude @@ -74,7 +74,6 @@ def normalize_production_by_capacity(prod_df, ---------- prod_df: DataFrame A data frame corresponding to production data. - prod_df_col_dict: dict of {str : str} A dictionary that contains the column names associated with the production data, which consist of at least: @@ -83,17 +82,21 @@ def normalize_production_by_capacity(prod_df, - **siteid** (*string*), should be assigned to site-ID column name in prod_df - **capacity_normalized_power** (*string*), should be assigned to a column name where the normalized output signal will be stored - + meta_df: DataFrame A data frame corresponding to site metadata. At the least, the columns in meta_col_dict be present. 
- meta_col_dict: dict of {str : str} A dictionary that contains the column names relevant for the meta-data - **siteid** (*string*), should be assigned to site-ID column name - **dcsize** (*string*), should be assigned to column name corresponding to site's DC size + + Returns + ------- + prod_df : DataFrame + normalized production data """ prod_df = prod_df.copy() @@ -127,10 +130,10 @@ def prod_irradiance_filter(prod_df, prod_col_dict, meta_df, meta_col_dict, Parameters ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to production data. - prod_df_col_dict: dict of {str : str} + prod_df_col_dict : dict of {str : str} A dictionary that contains the column names associated with the production data, which consist of at least: @@ -140,11 +143,11 @@ def prod_irradiance_filter(prod_df, prod_col_dict, meta_df, meta_col_dict, - **irradiance** (*string*), should be assigned to associated irradiance column name in prod_df - **clearsky_irr** (*string*), should be assigned to clearsky irradiance column name in prod_df - meta_df: DataFrame + meta_df : DataFrame A data frame corresponding to site metadata. At the least, the columns in meta_col_dict be present. - meta_col_dict: dict of {str : str} + meta_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the meta-data - **siteid** (*string*), should be assigned to site-ID column name @@ -257,10 +260,9 @@ def prod_inverter_clipping_filter(prod_df, prod_col_dict, meta_df, meta_col_dict Parameters ---------- - prod_df: DataFrame + prod_df : DataFrame A data frame corresponding to production data. 
- - prod_df_col_dict: dict of {str : str} + prod_df_col_dict : dict of {str : str} A dictionary that contains the column names associated with the production data, which consist of at least: @@ -269,18 +271,17 @@ def prod_inverter_clipping_filter(prod_df, prod_col_dict, meta_df, meta_col_dict - **siteid** (*string*), should be assigned to site-ID column name in prod_df - **powerprod** (*string*), should be assigned to associated power production column name in prod_df - meta_df: DataFrame + meta_df : DataFrame A data frame corresponding to site metadata. At the least, the columns in meta_col_dict be present. - - meta_col_dict: dict of {str : str} + meta_col_dict : dict of {str : str} A dictionary that contains the column names relevant for the meta-data - **siteid** (*string*), should be assigned to site-ID column name - **latitude** (*string*), should be assigned to column name corresponding to site's latitude - **longitude** (*string*), should be assigned to column name corresponding to site's longitude - model: str + model : str A string distinguishing the inverter clipping detection model programmed in pvanalytics. Available options: ['geometric', 'threshold', 'levels'] @@ -289,7 +290,7 @@ def prod_inverter_clipping_filter(prod_df, prod_col_dict, meta_df, meta_col_dict Returns ------- - prod_df: DataFrame + prod_df : DataFrame If drop=True, a filtered dataframe with clipping periods removed is returned. """ From eb8a2aaf884e3b0ee057095eaa638f2a018275d9 Mon Sep 17 00:00:00 2001 From: klbonne Date: Thu, 3 Nov 2022 11:11:18 -0400 Subject: [PATCH 12/27] adding ait and iec to timeseries docs --- docs/timeseries.rst | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/timeseries.rst b/docs/timeseries.rst index 12e461a..9a23bc2 100644 --- a/docs/timeseries.rst +++ b/docs/timeseries.rst @@ -17,3 +17,18 @@ timeseries.models.linear module :undoc-members: :show-inheritance: +timeseries.models.AIT module +------------------------------- + +.. 
automodule:: timeseries.models.AIT + :members: + :undoc-members: + :show-inheritance: + +timeseries.models.iec module +------------------------------- + +.. automodule:: timeseries.models.iec + :members: + :undoc-members: + :show-inheritance: \ No newline at end of file From 5d320c036e82f70e6913b87f00a2728030eca998 Mon Sep 17 00:00:00 2001 From: klbonne Date: Thu, 3 Nov 2022 11:11:48 -0400 Subject: [PATCH 13/27] Beginning efforts to build docs through GHactions --- .github/workflows/builddocs.yml | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 .github/workflows/builddocs.yml diff --git a/.github/workflows/builddocs.yml b/.github/workflows/builddocs.yml new file mode 100644 index 0000000..51bb454 --- /dev/null +++ b/.github/workflows/builddocs.yml @@ -0,0 +1,31 @@ +# This is a basic workflow to help you get started with Actions + +name: Docs + +# Controls when the workflow will run +on: + # Triggers the workflow on push or pull request events but only for the master branch + push: + branches: [ master ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + # This workflow contains a single job called "build" + build: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: actions/setup-python@v2 + - run: pip install --upgrade pip && pip install sphinx + - run: git config user.name 'github-actions[bot]' && git config user.email 'github-actions[bot]@users.noreply.github.com' + - name: Publish docs + run: mkdocs gh-deploy \ No newline at end of file From d9408bd28cb9c775ac09e86245576d3b8f4323d7 Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 
4 Nov 2022 11:02:42 -0400 Subject: [PATCH 14/27] workflows --- .github/workflows/builddocs.yml | 20 +++++++++++++---- .github/workflows/builddocs2.yml | 37 ++++++++++++++++++++++++++++++++ requirements.txt | 14 +++++++++++- 3 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/builddocs2.yml diff --git a/.github/workflows/builddocs.yml b/.github/workflows/builddocs.yml index 51bb454..83fc248 100644 --- a/.github/workflows/builddocs.yml +++ b/.github/workflows/builddocs.yml @@ -25,7 +25,19 @@ jobs: with: fetch-depth: 0 - uses: actions/setup-python@v2 - - run: pip install --upgrade pip && pip install sphinx - - run: git config user.name 'github-actions[bot]' && git config user.email 'github-actions[bot]@users.noreply.github.com' - - name: Publish docs - run: mkdocs gh-deploy \ No newline at end of file + with: + python-version: '3.8' + - name: Setup Python 3.8 + run: | + python -m pip install --upgrade pip + python -m pip install requirements.txt + pip install --upgrade coverage pytest + - name: Install package + run: | + python -m pip install -e . 
+ - name: Build documentation + run: sphinx-build -b html -d docs/_build/doctrees docs/_build/html + - uses: actions/upload-artifact@v3 + with: + name: html-docs + path: docs/_build/html \ No newline at end of file diff --git a/.github/workflows/builddocs2.yml b/.github/workflows/builddocs2.yml new file mode 100644 index 0000000..88da951 --- /dev/null +++ b/.github/workflows/builddocs2.yml @@ -0,0 +1,37 @@ +# This is a basic workflow to help you get started with Actions + +name: Docs + +# Controls when the workflow will run +on: + # Triggers the workflow on push or pull request events but only for the master branch + push: + branches: [ master ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: actions/setup-python@v2 + with: + python-version: '3.8' + - name: Setup Python 3.8 + run: | + python -m pip install --upgrade pip + python -m pip install requirements.txt + pip install --upgrade coverage pytest + - name: Install package + run: | + python -m pip install -e . 
+ - name: Build documentation + run: sphinx build -b + - run: git config user.name 'github-actions[bot]' && git config user.email 'github-actions[bot]@users.noreply.github.com' + - name: Publish docs + run: mkdocs gh-deploy \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 661ea83..8a4cb56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +# Required pandas>=0.23.0 numpy>=1.15.0 scipy>=1.2.0 @@ -15,4 +16,15 @@ timezonefinder pyDOE keras>=2.3.0 tensorflow>=2.2.0rc4 -protobuf==3.20.* \ No newline at end of file +protobuf==3.20.* + +# Testing / Docs +pytest +ipykernel +nbsphinx +nbsphinx-link +sphinx-copybutton +nbformat +nbconvert +coverage +sphinx_rtd_theme \ No newline at end of file From 33cf185c956da0c18d5c4f3c29ba0e242f8a1d2f Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 4 Nov 2022 11:08:45 -0400 Subject: [PATCH 15/27] linting --- docs/conf.py | 42 +++++++++++++++++++++++++----------------- pvops/iv/extractor.py | 2 +- setup.py | 4 ++-- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1c6de69..d908ce7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,8 +22,6 @@ sys.path.insert(0, os.path.abspath("../pvops/iv")) sys.path.insert(0, os.path.abspath("../pvops/iv/models")) - - # -- Project information ----------------------------------------------------- project = "pvops" @@ -40,26 +38,36 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - "sphinx.ext.autodoc", # pull in documentation from docstrings in a semi-automatic way. - "nbsphinx", # nbsphinx is a Sphinx extension that provides a source parser for *.ipynb files - "nbsphinx_link", # A sphinx extension for including notebook files from outside the sphinx source root. 
- "sphinx_copybutton", # adds copy button to code blocks - "sphinx.ext.coverage", # `make coverage` summarizes what has docstrings - 'sphinx.ext.doctest', # allows for testing of code snippets - 'sphinx.ext.viewcode', # add links to highlighted source code - 'sphinx.ext.napoleon' # add parsing for google/numpy style docs - ] + "sphinx.ext.autodoc", + # pull in documentation from docstrings in a semi-automatic way. + "nbsphinx", + # nbsphinx is a Sphinx extension that provides a source parser + # for *.ipynb files + "nbsphinx_link", + # A sphinx extension for including notebook files from outside + # the sphinx source root. + "sphinx_copybutton", + # adds copy button to code blocks + "sphinx.ext.coverage", + # `make coverage` summarizes what has docstrings + 'sphinx.ext.doctest', + # allows for testing of code snippets + 'sphinx.ext.viewcode', + # add links to highlighted source code + 'sphinx.ext.napoleon' + # add parsing for google/numpy style docs + ] coverage_show_missing_items = True -napoleon_numpy_docstring = True # use numpy style -napoleon_google_docstring = False # not google style -napoleon_use_rtype = False # option for return section formatting +napoleon_numpy_docstring = True # use numpy style +napoleon_google_docstring = False # not google style +napoleon_use_rtype = False # option for return section formatting numpydoc_show_class_members = True numpydoc_show_inherited_class_members = False numpydoc_class_members_toctree = False -napoleon_use_ivar = True # option for attribute section formatting -napoleon_use_param = False # option for parameter section formatting +napoleon_use_ivar = True # option for attribute section formatting +napoleon_use_param = False # option for parameter section formatting # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -81,4 +89,4 @@ # relative to this directory. 
They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] -html_style = 'css/my_style.css' +html_style = 'css/my_style.css' diff --git a/pvops/iv/extractor.py b/pvops/iv/extractor.py index c0f7b52..d79c1ed 100644 --- a/pvops/iv/extractor.py +++ b/pvops/iv/extractor.py @@ -127,7 +127,7 @@ def create_string_object(self, iph, io, rs, rsh, nnsvth): return sim def f_multiple_samples(self, params): - #TODO write docstring + # TODO write docstring iph, io, rs, rsh, nnsvth = params if self.user_func is None: diff --git a/setup.py b/setup.py index a2eba30..f096275 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ from photovoltaic power systems. The library includes functions for processing text data as well as fusion of the text information with time series data for visualization of contextual details for data -analysis. +analysis. Documentation: https://pvops.readthedocs.io/en/latest/index.html @@ -50,7 +50,7 @@ 'pyDOE', 'keras>=2.3.0', 'tensorflow>=2.2.0', - 'protobuf==3.20.*' + 'protobuf==3.20.*' ] DOCS_REQUIRE = [ From 4a3d6f03cf36c9be0c8e7b8e0c1cd2988f961034 Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 4 Nov 2022 11:16:34 -0400 Subject: [PATCH 16/27] workflow --- .github/workflows/builddocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/builddocs.yml b/.github/workflows/builddocs.yml index 83fc248..fee8eeb 100644 --- a/.github/workflows/builddocs.yml +++ b/.github/workflows/builddocs.yml @@ -6,7 +6,7 @@ name: Docs on: # Triggers the workflow on push or pull request events but only for the master branch push: - branches: [ master ] + branches: [ master, docstrings ] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: From b8a4dc835f290ebbc1f5e1f884e2d6d5c3b1d507 Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 4 Nov 2022 11:17:59 -0400 Subject: [PATCH 17/27] fix --- 
.github/workflows/builddocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/builddocs.yml b/.github/workflows/builddocs.yml index fee8eeb..5d4c084 100644 --- a/.github/workflows/builddocs.yml +++ b/.github/workflows/builddocs.yml @@ -30,7 +30,7 @@ jobs: - name: Setup Python 3.8 run: | python -m pip install --upgrade pip - python -m pip install requirements.txt + python -m pip install -r requirements.txt pip install --upgrade coverage pytest - name: Install package run: | From ff7264a366d860174f111250259650608ca81bd9 Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 4 Nov 2022 11:20:40 -0400 Subject: [PATCH 18/27] workflow --- .github/workflows/builddocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/builddocs.yml b/.github/workflows/builddocs.yml index 5d4c084..983e201 100644 --- a/.github/workflows/builddocs.yml +++ b/.github/workflows/builddocs.yml @@ -1,6 +1,6 @@ # This is a basic workflow to help you get started with Actions -name: Docs +name: docs build experiment # Controls when the workflow will run on: From 6fd9814d9d3ba9e0f4d4a7c4afd9142ad11d10e2 Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 4 Nov 2022 11:31:32 -0400 Subject: [PATCH 19/27] action update --- .github/workflows/builddocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/builddocs.yml b/.github/workflows/builddocs.yml index 983e201..94c3d67 100644 --- a/.github/workflows/builddocs.yml +++ b/.github/workflows/builddocs.yml @@ -36,7 +36,7 @@ jobs: run: | python -m pip install -e . 
- name: Build documentation - run: sphinx-build -b html -d docs/_build/doctrees docs/_build/html + run: sphinx-build -b html docs/ docs/_build/html - uses: actions/upload-artifact@v3 with: name: html-docs From 2c94eaeae8b6beef0f0671aa983adcbdba2a8540 Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 4 Nov 2022 11:50:49 -0400 Subject: [PATCH 20/27] adding install pandoc to workflow --- .github/workflows/builddocs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/builddocs.yml b/.github/workflows/builddocs.yml index 94c3d67..f7a6382 100644 --- a/.github/workflows/builddocs.yml +++ b/.github/workflows/builddocs.yml @@ -24,6 +24,8 @@ jobs: - uses: actions/checkout@v2 with: fetch-depth: 0 + - name: Install pandoc + runs: apt-get update -y && apt-get install -y pandoc - uses: actions/setup-python@v2 with: python-version: '3.8' From a20e84a2547060b972838dbf30ab1f31573086dd Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 4 Nov 2022 11:53:08 -0400 Subject: [PATCH 21/27] fixing imports --- pvops/iv/timeseries_simulator.py | 4 ++-- pvops/tests/test_iv.py | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pvops/iv/timeseries_simulator.py b/pvops/iv/timeseries_simulator.py index 10dcbd2..928fa49 100644 --- a/pvops/iv/timeseries_simulator.py +++ b/pvops/iv/timeseries_simulator.py @@ -1,10 +1,10 @@ import numpy as np import matplotlib.pyplot as plt from datetime import timedelta -import simulator +from pvops.iv.simulator import Simulator -class IVTimeseriesGenerator(simulator.Simulator): +class IVTimeseriesGenerator(Simulator): def __init__(self, **iv_sim_kwargs): """Simulate a PV System across time. 
diff --git a/pvops/tests/test_iv.py b/pvops/tests/test_iv.py index 2d622db..b5c7f70 100644 --- a/pvops/tests/test_iv.py +++ b/pvops/tests/test_iv.py @@ -1,6 +1,5 @@ import random import os -import sys import pandas as pd import numpy as np import pvops.iv.timeseries_simulator From eeceb4115619bf2d297b43cb04b00920fe934bf5 Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 4 Nov 2022 11:57:01 -0400 Subject: [PATCH 22/27] typo --- .github/workflows/builddocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/builddocs.yml b/.github/workflows/builddocs.yml index f7a6382..36c9897 100644 --- a/.github/workflows/builddocs.yml +++ b/.github/workflows/builddocs.yml @@ -25,7 +25,7 @@ jobs: with: fetch-depth: 0 - name: Install pandoc - runs: apt-get update -y && apt-get install -y pandoc + run: apt-get update -y && apt-get install -y pandoc - uses: actions/setup-python@v2 with: python-version: '3.8' From f6023652fd3182360fc6d985f0439d02447ce3dd Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 4 Nov 2022 11:59:50 -0400 Subject: [PATCH 23/27] adding sudo --- .github/workflows/builddocs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/builddocs.yml b/.github/workflows/builddocs.yml index 36c9897..2fc3091 100644 --- a/.github/workflows/builddocs.yml +++ b/.github/workflows/builddocs.yml @@ -25,7 +25,7 @@ jobs: with: fetch-depth: 0 - name: Install pandoc - run: apt-get update -y && apt-get install -y pandoc + run: sudo apt-get update -y && sudo apt-get install -y pandoc - uses: actions/setup-python@v2 with: python-version: '3.8' From b65a6d393fcd4e7f0c1ce63abf8b3976060b4f8b Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 4 Nov 2022 12:26:56 -0400 Subject: [PATCH 24/27] remove unused action.ymls and fix llinting --- .github/workflows/builddocs2.yml | 37 ----------------------------- .github/workflows/pythonpublish.yml | 25 ------------------- docs/conf.py | 2 +- pvops/iv/extractor.py | 16 +++++++------ 
pvops/iv/simulator.py | 14 +++++------ pvops/tests/test_iv.py | 8 +++---- pvops/text/nlp_utils.py | 5 ++-- pvops/text/preprocess.py | 4 ++-- pvops/timeseries/models/linear.py | 2 +- pvops/timeseries/preprocess.py | 4 ++-- 10 files changed, 28 insertions(+), 89 deletions(-) delete mode 100644 .github/workflows/builddocs2.yml delete mode 100644 .github/workflows/pythonpublish.yml diff --git a/.github/workflows/builddocs2.yml b/.github/workflows/builddocs2.yml deleted file mode 100644 index 88da951..0000000 --- a/.github/workflows/builddocs2.yml +++ /dev/null @@ -1,37 +0,0 @@ -# This is a basic workflow to help you get started with Actions - -name: Docs - -# Controls when the workflow will run -on: - # Triggers the workflow on push or pull request events but only for the master branch - push: - branches: [ master ] - - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -jobs: - build: - runs-on: ubuntu-latest - steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v2 - with: - fetch-depth: 0 - - uses: actions/setup-python@v2 - with: - python-version: '3.8' - - name: Setup Python 3.8 - run: | - python -m pip install --upgrade pip - python -m pip install requirements.txt - pip install --upgrade coverage pytest - - name: Install package - run: | - python -m pip install -e . 
- - name: Build documentation - run: sphinx build -b - - run: git config user.name 'github-actions[bot]' && git config user.email 'github-actions[bot]@users.noreply.github.com' - - name: Publish docs - run: mkdocs gh-deploy \ No newline at end of file diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml deleted file mode 100644 index 951a50b..0000000 --- a/.github/workflows/pythonpublish.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Upload to PyPi -on: - release: - types: [published] - -jobs: - deploy: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine - - name: Build and publish - env: - TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} - run: | - python setup.py sdist bdist_wheel - twine upload dist/* diff --git a/docs/conf.py b/docs/conf.py index d908ce7..cccee2c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -56,7 +56,7 @@ # add links to highlighted source code 'sphinx.ext.napoleon' # add parsing for google/numpy style docs - ] + ] coverage_show_missing_items = True diff --git a/pvops/iv/extractor.py b/pvops/iv/extractor.py index d79c1ed..df7962c 100644 --- a/pvops/iv/extractor.py +++ b/pvops/iv/extractor.py @@ -21,9 +21,11 @@ class BruteForceExtractor(): input_df : DataFrame Contains IV curves with a datetime index current_col : string - Indicates column where current values in IV curve are located; each cell is an array of current values in a single IV curve + Indicates column where current values in IV curve are located; + each cell is an array of current values in a single IV curve voltage_col : string - Indicates column where voltage values in IV curve are located; each cell is an array of voltage values in a single IV curve + Indicates column where voltage values in IV curve are located; + 
each cell is an array of voltage values in a single IV curve irradiance_col : string Indicates column where irradiance value (W/m2) temperature_col : string @@ -75,7 +77,7 @@ def __init__(self, input_df, current_col, voltage_col, irradiance_col, temperatu self.params = {} def create_string_object(self, iph, io, rs, rsh, nnsvth): - #TODO write docstring + # TODO write docstring kwargs = {} if self.Simulator_mod_specs is not None: kwargs.update({'mod_specs': self.Simulator_mod_specs}) @@ -241,11 +243,11 @@ def fit_params(self, cell_parameters, n_mods, bounds_func, user_func=None, verbo Parameters ---------- cell_parameters : dict - Cell-level parameters, usually extracted from the CEC + Cell-level parameters, usually extracted from the CEC database, which will be used as the initial guesses in the optimization process. n_mods : int - if int, defines the number of modules in a + if int, defines the number of modules in a string(1=simulate a single module) bounds_func : function Function to establish the bounded search space @@ -261,9 +263,9 @@ def bounds_func(iph,io,rs,rsh,nnsvth,perc_adjust=0.5): (nnsvth - 10*nnsvth*perc_adjust, nnsvth + 10*nnsvth*perc_adjust)) user_func : function - Optional, a function similar to `self.create_string_object` + Optional, a function similar to `self.create_string_object` which has the following inputs: - `self, iph, io, rs, rsh, nnsvth`. This can be used to + `self, iph, io, rs, rsh, nnsvth`. This can be used to extract unique failure parameterization. verbose : int if verbose >= 1, print information about fitting diff --git a/pvops/iv/simulator.py b/pvops/iv/simulator.py index 4fd0e5f..9184db0 100644 --- a/pvops/iv/simulator.py +++ b/pvops/iv/simulator.py @@ -51,8 +51,6 @@ class Simulator(): # (e.g. Jinko_Solar_Co___Ltd_JKMS260P_60) } - - replacement_5params : dict Optional, replace the definitions of the five electrical parameters, which normally are extracted from the CEC database. 
These parameters can be determined by @@ -207,16 +205,16 @@ def add_preset_conditions(self, fault_name, fault_condition, save_name=None, **k where left is number in the first column and right is number in last column if pos not specified, the positions are chosen randomly e.g. add_preset_conditions('pole', fault_condition, light_shading = light_fault_condition, width = 2, pos = (5, 56)) - + - 'bird_droppings': Random positions are chosen for bird_dropping simulations - + - Optional specification is n_droppings. If not specified, chosen as random number between 1 and the number of cells in a column e.g. add_preset_conditions('bird_droppings', fault_condition, n_droppings = 3) - + fault_location: dict Same dict as one shown in __init__. - + kwargs: variables dependent on which fault_name you choose, see above Tip @@ -296,13 +294,13 @@ def add_manual_conditions(self, modcell, condition_dict): - 1D list: Give a single situation for this condition - 2D list: Give multiple situations for this condition - A list where each value signifies a cell's condition. - + If key is same as an existing key, the list is appended to list of scenarios \\ which that key owns condition_dict: dict Define the numerical value written in modcell - .. note:: + .. note:: If the variable is not defined, values will default to those specified \\ in the pristine condition, defined in __init__. 
diff --git a/pvops/tests/test_iv.py b/pvops/tests/test_iv.py index b5c7f70..d27513f 100644 --- a/pvops/tests/test_iv.py +++ b/pvops/tests/test_iv.py @@ -137,10 +137,10 @@ def test_classification(): # Irradiance & Temperature correction, and normalize axes prep_df = pvops.iv.preprocess.preprocess(df, 0.05, iv_col_dict, - resmpl_cutoff=0.03, correct_gt=True, - normalize_y=False, - CECmodule_parameters=sim.module_parameters, - n_mods=12, gt_correct_option=3) + resmpl_cutoff=0.03, correct_gt=True, + normalize_y=False, + CECmodule_parameters=sim.module_parameters, + n_mods=12, gt_correct_option=3) # Shuffle bigdf = prep_df.sample(frac=1).reset_index(drop=True) bigdf.dropna(inplace=True) diff --git a/pvops/text/nlp_utils.py b/pvops/text/nlp_utils.py index 88121d1..87f4167 100644 --- a/pvops/text/nlp_utils.py +++ b/pvops/text/nlp_utils.py @@ -7,6 +7,7 @@ import numpy as np from gensim.models import Word2Vec + class Doc2VecModel(BaseEstimator): """Performs a gensim Doc2Vec transformation of the input documents to create embedded representations of the documents. See gensim's @@ -93,8 +94,7 @@ class DataDensifier(BaseEstimator): """A data structure transformer which converts sparse data to dense data. This process is usually incorporated in this library when doing unsupervised machine learning. This class is built specifically to work inside a sklearn pipeline. - Therefore, it uses the default ``transform``, ``fit``, ``fit_transform`` method structure. - + Therefore, it uses the default ``transform``, ``fit``, ``fit_transform`` method structure. 
""" def transform(self, X, y=None): @@ -176,6 +176,7 @@ def create_stopwords(lst_langs=["english"], lst_add_words=[], lst_keep_words=[]) lst_stopwords = list(set(lst_stopwords) - set(lst_keep_words)) return sorted(list(set(lst_stopwords))) + def summarize_text_data(om_df, colname): """Display information about a set of documents located in a dataframe, including the number of samples, average number of words, vocabulary size, and number of words diff --git a/pvops/text/preprocess.py b/pvops/text/preprocess.py index 3c7fed0..38646f4 100644 --- a/pvops/text/preprocess.py +++ b/pvops/text/preprocess.py @@ -157,10 +157,10 @@ def get_dates( by first starting at the nearest rows. col_dict : dict of {str : str} A dictionary that contains the column names relevant for the get_dates fn - + - data : string, should be assigned to associated column which stores the text logs - eventstart : string, should be assigned to associated column which stores the log submission datetime - + print_info : bool Flag indicating whether to print information about the preprocessing progress infer_date_surrounding_rows : bool diff --git a/pvops/timeseries/models/linear.py b/pvops/timeseries/models/linear.py index 61c2e59..b44c2a9 100644 --- a/pvops/timeseries/models/linear.py +++ b/pvops/timeseries/models/linear.py @@ -425,7 +425,7 @@ def modeller(prod_col_dict, """Wrapper method to conduct the modelling of the timeseries data. To input the data, there are two options. 
- + - Option 1: include full production data in `prod_df` parameter and `test_split` so that the test split is conducted - Option 2: conduct the test-train split prior to calling diff --git a/pvops/timeseries/preprocess.py b/pvops/timeseries/preprocess.py index 43abc13..6cd58cd 100644 --- a/pvops/timeseries/preprocess.py +++ b/pvops/timeseries/preprocess.py @@ -82,7 +82,7 @@ def normalize_production_by_capacity(prod_df, - **siteid** (*string*), should be assigned to site-ID column name in prod_df - **capacity_normalized_power** (*string*), should be assigned to a column name where the normalized output signal will be stored - + meta_df: DataFrame A data frame corresponding to site metadata. At the least, the columns in meta_col_dict be present. @@ -92,7 +92,7 @@ def normalize_production_by_capacity(prod_df, - **siteid** (*string*), should be assigned to site-ID column name - **dcsize** (*string*), should be assigned to column name corresponding to site's DC size - + Returns ------- prod_df : DataFrame From a8e08d94a9b58511238168fc3214b243d5b54784 Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 4 Nov 2022 13:28:51 -0400 Subject: [PATCH 25/27] final linting fix (hopefully) --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index cccee2c..ca07736 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -56,7 +56,7 @@ # add links to highlighted source code 'sphinx.ext.napoleon' # add parsing for google/numpy style docs - ] +] coverage_show_missing_items = True From ae494b39d689ec23693d98deaaeabd2080585ff3 Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 11 Nov 2022 15:10:55 -0500 Subject: [PATCH 26/27] Fixing tensorflow issues by removing version requirements: best to let pip settle things... 
Fixed deprecation warning in text_text2time --- pvops/tests/test_text2time.py | 6 +++--- requirements-min.txt | 10 ++++------ requirements.txt | 10 ++++------ setup.py | 10 ++++------ 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/pvops/tests/test_text2time.py b/pvops/tests/test_text2time.py index f151a63..c9eee51 100755 --- a/pvops/tests/test_text2time.py +++ b/pvops/tests/test_text2time.py @@ -56,10 +56,10 @@ # Read data prod_data = pd.read_csv( - example_prodpath, error_bad_lines=False, engine='python') -om_data = pd.read_csv(example_OMpath, error_bad_lines=False, engine='python') + example_prodpath, on_bad_lines='skip', engine='python') +om_data = pd.read_csv(example_OMpath, on_bad_lines='skip', engine='python') metadata = pd.read_csv( - example_metapath, error_bad_lines=False, engine='python') + example_metapath, on_bad_lines='skip', engine='python') def check_same(df1, df2, col): diff --git a/requirements-min.txt b/requirements-min.txt index 661ea83..dd01359 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,6 +1,6 @@ -pandas>=0.23.0 -numpy>=1.15.0 -scipy>=1.2.0 +pandas +numpy +scipy scikit-learn nltk datefinder @@ -13,6 +13,4 @@ pvlib pvanalytics timezonefinder pyDOE -keras>=2.3.0 -tensorflow>=2.2.0rc4 -protobuf==3.20.* \ No newline at end of file +tensorflow \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 8a4cb56..b96b56a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ # Required -pandas>=0.23.0 -numpy>=1.15.0 -scipy>=1.2.0 +pandas +numpy +scipy scikit-learn nltk datefinder @@ -14,9 +14,7 @@ pvlib pvanalytics timezonefinder pyDOE -keras>=2.3.0 -tensorflow>=2.2.0rc4 -protobuf==3.20.* +tensorflow # Testing / Docs pytest diff --git a/setup.py b/setup.py index f096275..46ff2c8 100644 --- a/setup.py +++ b/setup.py @@ -33,9 +33,9 @@ ] INSTALL_REQUIRES = [ - 'numpy >= 1.15.0', - 'pandas >= 0.23.0', - 'scipy >= 1.2.0', + 'numpy', + 'pandas', + 'scipy', 'scikit-learn', 'nltk', 
'datefinder', @@ -48,9 +48,7 @@ 'pvanalytics', 'timezonefinder', 'pyDOE', - 'keras>=2.3.0', - 'tensorflow>=2.2.0', - 'protobuf==3.20.*' + 'tensorflow', ] DOCS_REQUIRE = [ From 65576a35058c88d43845584384355c6ea9aa3ae2 Mon Sep 17 00:00:00 2001 From: klbonne Date: Fri, 11 Nov 2022 15:30:57 -0500 Subject: [PATCH 27/27] Bringing back pythonpublish.yml (deleted by accident) --- .github/workflows/pythonpublish.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/pythonpublish.yml diff --git a/.github/workflows/pythonpublish.yml b/.github/workflows/pythonpublish.yml new file mode 100644 index 0000000..951a50b --- /dev/null +++ b/.github/workflows/pythonpublish.yml @@ -0,0 +1,25 @@ +name: Upload to PyPi +on: + release: + types: [published] + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + - name: Build and publish + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + run: | + python setup.py sdist bdist_wheel + twine upload dist/*