From 7405811bbcc43b47f6ff3ab1a402869eed09961a Mon Sep 17 00:00:00 2001 From: Colin Wood Date: Thu, 9 Jan 2025 13:36:43 -0700 Subject: [PATCH] integrate _process_categorical_variables into main function, other small changes --- q2_composition/_ancombc2.py | 18 +++++++++--------- q2_composition/tests/test_ancombc2.py | 7 ++++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/q2_composition/_ancombc2.py b/q2_composition/_ancombc2.py index 7684824..1c4a125 100644 --- a/q2_composition/_ancombc2.py +++ b/q2_composition/_ancombc2.py @@ -96,8 +96,7 @@ def ancombc2( verbose=True, ) - # extract data of interest from the returned R list and transform to output - # format + # extract data of interest from the returned R list model_statistics = output[output.names.index('res')] with (ro.default_converter + pandas2ri.converter).context(): model_statistics_df = ro.conversion.get_conversion().rpy2py( @@ -118,7 +117,8 @@ def ancombc2( # rename columns to original names slices = _rename_columns(slices, metadata) - # split categorical variables from levels and append reference where needed + # split categorical variables from levels and annotate references + slices = _process_categorical_variables(slices, metadata) return transform(data=slices, to_type=ANCOMBC2OutputDirFmt) @@ -498,7 +498,7 @@ def _split_into_slices(model_statistics: pd.DataFrame) -> ANCOMBC2SliceMapping: # remove slice prefix from column names where present slice_df = slice_df.rename( - lambda name: name.lstrip(f'{slice_name}_'), axis='columns' + lambda name: name.removeprefix(f'{slice_name}_'), axis='columns' ) slices[slice_name] = slice_df @@ -536,7 +536,7 @@ def _rename_columns( for slice_column in slice_df.columns: for r_name, name in r_names.items(): if slice_column.startswith(r_name): - renamed = name + slice_column.lstrip(r_name) + renamed = name + slice_column.removeprefix(r_name) slice_df.rename( {slice_column: renamed}, axis='columns', inplace=True ) @@ -590,8 +590,8 @@ def _process_categorical_variables( ) for slice_df in slices.values(): for column in slice_df.columns: - if '::' in column: - variable, _ = column.split('::') + if _is_categorical(column, metadata): + variable, _ = _parse_variable_and_level(column, metadata) reference_level = reference_levels[variable] slice_df[column].attrs['reference'] = reference_level @@ -622,8 +622,8 @@ def _is_categorical(column: str, metadata: qiime2.Metadata) -> bool: metadata.get_column(md_column), CategoricalMetadataColumn ): return True - else: - return False + + return False return False diff --git a/q2_composition/tests/test_ancombc2.py b/q2_composition/tests/test_ancombc2.py index 6143789..8e93463 100644 --- a/q2_composition/tests/test_ancombc2.py +++ b/q2_composition/tests/test_ancombc2.py @@ -65,7 +65,10 @@ def test_wrapped_ancombc2(self): using the moving pictures tutorial data. Note: the `_rename_columns` function is patched so that column names - are shared between the R output and the wrapper's output. + are shared between the R output and the wrapper's output. Note also + that `_process_categorical_variables` does nothing in this case because + columns have not been renamed and are thus not detected as categorical + in the metadata. These methods are tested elsewhere. ''' model_stats_fp = self.test_data_fp / 'r-model-statistics.tsv' ground_truth_model_stats = pd.read_csv(model_stats_fp, sep='\t') @@ -89,8 +92,6 @@ def test_wrapped_ancombc2(self): struc_zeros = output_format.structural_zeros.view(pd.DataFrame) - print(ground_truth_model_stats.columns.difference(model_stats.columns)) - print(model_stats.columns.difference(ground_truth_model_stats.columns)) assert_frame_equal(ground_truth_model_stats, model_stats) assert_frame_equal(ground_truth_struc_zeros, struc_zeros)