Skip to content

Commit

Permalink
integrate _process_categorical_variables into main function, other small changes
Browse files Browse the repository at this point in the history
  • Loading branch information
colinvwood committed Jan 9, 2025
1 parent 7c5fb0a commit 7405811
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
18 changes: 9 additions & 9 deletions q2_composition/_ancombc2.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,7 @@ def ancombc2(
verbose=True,
)

# extract data of interest from the returned R list and transform to output
# format
# extract data of interest from the returned R list
model_statistics = output[output.names.index('res')]
with (ro.default_converter + pandas2ri.converter).context():
model_statistics_df = ro.conversion.get_conversion().rpy2py(
Expand All @@ -118,7 +117,8 @@ def ancombc2(
# rename columns to original names
slices = _rename_columns(slices, metadata)

# split categorical variables from levels and append reference where needed
# split categorical variables from levels and annotate references
slices = _process_categorical_variables(slices, metadata)

return transform(data=slices, to_type=ANCOMBC2OutputDirFmt)

Expand Down Expand Up @@ -498,7 +498,7 @@ def _split_into_slices(model_statistics: pd.DataFrame) -> ANCOMBC2SliceMapping:

# remove slice prefix from column names where present
slice_df = slice_df.rename(
lambda name: name.lstrip(f'{slice_name}_'), axis='columns'
lambda name: name.removeprefix(f'{slice_name}_'), axis='columns'
)

slices[slice_name] = slice_df
Expand Down Expand Up @@ -536,7 +536,7 @@ def _rename_columns(
for slice_column in slice_df.columns:
for r_name, name in r_names.items():
if slice_column.startswith(r_name):
renamed = name + slice_column.lstrip(r_name)
renamed = name + slice_column.removeprefix(r_name)
slice_df.rename(
{slice_column: renamed}, axis='columns', inplace=True
)
Expand Down Expand Up @@ -590,8 +590,8 @@ def _process_categorical_variables(
)
for slice_df in slices.values():
for column in slice_df.columns:
if '::' in column:
variable, _ = column.split('::')
if _is_categorical(column, metadata):
variable, _ = _parse_variable_and_level(column, metadata)
reference_level = reference_levels[variable]
slice_df[column].attrs['reference'] = reference_level

Expand Down Expand Up @@ -622,8 +622,8 @@ def _is_categorical(column: str, metadata: qiime2.Metadata) -> bool:
metadata.get_column(md_column), CategoricalMetadataColumn
):
return True
else:
return False

return False

return False

Expand Down
7 changes: 4 additions & 3 deletions q2_composition/tests/test_ancombc2.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ def test_wrapped_ancombc2(self):
using the moving pictures tutorial data.
Note: the `_rename_columns` function is patched so that column names
are shared between the R output and the wrapper's output.
are shared between the R output and the wrapper's output. Note also
that `_process_categorical_variables` does nothing in this case because
columns have not been renamed and are thus not detected as categorical
in the metadata. These methods are tested elsewhere.
'''
model_stats_fp = self.test_data_fp / 'r-model-statistics.tsv'
ground_truth_model_stats = pd.read_csv(model_stats_fp, sep='\t')
Expand All @@ -89,8 +92,6 @@ def test_wrapped_ancombc2(self):

struc_zeros = output_format.structural_zeros.view(pd.DataFrame)

print(ground_truth_model_stats.columns.difference(model_stats.columns))
print(model_stats.columns.difference(ground_truth_model_stats.columns))
assert_frame_equal(ground_truth_model_stats, model_stats)
assert_frame_equal(ground_truth_struc_zeros, struc_zeros)

Expand Down

0 comments on commit 7405811

Please sign in to comment.