# -*- coding: utf-8 -*-
"""io.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1PI5VL7o_z4X9TLPTv9UNY8FtjkiUqpTp
"""
!pip install pandas matplotlib seaborn networkx shap scikit-learn plotly scipy kaleido  # kaleido is needed for plotly's static image export (fig.write_image)
"""Targeted Anxiety Intervention Analysis with Subgroup Discovery
This notebook enhances the MoE framework to incorporate subgroup discovery
techniques. It aims to identify specific subgroups within intervention groups
that show particularly strong or weak responses to the intervention. This
allows for a more targeted analysis of intervention effectiveness and
personalized insights.
Workflow:
1. Data Loading and Validation: Load synthetic anxiety intervention data, validate its structure, content, and data types. Handle potential errors gracefully.
2. Data Preprocessing: One-hot encode the group column and scale numerical features.
3. Subgroup Discovery: Implement a flexible subgroup discovery method to identify response-based subgroups.
4. SHAP Value Analysis: Quantify feature importance within discovered subgroups.
5. Data Visualization: Generate KDE, Violin, Parallel Coordinates, and Hypergraph plots, highlighting subgroups.
6. Statistical Summary: Perform bootstrap analysis and generate summary statistics for subgroups.
7. LLM Insights Report: Synthesize findings using Grok, Claude, and Grok-Enhanced, emphasizing subgroup-specific insights, validating LLM outputs, and handling potential LLM API errors.
Keywords: Subgroup Discovery, Targeted Analysis, Personalized Intervention, Anxiety, LLMs, SHAP, Data Visualization, Machine Learning
"""
# Suppress warnings (with caution - better to handle specific warnings)
import warnings
import logging # Use logging for more informative error/warning messages
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning, module="plotly")
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import shap
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
import numpy as np
from io import StringIO
import plotly.express as px
from scipy.stats import bootstrap
from matplotlib.colors import LinearSegmentedColormap
# --- Setup Logging ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Google Colab environment check
try:
    from google.colab import drive
    drive.mount("/content/drive")
    COLAB_ENV = True
except ImportError:
    COLAB_ENV = False
    logging.info("Not running in Google Colab environment.")

# Constants
OUTPUT_PATH = "./output_anxiety_subgroup_discovery/" if not COLAB_ENV else "/content/drive/MyDrive/output_anxiety_subgroup_discovery/"
PARTICIPANT_ID_COLUMN = "participant_id"
GROUP_COLUMN = "group" # Keep this for the initial loading and validation
ANXIETY_PRE_COLUMN = "anxiety_pre"
ANXIETY_POST_COLUMN = "anxiety_post"
MODEL_GROK_NAME = "grok-base"
MODEL_CLAUDE_NAME = "claude-3.7-sonnet"
MODEL_GROK_ENHANCED_NAME = "grok-enhanced"
LINE_WIDTH = 2.5
BOOTSTRAP_RESAMPLES = 500
# Placeholder API Keys (Security Warning)
GROK_API_KEY = "YOUR_GROK_API_KEY" # Placeholder
CLAUDE_API_KEY = "YOUR_CLAUDE_API_KEY" # Placeholder
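# A minimal sketch of the safer alternative (variable names are illustrative):
# read the keys from environment variables instead of committing them in source.
# GROK_API_KEY = os.environ.get("GROK_API_KEY", "")
# CLAUDE_API_KEY = os.environ.get("CLAUDE_API_KEY", "")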
# --- Functions ---
def create_output_directory(path):
    """Creates the output directory if it doesn't exist, handling errors."""
    try:
        os.makedirs(path, exist_ok=True)
        logging.info(f"Output directory created/exists: {path}")
        return True
    except OSError as e:
        logging.error(f"Failed to create output directory: {path}. Error: {e}")
        return False

def load_data_from_synthetic_string(csv_string):
    """Loads data from a synthetic CSV string, handling errors."""
    try:
        csv_file = StringIO(csv_string)
        df = pd.read_csv(csv_file)
        logging.info(f"Data loaded successfully. First 5 rows:\n{df.head()}")
        logging.info(f"Data types:\n{df.dtypes}")
        return df
    except pd.errors.ParserError as e:
        logging.error(f"Error parsing CSV data: {e}")
        return None
    except Exception as e:
        logging.error(f"Error loading data: {e}")
        return None

def validate_dataframe(df, required_columns):
    """Validates the DataFrame against required columns and data types, handling errors."""
    if df is None:
        logging.error("DataFrame is None. Cannot validate.")
        return False
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        logging.error(f"Missing columns: {missing_columns}")
        return False
    for col in required_columns:
        if col not in (PARTICIPANT_ID_COLUMN, GROUP_COLUMN):
            if not pd.api.types.is_numeric_dtype(df[col]):
                logging.error(f"Non-numeric values found in column: {col}")
                return False
    if df[PARTICIPANT_ID_COLUMN].duplicated().any():
        logging.error("Duplicate participant IDs found.")
        return False
    valid_groups = ["Group A", "Group B", "Control"]
    if not df[GROUP_COLUMN].isin(valid_groups).all():
        logging.error(f"Invalid group labels found. Must be one of: {valid_groups}")
        return False
    for col in [ANXIETY_PRE_COLUMN, ANXIETY_POST_COLUMN]:
        if df[col].min() < 0 or df[col].max() > 10:
            logging.error(f"Anxiety scores in column '{col}' are out of range (0-10).")
            return False
    logging.info("DataFrame validation successful.")
    return True

def analyze_text_with_llm(text, model_name):  # Placeholder LLM analysis
    """Placeholder for LLM analysis. Replace with actual API calls."""
    text_lower = text.lower()
    logging.info(f"Calling {model_name} with text: {text[:50]}...")  # Log first 50 chars
    if model_name == MODEL_GROK_NAME:
        if "subgroup analysis" in text_lower:
            return "Grok-base: Subgroup analysis reveals varied responses to the intervention, with some subgroups showing significant improvement while others show minimal change."
        elif "shap summary" in text_lower:
            return "Grok-base: SHAP values highlight feature importance across subgroups, indicating that pre-anxiety is a strong predictor of post-anxiety in all subgroups, but group membership has varying effects."
        else:
            return f"Grok-base: General analysis on '{text}'."
    elif model_name == MODEL_CLAUDE_NAME:
        if "subgroup analysis" in text_lower:
            return "Claude 3.7: Subgroup discovery shows distinct patterns of response to the intervention, identifying groups with strong, weak, and typical responses based on pre- and post-anxiety levels."
        elif "violin plot" in text_lower:
            return "Claude 3.7: Violin plots detail subgroup distributions, clearly showing the differences in anxiety levels and variability between the identified subgroups."
        else:
            return f"Claude 3.7: Enhanced subgroup analysis on '{text}'."
    elif model_name == MODEL_GROK_ENHANCED_NAME:
        if "subgroup analysis" in text_lower:
            return "Grok-Enhanced: Subgroup analysis provides nuanced insights into targeted interventions, revealing specific characteristics of participants who respond differently to the intervention."
        elif "parallel coordinates" in text_lower:
            return "Grok-Enhanced: Parallel coordinates visualize subgroup-specific trajectories, showing how individual participants within each subgroup change from pre- to post-intervention anxiety levels."
        else:
            return f"Grok-Enhanced: In-depth subgroup-focused analysis on '{text}'."
    return f"Model '{model_name}' not supported."
def scale_data(df, columns):
    """Scales specified columns of the DataFrame using MinMaxScaler, handling errors."""
    try:
        scaler = MinMaxScaler()
        df[columns] = scaler.fit_transform(df[columns])
        logging.info(f"Data scaled successfully. Description:\n{df[columns].describe()}")
        return df
    except Exception as e:
        logging.error(f"Error scaling data: {e}")
        return None  # Return None on error

def discover_subgroups(df, encoded_group_cols, output_path):
    """Identifies subgroups based on intervention response, handling errors.

    Args:
        df: DataFrame with one-hot encoded group columns.
        encoded_group_cols: List of the one-hot encoded group column names.
        output_path: Path for output (not used here, but good practice).

    Returns:
        DataFrame with 'response_level' column, and subgroup description.
        Returns (None, error_message) on failure.
    """
    try:
        df['response_level'] = 'typical'  # Default response level
        # Construct conditions using the encoded columns
        for group_col in encoded_group_cols:
            if 'Group A' in group_col:  # Check if this encoded column represents Group A
                # Strong responders in Group A: post-anxiety is less than the *overall* mean of pre-anxiety
                df.loc[(df[group_col] == 1) & (df[ANXIETY_POST_COLUMN] < df[ANXIETY_PRE_COLUMN].mean()), 'response_level'] = 'strong'
            elif 'Group B' in group_col:  # Check if this encoded column represents Group B
                # Weak responders in Group B: post-anxiety is *greater* than the *overall* mean of pre-anxiety
                df.loc[(df[group_col] == 1) & (df[ANXIETY_POST_COLUMN] > df[ANXIETY_PRE_COLUMN].mean()), 'response_level'] = 'weak'
        subgroup_desc = (
            "Subgroups identified based on response to intervention:\n"
            "- Strong Responders (Group A, anxiety_post < mean(anxiety_pre)):\n"
            "  Participants in Group A showing a strong decrease in post-intervention anxiety.\n"
            "- Weak Responders (Group B, anxiety_post > mean(anxiety_pre)):\n"
            "  Participants in Group B showing a weak or no decrease in post-intervention anxiety.\n"
            "- Typical Responders: Participants not classified as strong or weak responders.\n"
        )
        logging.info(f"Subgroup Discovery Placeholder Output:\n{subgroup_desc}")
        logging.info(f"Response level value counts:\n{df['response_level'].value_counts()}")
        return df, subgroup_desc
    except Exception as e:
        logging.error(f"Error during subgroup discovery: {e}")
        return None, str(e)
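
# An illustrative alternative (not what the function above uses): classify by each
# participant's own pre-to-post change rather than the overall pre-anxiety mean.
# The function name and threshold value are assumptions for this sketch.
#
# def discover_subgroups_by_change(df, threshold=0.2):
#     change = df[ANXIETY_PRE_COLUMN] - df[ANXIETY_POST_COLUMN]
#     df['response_level'] = 'typical'
#     df.loc[change >= threshold, 'response_level'] = 'strong'
#     df.loc[change <= 0, 'response_level'] = 'weak'
#     return df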
def calculate_shap_values(df, feature_columns, target_column, output_path):
    """Calculates SHAP values using a RandomForestRegressor, handling errors."""
    try:
        model_rf = RandomForestRegressor(random_state=42).fit(df[feature_columns], df[target_column])  # Added random_state
        explainer = shap.TreeExplainer(model_rf)
        shap_values = explainer.shap_values(df[feature_columns])
        plt.figure(figsize=(10, 8))
        plt.style.use('dark_background')
        shap.summary_plot(shap_values, df[feature_columns], show=False, color_bar=True)
        plt.savefig(os.path.join(output_path, 'shap_summary.png'))
        plt.close()
        logging.info(f"SHAP summary plot saved to {output_path}")
        return f"SHAP summary for features {feature_columns} predicting {target_column}"
    except Exception as e:
        logging.error(f"Error calculating SHAP values: {e}")
        return "Error calculating SHAP values."

def create_kde_plot(df, column1, column2, output_path, colors):
    """Creates a KDE plot of two columns, handling errors."""
    try:
        plt.figure(figsize=(10, 6))
        plt.style.use('dark_background')
        sns.kdeplot(
            data=df[column1], color=colors[0], label=column1.capitalize(), linewidth=LINE_WIDTH
        )
        sns.kdeplot(
            data=df[column2], color=colors[1], label=column2.capitalize(), linewidth=LINE_WIDTH
        )
        plt.title("KDE Plot of Anxiety Levels", fontsize=16, color="white")
        plt.legend(facecolor="black", edgecolor="white", labelcolor="white")
        plt.grid(alpha=0.2, linestyle='--')
        plt.tight_layout()
        plt.savefig(os.path.join(output_path, "kde_plot.png"))
        plt.close()
        return f"KDE plot visualizing distributions of {column1} and {column2}"
    except Exception as e:
        logging.error(f"Error creating KDE plot: {e}")
        return "Error creating KDE plot."

def create_violin_plot(df, group_column, y_column, output_path, colors):
    """Creates a violin plot, handling errors."""
    try:
        plt.figure(figsize=(10, 6))
        plt.style.use('dark_background')
        # Assign x to hue as well: newer seaborn deprecates `palette` without `hue`.
        sns.violinplot(data=df, x=group_column, y=y_column, hue=group_column,
                       palette=colors, legend=False, linewidth=LINE_WIDTH)
        plt.title('Violin Plot of Anxiety Distribution by Group', color='white')
        plt.savefig(os.path.join(output_path, 'violin_plot.png'))
        plt.close()
        logging.info(f"Violin plot saved to {output_path}")
        return f"Violin plot showing {y_column} across {group_column}"
    except Exception as e:
        logging.error(f"Error creating violin plot: {e}")
        return "Error creating violin plot."

def create_parallel_coordinates_plot(df, group_column, anxiety_pre_column, anxiety_post_column, output_path, colors):
    """Creates a parallel coordinates plot, handling errors."""
    try:
        # Deduplicate the column list: group_column may itself be 'response_level',
        # and selecting duplicate labels would break the .map() call below.
        plot_cols = list(dict.fromkeys([group_column, anxiety_pre_column, anxiety_post_column, 'response_level']))
        plot_df = df[plot_cols].copy()
        # px.parallel_coordinates requires a numeric color column, so map each
        # response level to an integer code rather than a hex color string.
        # (The `colors` argument is kept for interface consistency; the plot
        # itself uses a continuous color scale.)
        unique_levels = plot_df['response_level'].unique()
        level_codes = {level: i for i, level in enumerate(unique_levels)}
        plot_df['response_code'] = plot_df['response_level'].map(level_codes)
        fig = px.parallel_coordinates(
            plot_df,
            color='response_code',
            dimensions=[anxiety_pre_column, anxiety_post_column],
            title="Anxiety Levels: Pre- vs Post-Intervention by Response Subgroup",
            color_continuous_scale=px.colors.sequential.Viridis
        )
        fig.update_layout(plot_bgcolor='black', paper_bgcolor='black', font_color='white', title_font_size=16)
        fig.write_image(os.path.join(output_path, 'parallel_coordinates_plot_subgroups.png'))  # Requires kaleido
        logging.info(f"Parallel coordinates plot saved to {output_path}")
        return "Parallel coordinates plot of anxiety pre vs post intervention by response subgroup"
    except Exception as e:
        logging.error(f"Error creating parallel coordinates plot: {e}")
        return "Error creating parallel coordinates plot."

def visualize_hypergraph(df, anxiety_pre_column, anxiety_post_column, output_path, colors):
    """Visualizes a hypergraph, handling errors."""
    try:
        G = nx.Graph()
        participant_ids = df[PARTICIPANT_ID_COLUMN].tolist()
        G.add_nodes_from(participant_ids, bipartite=0)
        feature_sets = {
            "anxiety_pre": df[PARTICIPANT_ID_COLUMN][df[anxiety_pre_column] > df[anxiety_pre_column].mean()].tolist(),
            "anxiety_post": df[PARTICIPANT_ID_COLUMN][df[anxiety_post_column] > df[anxiety_post_column].mean()].tolist(),
            "strong_response": df[PARTICIPANT_ID_COLUMN][df['response_level'] == 'strong'].tolist()
        }
        feature_nodes = list(feature_sets.keys())
        G.add_nodes_from(feature_nodes, bipartite=1)
        for feature, participants in feature_sets.items():
            for participant in participants:
                G.add_edge(participant, feature)
        pos = nx.bipartite_layout(G, participant_ids)
        color_map = [colors[0] if node in participant_ids else colors[1] for node in G]
        plt.figure(figsize=(12, 10))
        plt.style.use('dark_background')
        nx.draw(G, pos, with_labels=True, node_color=color_map, font_color="white", edge_color="gray",
                width=LINE_WIDTH, node_size=700, font_size=10)
        plt.title("Hypergraph Representation of Anxiety Patterns with Subgroups", color="white")
        plt.savefig(os.path.join(output_path, "hypergraph_subgroups.png"))
        plt.close()
        logging.info(f"Hypergraph saved to {output_path}")
        return "Hypergraph visualizing participant relationships, highlighting response subgroups"
    except Exception as e:
        logging.error(f"Error creating hypergraph: {e}")
        return "Error creating hypergraph."

def perform_bootstrap(data, statistic, n_resamples=BOOTSTRAP_RESAMPLES):
    """Performs bootstrap analysis, handling errors."""
    try:
        bootstrap_result = bootstrap((data,), statistic, n_resamples=n_resamples, method='percentile', random_state=42)  # Added random_state
        logging.info(f"Bootstrap CI: {bootstrap_result.confidence_interval}")
        return bootstrap_result.confidence_interval
    except Exception as e:
        logging.error(f"Error performing bootstrap: {e}")
        return None
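
# Illustrative usage with a different statistic (values are assumptions):
# median_ci = perform_bootstrap(df[ANXIETY_POST_COLUMN], np.median, n_resamples=1000)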
def save_summary(df, bootstrap_ci, output_path):
    """Saves summary statistics, handling errors."""
    try:
        summary_text = (
            df.describe().to_string() +
            f"\nBootstrap CI for anxiety_post mean (all participants): {bootstrap_ci}\n\n"
            f"Summary by Response Subgroup:\n"
            f"{df.groupby('response_level')[[ANXIETY_PRE_COLUMN, ANXIETY_POST_COLUMN]].describe().to_string()}"
        )
        with open(os.path.join(output_path, 'summary.txt'), 'w') as f:
            f.write(summary_text)
        logging.info(f"Summary statistics saved to {output_path}")
        return summary_text
    except Exception as e:
        logging.error(f"Error saving summary: {e}")
        return "Error saving summary."

def generate_insights_report(summary_stats_text, subgroup_desc, shap_analysis_info, kde_plot_desc, violin_plot_desc, parallel_coords_desc, hypergraph_desc, output_path):
    """Generates a combined insights report using (simulated) LLM calls."""
    try:
        grok_insights = (
            analyze_text_with_llm(f"Analyze summary statistics including subgroup analysis:\n{summary_stats_text}",
                                  MODEL_GROK_NAME) + "\n\n" +
            analyze_text_with_llm(f"Interpret SHAP summary for subgroups: {shap_analysis_info}", MODEL_GROK_NAME) + "\n\n" +
            analyze_text_with_llm(f"Describe the identified subgroups: {subgroup_desc}", MODEL_GROK_NAME) + "\n\n"
        )
        claude_insights = (
            analyze_text_with_llm(f"Interpret KDE plot for subgroups: {kde_plot_desc}", MODEL_CLAUDE_NAME) + "\n\n" +
            analyze_text_with_llm(f"Interpret Violin plot for subgroups: {violin_plot_desc}", MODEL_CLAUDE_NAME) + "\n\n" +
            analyze_text_with_llm(f"Interpret Parallel Coordinates Plot for subgroups: {parallel_coords_desc}", MODEL_CLAUDE_NAME) + "\n\n" +
            analyze_text_with_llm(f"Interpret Hypergraph highlighting subgroups: {hypergraph_desc}", MODEL_CLAUDE_NAME) + "\n\n"
        )
        grok_enhanced_insights = analyze_text_with_llm(
            "Provide enhanced insights on anxiety intervention effectiveness based on subgroup analysis, SHAP, and Parallel Coordinates, focusing on differences between subgroups.",
            MODEL_GROK_ENHANCED_NAME)
        combined_insights = f"""
Combined Insights Report: Anxiety Intervention Analysis with Subgroup Discovery

Grok-base Analysis:
{grok_insights}

Claude 3.7 Sonnet Analysis:
{claude_insights}

Grok-Enhanced Analysis (Subgroup Focused):
{grok_enhanced_insights}

Synthesized Summary:
This report synthesizes insights from Grok-base, Claude 3.7 Sonnet, and Grok-Enhanced, focusing on subgroup discovery to refine the analysis of anxiety intervention effectiveness. Grok-base provides a statistical overview, initial subgroup interpretations, and feature importances across subgroups, noting the strong influence of pre-anxiety. Claude 3.7 Sonnet details visual patterns and distributions, highlighting subgroup-specific variations and the shift towards lower anxiety in the 'strong responders' subgroup. Grok-Enhanced, with a focus on subgroups, delivers nuanced interpretations and actionable recommendations tailored to different response patterns, revealing specific characteristics of participants. The combined expert analyses, enhanced by subgroup discovery, provide a targeted and personalized understanding of the anxiety intervention, enabling tailored strategies for different responder profiles. The identified subgroups ('strong responders', 'weak responders', and 'typical responders') show distinct patterns in their response to the intervention.
"""
        with open(os.path.join(output_path, 'insights.txt'), 'w') as f:
            f.write(combined_insights)
        logging.info(f"Insights report saved to {output_path}")
        return "Insights report generated successfully."
    except Exception as e:
        logging.error(f"Error generating insights report: {e}")
        return "Error generating insights report."

# --- Main Script ---
if __name__ == "__main__":
    # Create output directory
    if not create_output_directory(OUTPUT_PATH):
        exit()

    # Synthetic dataset (small, embedded in code)
    synthetic_dataset = """
participant_id,group,anxiety_pre,anxiety_post
P001,Group A,4,2
P002,Group A,3,1
P003,Group A,5,3
P004,Group B,6,5
P005,Group B,5,4
P006,Group B,7,6
P007,Control,3,3
P008,Control,4,4
P009,Control,2,2
P010,Control,5,5
"""

    # Load and validate data
    df = load_data_from_synthetic_string(synthetic_dataset)
    if df is None:
        exit()
    required_columns = [PARTICIPANT_ID_COLUMN, GROUP_COLUMN, ANXIETY_PRE_COLUMN, ANXIETY_POST_COLUMN]
    if not validate_dataframe(df, required_columns):
        exit()

    # Keep a copy of the original dataframe for visualizations
    df_original = df.copy()

    # One-hot encode 'group' *before* subgroup discovery and scaling
    df = pd.get_dummies(df, columns=[GROUP_COLUMN], prefix=GROUP_COLUMN, drop_first=False)  # One-hot encode, keep all groups
    encoded_group_cols = [col for col in df.columns if col.startswith(f"{GROUP_COLUMN}_")]

    # Scale data
    df = scale_data(df, [ANXIETY_PRE_COLUMN, ANXIETY_POST_COLUMN] + encoded_group_cols)
    if df is None:
        exit()

    # Subgroup Discovery (using the encoded group columns)
    df, subgroup_desc = discover_subgroups(df, encoded_group_cols, OUTPUT_PATH)
    if df is None:
        exit()

    # SHAP analysis (using the encoded group columns)
    shap_feature_columns = encoded_group_cols + [ANXIETY_PRE_COLUMN]
    shap_analysis_info = calculate_shap_values(df.copy(), shap_feature_columns, ANXIETY_POST_COLUMN, OUTPUT_PATH)

    # Visualization colors
    neon_colors = ["#FF00FF", "#00FFFF", "#FFFF00", "#00FF00"]

    # Create visualizations
    kde_plot_desc = create_kde_plot(
        df, ANXIETY_PRE_COLUMN, ANXIETY_POST_COLUMN, OUTPUT_PATH, neon_colors[:2]
    )  # Use scaled, encoded df
    violin_plot_desc = create_violin_plot(
        df, 'response_level', ANXIETY_POST_COLUMN, OUTPUT_PATH, neon_colors
    )  # Use the new 'response_level' column
    parallel_coords_desc = create_parallel_coordinates_plot(
        df, 'response_level', ANXIETY_PRE_COLUMN, ANXIETY_POST_COLUMN, OUTPUT_PATH, neon_colors
    )  # Use 'response_level'
    hypergraph_desc = visualize_hypergraph(
        df, ANXIETY_PRE_COLUMN, ANXIETY_POST_COLUMN, OUTPUT_PATH, neon_colors[:2]
    )  # Use the modified df

    # Bootstrap analysis
    bootstrap_ci = perform_bootstrap(df[ANXIETY_POST_COLUMN], np.mean)

    # Save summary statistics
    summary_stats_text = save_summary(df, bootstrap_ci, OUTPUT_PATH)

    # Generate insights report
    generate_insights_report(summary_stats_text, subgroup_desc, shap_analysis_info, kde_plot_desc, violin_plot_desc, parallel_coords_desc, hypergraph_desc, OUTPUT_PATH)

    print("Execution completed successfully - Subgroup Discovery Enhanced Notebook.")