
Commit 6c25358

Merge pull request #474 from cameron-a-johnson/dev/fixes-to-gsp

Functionality restored for GSP training.

2 parents 0814edf + bd7c6f4

3 files changed: +90 −55 lines

TRAIN_AND_RUN_README.md (+69)

@@ -338,6 +338,75 @@ train_command \
task_name=my_m2_training
```

## The Global Step Predictor (GSP)

### How the GSP relates to the TCN Activity Classifier
The TCN activity classifier above, in its current configuration, takes in a second or
two of video artifacts (e.g., for the "Locs&Confs" version: pose joint pixel coordinates
and confidences, the user's hand detection locations and confidences, and other procedure-relevant
object pixel coordinates and confidences), and outputs a confidence for each entry in a vector
of activities (examples: "labels" in config/activity_labels/medical), assuming at most
one activity is occurring "presently."

The GSP then takes this per-frame-window confidence vector as input and keeps track over time of which
activities ("steps" in the GSP context) have occurred, and which step the user is on.

In short, once the "next step" has been activated for long enough, and with enough confidence,
the GSP progresses to that step as the latest "completed" step.

Assumptions:
- One activity, or "background" (none of the listed activities), happens at a time.
- The activities must happen in a specific linear order.
- If an activity is detected with strong confidence far out of order (e.g. we're on step 3
  and we detect step 8), the GSP does not mark step 8 as completed.
- A single "skipped step" is possible given some criteria. Skipping one step can be allowed unconditionally.
  Skipping one step can also be allowed if the "skipped step" has been activated under "easier" criteria
  (a lower confidence threshold and/or fewer frames above that threshold). We can also configure the GSP to skip one step
  simply because a threshold number of frames have passed since we completed the most recent step.
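The progression rule above can be sketched in a few lines. This is a hypothetical illustration of the thresholding idea only, not the actual GSP implementation; the function name, threshold, and window length are all made up:

```python
import numpy as np

def advance_step(current_step, conf_history, threshold=0.7, min_frames=5):
    """Advance to the next step if its recent confidences clear the bar.

    conf_history: (n_frames, n_steps) array of per-window TCN confidences.
    """
    next_step = current_step + 1
    if next_step >= conf_history.shape[1]:
        return current_step  # already at the final step
    recent = conf_history[-min_frames:, next_step]
    # "Activated long enough, with enough confidence": every one of the
    # last `min_frames` windows must exceed the threshold.
    if len(recent) >= min_frames and np.all(recent > threshold):
        return next_step
    return current_step
```

A real implementation layers the skip rules from the list above on top of this basic advance check.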
### Training the GSP

To "train" the GSP, we simply compute the average true-positive output score per class: that is, the
average confidence of each TCN activity classification in its output vector, computed only over
frames where ground truth states that activity is happening. This includes the background class.

To do this, we must run inference on videos whose ground truth the TCN has never seen (and which are,
ideally, quite independent from the training videos). The validation or test splits of your dataset may suffice.

*Note:* If you have already run training, test-set prediction outputs should have been produced, in a
file named `tcn_activity_predictions.kwcoco.json`.

If you don't have that file, the TCN's training harness can be run with `train=false` to only run
inference and save the test data's output in the needed KWCOCO output format. Example:

```
# See the TCN docs above for the training data structure expected under root_dir.
python train_command \
  experiment=r18/feat_locsconfs \
  paths.root_dir=/path/to/my/data/splits/ \
  task_name=r18_my_TCN_i_just_trained \
  train=false \
  ckpt_path=model_files/activity_classifier/r18_tcn.ckpt
```

This should create a new predictions output file, e.g. `tcn_activity_predictions.kwcoco.json`.

Then, for each class, we filter the frames down to those where the ground truth indicates that class
activity is occurring, and simply average the TCN output for that activity over those frames.

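As a minimal sketch of this filter-and-average step (with made-up in-memory arrays standing in for the KWCOCO truth and prediction files):

```python
import numpy as np

def average_tp_activations(probs, gt, n_classes):
    """probs: (n_frames, n_classes) TCN confidences; gt: (n_frames,) true class ids."""
    avg_probs = np.zeros(n_classes)
    for class_id in range(n_classes):
        # Keep only frames where ground truth says this class is active,
        # then average that class's own confidence over those frames.
        true_frames = probs[gt == class_id, class_id]
        if len(true_frames) > 0:
            avg_probs[class_id] = true_frames.mean()
    return avg_probs
```

The repository's actual implementation does the same thing via kwcoco annotation lookups (see the `compute_average_TP_activations` diff below in this commit).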
Given the test dataset split ground truth you just gave as input to the `train_command`, and the predictions
output file your `train_command` produced, create the average-TP-activation numpy file as follows:

```
python angel_system/global_step_prediction/run_expirement.py r18 \
  path/to/TEST-activity_truth.coco.json \
  path/to/tcn_activity_predictions.kwcoco.json \
  path/to/tcn_activity_predictions.kwcoco.json
```

That numpy file can then be provisioned to the default GSP `model_files` filepath, e.g., for the R18
task, `model_files/task_monitor/global_step_predictor_act_avgs_r18.npy`.
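The artifact itself is just a 1-D numpy vector indexed by activity id (background included). A hypothetical round trip, with illustrative values and a made-up local filename:

```python
import numpy as np

# Illustrative per-class average TP activations; real values come from the
# run_expirement.py step above.
avg_probs = np.array([0.61, 0.83, 0.77, 0.70])
np.save("global_step_predictor_act_avgs_example.npy", avg_probs)

# The GSP loads the vector back the same way at startup.
loaded = np.load("global_step_predictor_act_avgs_example.npy")
assert np.array_equal(avg_probs, loaded)
```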
## Docker local testing

***to start the service run:***

angel_system/global_step_prediction/global_step_predictor.py (+17 −8)

@@ -143,20 +143,21 @@ def get_activity_order_from_config(self, config_fn):
 
     def compute_average_TP_activations(
         self,
-        coco_train: kwcoco.CocoDataset,
+        coco_preds: kwcoco.CocoDataset,
+        coco_truth: kwcoco.CocoDataset,
     ):
         # For each activity, given the Ground Truth-specified
         # frame subset where that activity is happening, get the
         # average activation of that class.
 
-        all_activity_ids = coco_train.categories().get("id")
+        all_activity_ids = coco_preds.categories().get("id")
         print(f"all_activity_ids: {all_activity_ids}")
 
         # create a mapping of all the annots ids to image ids in the training set
-        tr_aid_to_gid = coco_train.annots().get("image_id", keepid=True)
+        tr_aid_to_gid = coco_preds.annots().get("image_id", keepid=True)
         print(f"training set annotations: {len(tr_aid_to_gid)}")
 
-        all_vid_ids = coco_train.videos().get("id")
+        all_vid_ids = coco_preds.videos().get("id")
         print(
             f"Computing average true positive activations for {len(all_vid_ids)} video(s)."
         )
@@ -165,10 +166,18 @@ def compute_average_TP_activations(
         avg_probs = np.zeros(max(all_activity_ids) + 1)
 
         for activity_id in all_activity_ids:
-            sub_dset = coco_train.subset(gids=tr_aid_to_gid.keys(), copy=True)
-            probs_for_true_inds = np.asarray(sub_dset.annots().get("prob"))[
-                :, activity_id
-            ]
+            probs_for_true_inds = np.array([])
+
+            for ann_id in coco_truth.index.anns:
+                ann = coco_truth.index.anns[ann_id]
+                if ann["category_id"] == activity_id:
+                    referenced_img_id = ann["image_id"]
+                    pred_ann = coco_preds.annots(image_id=referenced_img_id).objs
+                    assert len(pred_ann) == 1
+                    pred_ann = pred_ann[0]
+                    TP_prob = pred_ann["prob"][activity_id]
+                    probs_for_true_inds = np.append(probs_for_true_inds, TP_prob)
+
             avg_prob = np.mean(probs_for_true_inds)
             avg_probs[activity_id] = avg_prob
 
angel_system/global_step_prediction/run_expirement.py (+4 −47)

@@ -53,7 +53,9 @@ def run_inference_all_vids(
         if avg_probs is not None:
             step_predictor.get_average_TP_activations_from_array(avg_probs)
         else:
-            avg_probs = step_predictor.compute_average_TP_activations(coco_train)
+            avg_probs = step_predictor.compute_average_TP_activations(
+                coco_train, coco_test
+            )
         save_file = (
             code_dir
             / "model_files"
@@ -104,57 +106,12 @@ def get_unique(activity_ids):
 
         print(f"unique broad steps: {get_unique(broad_step_gts)}")
 
-        _, granular_preds, granular_gt = step_predictor.plot_gt_vs_predicted_one_recipe(
+        _ = step_predictor.plot_gt_vs_predicted_one_recipe(
             granular_step_gts,
             recipe_type,
             fname_suffix=f"{str(vid_id)}_granular_{extra_output_suffix}",
             granular_or_broad="granular",
         )
-        # _, broad_preds, broad_gt = step_predictor.plot_gt_vs_predicted_one_recipe(
-        #     broad_step_gts,
-        #     recipe_type,
-        #     fname_suffix=f"{str(vid_id)}_broad_{extra_output_suffix}",
-        #     granular_or_broad="broad",
-        # )
-
-        # print(f"broad_gt len: {len(broad_gt)}")
-        # print(f"broad_preds len: {len(broad_preds)}")
-        # print(f"granular_gt len: {len(granular_gt)}")
-        # print(f"granular_preds len: {len(granular_preds)}")
-
-        min_length = min(len(granular_preds), len(granular_gt))
-
-        preds.extend(granular_preds[:min_length])
-        gt.extend(granular_gt[:min_length])
-
-    num_act_classes = len(step_predictor.activity_config["labels"])
-    fig, ax = plt.subplots(figsize=(num_act_classes, num_act_classes))
-
-    print(f"gt len: {len(gt)}")
-    print(f"preds len: {len(preds)}")
-    print(f"labels: {step_predictor.activity_config['labels']}")
-    label_ids = [item["id"] for item in step_predictor.activity_config["labels"]]
-    labels = [item["full_str"] for item in step_predictor.activity_config["labels"]]
-
-    broad_cm = confusion_matrix(gt, preds, labels=label_ids, normalize="true")
-
-    # granular_cm = confusion_matrix(
-    #     granular_step_gts,
-    #     granular_preds,
-    #     labels=step_predictor.activity_config["labels"],
-    #     normalize="true"
-    # )
-
-    sns.heatmap(broad_cm, annot=True, ax=ax, fmt=".2f", linewidth=0.5, vmin=0, vmax=1)
-
-    ax.set_xlabel("Predicted labels")
-    ax.set_ylabel("True labels")
-    # ax.set_title(f'CM GSP Accuracy: {acc:.4f}')
-    ax.xaxis.set_ticklabels(labels, rotation=25)
-    ax.yaxis.set_ticklabels(labels, rotation=0)
-    # fig.savefig(f"{self.hparams.output_dir}/confusion_mat_val_acc_{acc:.4f}.png", pad_inches=5)
-    print(f"Saving confusion matrix to {out_file}")
-    fig.savefig(out_file.as_posix(), pad_inches=5)
 
 
 @click.command(context_settings={"help_option_names": ["-h", "--help"]})
