Merge overleaf-2024-11-24-0236 into main

yamanksingla · Nov 24, 2024 · a04dd0e · a04dd0e
2 parents ecebbae + a36ba4a
commit a04dd0e
Show file tree

Hide file tree

Showing 23 changed files with 1,376 additions and 74 deletions.
diff --git a/chapter-explaining-behavior.tex b/chapter-explaining-behavior.tex
diff --git a/images/2-stage-ci-uat-2.pdf b/images/2-stage-ci-uat-2.pdf
diff --git a/images/Class-Impressions.pdf b/images/Class-Impressions.pdf
diff --git a/images/MRPC_Mean_Entropy.pdf b/images/MRPC_Mean_Entropy.pdf
diff --git a/images/MRPC_Mean_Entropy_.pdf b/images/MRPC_Mean_Entropy_.pdf
diff --git a/images/MRPC_TSNE_PERPLEXITY_30.pdf b/images/MRPC_TSNE_PERPLEXITY_30.pdf
diff --git a/images/SNLI_Mean_Entropy.pdf b/images/SNLI_Mean_Entropy.pdf
diff --git a/images/SNLI_Mean_Entropy_.pdf b/images/SNLI_Mean_Entropy_.pdf
diff --git a/images/SNLI_TSNE_PERPLEXITY_120.pdf b/images/SNLI_TSNE_PERPLEXITY_120.pdf
diff --git a/images/SST_Mean_Entropy.pdf b/images/SST_Mean_Entropy.pdf
diff --git a/images/SST_Mean_Entropy_.pdf b/images/SST_Mean_Entropy_.pdf
diff --git a/images/SST_TSNE_PERPLEXITY_100.pdf b/images/SST_TSNE_PERPLEXITY_100.pdf
diff --git a/images/UAT.jpg b/images/UAT.jpg
diff --git a/images/UAT.pdf b/images/UAT.pdf
diff --git a/images/boxplot_entropy.pdf b/images/boxplot_entropy.pdf
diff --git a/images/length_wise_plot.jpg b/images/length_wise_plot.jpg
diff --git a/images/length_wise_plot.png b/images/length_wise_plot.png
diff --git a/images/success_rate_vs_token_length.png b/images/success_rate_vs_token_length.png
diff --git a/images/thesis-link.pdf b/images/thesis-link.pdf
diff --git a/introduction.tex b/introduction.tex
@@ -148,7 +148,8 @@ \chapter{Introduction: The Two Cultures of Behavioral Sciences}
 
 
 
-\textit{Outline for the upcoming chapters}: Following the two traditions of behavioral sciences, we delve into both explanation and prediction. Figure~\ref{fig:factors-of-communication-thesis-links} gives a visual description of the various chapters and how they link with each other. In Chapter-\ref{chatper:Explaining Behavior: Persuasion Strategies}, we start with a more traditional approach to behavior explanation, where we cover the first works on extracting persuasion strategies in advertisements (both images and videos) \cite{kumar2023persuasion,bhattacharya2023video}. The contributions of these works include constructing the largest set of generic persuasion strategies based on theoretical and empirical studies in marketing, social psychology, and machine learning literature and releasing the first datasets to enable the study and model development for the same. These works have been deployed to understand the correlation between the kinds of marketing campaigns and customer behavior measured by clicks, views, and other marketing key performance indicators (KPIs). 
+\textit{Outline for the upcoming chapters}: Following the two traditions of behavioral sciences, we delve into both explanation and prediction. Figure~\ref{fig:factors-of-communication-thesis-links} gives a visual description of the various chapters and how they link with each other. In Chapter-\ref{chatper:Explaining Behavior: Persuasion Strategies}, we start with a more traditional approach to behavior explanation, where we cover the first works on extracting persuasion strategies in advertisements (both images and videos) \cite{kumar2023persuasion,bhattacharya2023video}. The contributions of these works include constructing the largest set of generic persuasion strategies based on theoretical and empirical studies in marketing, social psychology, and machine learning literature and releasing the first datasets to enable the study and model development for the same. These works have been deployed to understand the correlation between the kinds of marketing campaigns and customer behavior measured by clicks, views, and other marketing key performance indicators (KPIs). Further, we also introduce methods to mine behavioral models to understand what they learn. This approach provides a converse approach for human behavior understanding. While persuasion strategies help a human correlate and understand content (message) and behavior, universal adversarial triggers help understand what models learn, which makes them successful in predicting behavior.
+
 
 
 Following this, in Chapter-\ref{chatper:Content and Behavior Models}, we delve into the question of modeling behavior. The key insight behind this chapter is that behavior is always produced by a receiver in response to a content sent by a sender at a time. We model behavior together with the pieces of sender, receiver, time, and content. We show that while large language models already model content, they do not model the other pieces of sender, receiver, and time. We model these factors together and show emergent abilities in understanding behavior. We observe that teaching the Large Content and Behavior Models (LCBM) behavior and content simulation improves its capabilities on them (expected), but the model also shows signs of domain-adaptation in behavior modality (few-shot capability, unexpected) and improvements in behavior understanding (zero-shot capability, unexpected). To spur research on the topic of large content and behavior models, we release our generated behavior instruction fine-tuning data from over 40,000 public domain YouTube videos and 168 million Twitter posts. The data contains: 1) YouTube video links, automatically extracted key scenes, scene verbalizations, replay graph data, video views, likes, comments, channel name, and subscriber count at the time of collection, and 2) Twitter extracted account names, tweet text, associated media (image and video) verbalizations (including image captions, keywords, colors, and tones), tweet timestamps, and like counts. We also release a benchmark to test performance on the joint content behavior space introducing two types of tasks in this space: predictive and descriptive. In the predictive benchmark, we test the model’s ability to predict behavior given the content and predict content given the behavior. In the descriptive benchmark, we validate its explanation of human behavior by comparing it with ground-truth annotations we obtain from human annotators that try to explain human behavior.
@@ -159,7 +160,7 @@ \chapter{Introduction: The Two Cultures of Behavioral Sciences}
 
 %the more modern approach of behavior prediction and leveraging the huge repositories of behavior data available. First, we propose models to integrate behavior with relatively smaller language models like BERT \cite{devlin2018bert}, and show that the resultant models can understand content better than the base models \cite{khurana-etal-2023-synthesizing}. Then, we propose an approach to integrate behavior and content together as part of a single model. We call these models Large Content and Behavior Models (LCBM) \cite{khandelwal2023large}. We show that these models can predict and explain behavior. 
 
-%Diagram for chapters - XXX
+%Diagram for chapters 
 
 
 
@@ -197,12 +198,20 @@ \chapter{Introduction: The Two Cultures of Behavioral Sciences}
 
 Therefore, we will cover explanation, analysis, prediction, and generation aspects of behavior. We will cover the following works in this thesis:
 \begin{enumerate}
+    \item MINIMAL: Mining models for data-free universal adversarial triggers. AAAI, 2022, (covered in Chapter-\ref{chatper:Explaining Behavior: Persuasion Strategies})
+
     \item Persuasion Strategies in Advertisements, AAAI, 2023, (covered in Chapter-\ref{chatper:Explaining Behavior: Persuasion Strategies})
+
     \item A Video Is Worth 4096 Tokens: Verbalize Videos To Understand Them In Zero Shot, EMNLP, 2023, \textbf{Nominated for best paper award} (covered in Chapter-\ref{chatper:Explaining Behavior: Persuasion Strategies})
+
     \item Large Content And Behavior Models To Understand, Simulate, And Optimize Content And Behavior, ICLR, 2024, \textbf{Nominated for best paper award} (covered in Chapter-\ref{chatper:Content and Behavior Models})
+
     \item Synthesizing Human Gaze Feedback for Improved NLP Performance, EACL, 2023 (covered in Chapter-\ref{chapter:Encoding Behavior To Improve Content Understanding})
+
     \item Teaching Human Behavior Improves Content Understanding Abilities Of VLMs, Arxiv preprint (under review), 2024 (covered in Chapter-\ref{chapter:Encoding Behavior To Improve Content Understanding})
+
     \item Long-Term Ad Memorability: Understanding and Generating Memorable Ads, WACV, 2025 (covered in Chapter-\ref{chatper:Generating Content Leading to Optimal Behavior})
+
     \item Measuring And Improving Engagement of Text-to-Image Generation Models, Arxiv preprint (under review), 2024 (covered in Chapter-\ref{chatper:Generating Content Leading to Optimal Behavior})
 \end{enumerate}
 

diff --git a/math_commands.tex b/math_commands.tex
@@ -2,6 +2,27 @@
 
 \usepackage{amsmath,amsfonts,bm}
 
+
+
+\definecolor{adversarial}{rgb}{0.90, 0.02, 0.03} % named colours; \definecolor requires the color or xcolor package
+\definecolor{orange2}{rgb}{0.95,0.35,0}
+\definecolor{trigger}{HTML}{FFC7BF} % the HTML colour model is provided by xcolor
+\newcommand{\glove}{GloVe} % model-name macros: each expands to the plain-text name
+\newcommand{\wtovec}{word2vec}
+\newcommand{\elmo}{ELMo}
+\newcommand{\bert}{BERT}
+\newcommand{\gpttwo}{GPT-2}
+\newcommand{\universal}[1]{``#1''} % wraps its argument in double quotation marks
+\newcommand{\xmark}{\ding{55}} % cross mark; \ding requires the pifont package
+\newcommand{\Checkmark}{\textbf{\ding{51}}} % check mark (pifont) wrapped in \textbf
+\newcommand{\mb}[1]{\boldsymbol{\mathbf{#1}}} % bold math symbol for the argument
+\newcommand{\loss}{\ensuremath{\mathcal{L}}} % argument braced: \ensuremath takes ONE argument, so the unbraced form fails outside math mode
+\newcommand{\PreserveBackslash}[1]{\let\temp=\\#1\let\\=\temp} % save \\, run #1 (which redefines \\), then restore \\ for ending table rows
+\newcolumntype{C}[1]{>{\PreserveBackslash\centering}p{#1}} % fixed-width centred column; \newcolumntype requires the array package
+\newcolumntype{R}[1]{>{\PreserveBackslash\raggedleft}p{#1}} % fixed-width right-aligned column
+\newcolumntype{L}[1]{>{\PreserveBackslash\raggedright}p{#1}} % fixed-width left-aligned column
+
+
 % Mark sections of captions for referring to divisions of figures
 \newcommand{\figleft}{{\em (Left)}}
 \newcommand{\figcenter}{{\em (Center)}}

diff --git a/publications.tex b/publications.tex
@@ -1,6 +1,8 @@
 \chapter*{Publications covered as part of thesis}
 \addcontentsline{toc}{chapter}{Publications covered as part of thesis}
 \begin{enumerate}
+    \item Singla, Y. K., Parekh, S., Singh, S., Chen, C., Krishnamurthy, B., \& Shah, R. R. (2022). MINIMAL: Mining models for data-free universal adversarial triggers. Proceedings of the AAAI Conference on Artificial Intelligence (AAAI).
+
     \item Khurana, V., Kumar, Y., Hollenstein, N., Kumar, R., \& Krishnamurthy, B. (2023). Synthesizing Human Gaze Feedback for Improved NLP Performance. In Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics (pp. 1895-1908).
 
     \item Kumar, Y., Jha, R., Gupta, A., Aggarwal, M., Garg, A., Malyan, T., Bhardwaj, A., Ratn Shah, R., Krishnamurthy, B., \& Chen, C. (2023). Persuasion Strategies in Advertisements. Proceedings of the AAAI Conference on Artificial Intelligence, 37(1), 57-66. \url{https://doi.org/10.1609/aaai.v37i1.25076}
@@ -55,8 +57,6 @@ \chapter*{Other Publications}
 
     \item S., S., Pupneja, A., Mital, S., Shah, C., Bawkar, M., Gupta, L. P., Kumar, A., Singla, Y. K., Gupta, R., \& Shah, R. R. (2023). H-AES: Towards automated essay scoring for Hindi. Proceedings of the Educational Advances in Artificial Intelligence (EAAI) at AAAI.
 
-    \item Singla, Y. K., Parekh, S., Singh, S., Chen, C., Krishnamurthy, B., \& Shah, R. R. (2022). MINIMAL: Mining models for data-free universal adversarial triggers. Proceedings of the AAAI Conference on Artificial Intelligence (AAAI).
-
     \item Ghosh, S., Kumar, S., Singla, Y. K., Shah, R. R., \& Umesh, S. (2022). Span classification with structured information for disfluency detection in spoken utterances. Proceedings of Interspeech.
 
     \item Singla, Y. K., Krishna, S., Shah, R. R., \& Chen, C. (2022). Using sampling to estimate and improve performance of automated scoring systems with guarantees. Proceedings of the AAAI Conference on Artificial Intelligence - Educational Advances in Artificial Intelligence (AAAI-EAAI).