-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprompt.py
271 lines (224 loc) · 13.7 KB
/
prompt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
import random
from example_generation import SubjectLocationGenerator, ProperNounNegationGenerator, ReligiousPronounGenerator
from instruction import Instruction
from example import Example
class Prompt:
    """
    Builds a prompt for the OpenAI API by generating examples.

    A Prompt holds a list of Examples (the disambiguating one is called the
    query) and, optionally, an instruction. For example, a Prompt may look like:

        Instruction
        Example 1 {metadata}
        Example 2 {metadata}
        Query {metadata}

    Two generation modes exist, selected by ``salient_task``:

    * ``salient_task is None``  -> ``make_examples``: optionally two ambiguous
      examples, then one query, then ``shots - 1`` follow-up examples.
    * ``salient_task`` given    -> ``make_given_distribution_examples``:
      exactly ``shots`` examples, each ambiguous or disambiguating at random.

    Attributes:
        shots (int): controls how many examples are generated (see the two modes above)
        construction_type (str): the type of examples to generate: one of {subject_location, religious_pronoun, propn_negation}
        format_type (str): the type of format to generate: ['qa', 'arrow']
        examples (list): every Example generated for this prompt, in order
        instruction: the generated instruction, or "" when none was requested
        clarifying_assertion (str): initialised to "" and not modified by this class

    Constructor-only arguments:
        needs_instruction (bool): True if an instruction should be generated
        needs_informative (bool): True if the instruction should be informative
        include_ambiguous_examples (bool): True to prepend two ambiguous examples (only used when salient_task is None)
        prob_of_ambiguous (int | float): chance of each example being ambiguous; either an integer
            percentage (0-100) or a float fraction (0.0-1.0) (only used when salient_task is given)
        for_finetuning (bool): True if generating examples with withheld salient tasks for finetuning
        finetuning_control (bool): True if generating examples for finetuning control tests
        salient_task (str): salient task for which to make examples (not required to generate examples)
    """

    def __init__(self, shots, construction_type, format_type, needs_instruction,
                 needs_informative, include_ambiguous_examples, prob_of_ambiguous,
                 for_finetuning, finetuning_control, salient_task=None):
        self.shots = shots
        self.construction_type = construction_type
        self.examples = []
        self.format_type = format_type
        self.instruction = ""
        self.clarifying_assertion = ""

        # Choose the generation mode: with an explicit salient task or without one.
        if salient_task is not None:
            self.make_given_distribution_examples(
                prob_of_ambiguous=prob_of_ambiguous,
                needs_instruction=needs_instruction,
                needs_informative=needs_informative,
                salient_task=salient_task,
                for_finetuning=for_finetuning,
                finetuning_control=finetuning_control,
            )
        else:
            self.make_examples(needs_instruction, needs_informative, include_ambiguous_examples)

    @staticmethod
    def _ambiguous_percentage(prob_of_ambiguous):
        """
        Normalises ``prob_of_ambiguous`` to the integer percentage the sampler needs.

        Args:
            prob_of_ambiguous (int | float): integer percentage, or float fraction in [0.0, 1.0]

        Returns:
            A float inside [0.0, 1.0] is converted to a rounded integer percentage.
            Any other value is returned unchanged, so integers keep their
            original meaning and unsupported floats still fail in the caller.
        """
        if isinstance(prob_of_ambiguous, float) and 0.0 <= prob_of_ambiguous <= 1.0:
            return round(prob_of_ambiguous * 100)
        return prob_of_ambiguous

    def check_construction_type(self):
        """
        Builds the example generator matching ``self.construction_type``.

        Only the selected generator class is instantiated.

        Args:
            None

        Returns:
            construction_obj (ExampleGenerator): the object corresponding to the specific construction type

        Raises:
            Exception: if ``self.construction_type`` is not a known construction type
        """
        if self.construction_type == 'subject_location':
            generator_class = SubjectLocationGenerator
        elif self.construction_type == 'propn_negation':
            generator_class = ProperNounNegationGenerator
        elif self.construction_type == 'religious_pronoun':
            generator_class = ReligiousPronounGenerator
        else:
            raise Exception("invalid construction type")
        return generator_class(self.construction_type, self.format_type)

    def get_examples(self):
        """Returns the list of Examples generated so far."""
        return self.examples

    def make_examples(self, needs_instruction, needs_informative, include_ambiguous_examples):
        """
        Generates examples of the specific construction type when no salient task was given.

        Produces (optionally) two ambiguous examples, one query, and then
        ``shots - 1`` follow-up examples derived from the previous example.

        Args:
            needs_instruction (bool): True if instruction needed and False otherwise
            needs_informative (bool): True if instruction is informative and False otherwise
            include_ambiguous_examples (bool): True if wish to include ambiguous examples and False otherwise

        Returns:
            None
        """
        current_examples = []

        # Randomizes the order of the labels for the two ambiguous examples:
        #   True  -> first example labelled with True,  second with False
        #   False -> first example labelled with False, second with True
        first_label = random.choice([True, False])

        # Randomizes which feature set comes first. E.g. for 'subject_location':
        #   True  -> Example 1: The {human} is in the {indoor_location}
        #            Example 2: The {animal} is in the {outdoor_location}
        #   False -> the same two examples in the opposite order
        first_features = random.choice([True, False])

        # Selects the correct ExampleGenerator object based on the construction type.
        construction_obj = self.check_construction_type()

        if include_ambiguous_examples:
            for position in range(2):
                # The second example always gets the opposite label and the
                # opposite feature set from the first one.
                label = first_label if position == 0 else not first_label
                features = first_features if position == 0 else not first_features
                # Both feature flags agree, which is what makes the example ambiguous.
                example = construction_obj.generate_example(features, features, label)
                self.examples.append(example)
                current_examples.append(example)

        # Randomizes the query, which disambiguates the previous two examples by
        # mixing the feature flags. E.g. for 'subject_location':
        #   True  -> Query: The {human} is in the {outdoor_location}
        #   False -> Query: The {animal} is in the {indoor_location}
        query_randomizer = random.choice([True, False])
        # Randomizes the label of the query (~50% True, ~50% False).
        query_label_randomizer = random.choice([True, False])

        query = construction_obj.generate_example(query_randomizer, not query_randomizer, query_label_randomizer)
        self.examples.append(query)
        current_examples.append(query)

        if needs_instruction:
            self.instruction = self.generate_instruction(current_examples, needs_informative, include_ambiguous_examples)

        # Sets the salient_task as the same task for all Examples for the current Prompt
        # (used for visualizations and data wrangling further down the pipeline).
        self.set_salient_task(current_examples=current_examples, include_ambiguous_examples=include_ambiguous_examples)

        # Each follow-up example is generated from the most recent one; the
        # generator built above is reused rather than rebuilt per iteration.
        for _ in range(self.shots - 1):
            example = construction_obj.generate_example_given_salient(current_examples[-1])
            self.examples.append(example)
            current_examples.append(example)

    def make_given_distribution_examples(self, prob_of_ambiguous, needs_instruction, needs_informative, for_finetuning, finetuning_control, salient_task):
        """
        Generates ``shots`` examples given a salient task.

        Args:
            prob_of_ambiguous (int | float): chance of each example being ambiguous, as an
                integer percentage (0-100) or a float fraction (0.0-1.0)
            needs_instruction (bool): True if instruction needed and False otherwise
            needs_informative (bool): True if instruction is informative and False otherwise
            for_finetuning (bool): True if wish to generate examples for finetuning and False otherwise
            finetuning_control (bool): True if running control tests for finetuning and False otherwise
            salient_task (str): The salient task for the set of examples

        Returns:
            None

        Raises:
            Exception: if ``salient_task`` is not one of the known task names
        """
        current_examples = []

        # A 100-slot population to sample from, so that random.choice yields
        # 'ambiguous' with the requested percentage.
        percent_ambiguous = self._ambiguous_percentage(prob_of_ambiguous)
        examples_distribution = ['ambiguous'] * percent_ambiguous + ['disambiguating'] * (100 - percent_ambiguous)

        salient_task_label = random.choice([True, False])
        active_task_label = random.choice([True, False])

        possible_task_a = ['subject', 'religious', 'propn']
        possible_task_b = ['location', 'pronoun', 'negation']

        construction_obj = self.check_construction_type()

        if salient_task in possible_task_a:
            salient = 'task_a'
        elif salient_task in possible_task_b:
            salient = 'task_b'
        else:
            raise Exception("invalid salient task")

        # Finetuning control runs draw the coin once for the whole prompt;
        # every other run re-draws it for each example (see the loop below).
        fixed_for_prompt = for_finetuning and finetuning_control
        if fixed_for_prompt:
            randomize_tasks = random.choice([True, False])

        # Generates the specified number of examples.
        for _ in range(self.shots):
            if not fixed_for_prompt:
                randomize_tasks = random.choice([True, False])

            example_type = random.choice(examples_distribution)

            # The label follows the coin flip for every example type.
            label = active_task_label if randomize_tasks else not active_task_label

            if example_type == 'disambiguating':
                # The two feature flags always disagree. Which one carries
                # salient_task_label depends on the coin and the salient side:
                #   randomize, task_a     -> (s, not s)
                #   not randomize, task_a -> (not s, s)
                #   randomize, task_b     -> (not s, s)
                #   not randomize, task_b -> (s, not s)
                if randomize_tasks == (salient == 'task_a'):
                    first_flag = salient_task_label
                else:
                    first_flag = not salient_task_label
                second_flag = not first_flag
            else:
                # Ambiguous: both feature flags agree.
                first_flag = salient_task_label if randomize_tasks else not salient_task_label
                second_flag = first_flag

            example = construction_obj.generate_example(first_flag, second_flag, label, salient_task)
            current_examples.append(example)
            self.examples.append(example)

        # Adds instruction if needed.
        if needs_instruction:
            self.instruction = self.generate_instruction(current_examples, needs_informative, True, salient)

    def set_salient_task(self, current_examples, include_ambiguous_examples, salient_task_a_or_b=None):
        """
        Delegates to ``Instruction.set_salient_task`` for the current set of examples.

        NOTE(review): presumably this records the salient task on each Example;
        the behaviour lives in ``Instruction`` and is not visible here.

        Args:
            current_examples (list): The current set of examples
            include_ambiguous_examples (bool): True if ambiguous examples are included and False otherwise
            salient_task_a_or_b (str): 'task_a' if the salient task is task_a and 'task_b' if the salient task is task_b

        Returns:
            Whatever ``Instruction.set_salient_task`` returns
        """
        return Instruction(construction_type=self.construction_type).set_salient_task(
            current_examples=current_examples,
            include_ambiguous_examples=include_ambiguous_examples,
            salient_task_a_or_b=salient_task_a_or_b,
        )

    def generate_instruction(self, current_examples, needs_informative, include_ambiguous_examples, salient_task_a_or_b=None):
        """
        Generates the instruction for the given set of examples.

        Args:
            current_examples (list): The current set of examples
            needs_informative (bool): True to build an informative instruction from the examples,
                False to build the uninformative instruction
            include_ambiguous_examples (bool): True if ambiguous examples are included and False otherwise
            salient_task_a_or_b (str): 'task_a' if the salient task is task_a and 'task_b' if the salient task is task_b

        Returns:
            The instruction produced by ``Instruction.make_instruction`` or
            ``Instruction.make_uninformative_instruction``
        """
        instruction_builder = Instruction(construction_type=self.construction_type)
        if needs_informative:
            return instruction_builder.make_instruction(current_examples, include_ambiguous_examples, salient_task_a_or_b)
        return instruction_builder.make_uninformative_instruction()

    def generate_clarifying_assertion(self):
        """Returns a clarifying assertion built by ``Instruction``; the result is not stored on the Prompt."""
        return Instruction(construction_type=self.construction_type).make_clarifying_assertion()

    def get_instruction(self):
        """Returns the instruction for this prompt ("" when none was generated)."""
        return self.instruction

    def get_clarifying_assertion(self):
        """Returns the stored clarifying assertion."""
        return self.clarifying_assertion

    def print(self):
        """
        Prints the prompt to stdout: instruction, then every example with its label, then '###'.

        When no instruction was generated the withheld-category placeholder is
        printed instead. Examples are printed with a '<br>' prefix and labelled
        'X' when ``active_task_label`` is truthy and 'Y' otherwise.
        """
        # Test the stored instruction, not the generate_instruction method:
        # a bound method is always truthy, which made the fallback unreachable.
        if self.instruction:
            print(str(self.instruction))
        else:
            print("Output 'X' if the sentence contains a [cateogry withheld] 'Y' otherwise.")
        for e in self.examples:
            print('<br>' + str(e.construction))
            if e.active_task_label:
                print('<br>>X')
            else:
                print('<br>>Y')
        print("###")