Skip to content

Commit 4252ded

Browse files
committed
Add flag to close old issues
1 parent b81970a commit 4252ded

File tree

1 file changed

+39
-7
lines changed

1 file changed

+39
-7
lines changed

bin/process_bulk_metadata.py

+39-7
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,11 @@ def _apply_changes_to_xml(self, xml_path, anthology_id, changes):
141141
"""
142142
real_ids = set()
143143
for author in changes["authors"]:
144-
id_ = author["id"]
145-
existing_author = paper_node.findall(f"author[@id='{id_}']")
146-
if existing_author is None:
147-
real_ids.add(id_)
144+
id_ = author.get("id", None)
145+
if id_:
146+
existing_author = paper_node.find(f"author[@id='{id_}']")
147+
if existing_author is not None:
148+
real_ids.add(id_)
148149

149150
# remove existing author nodes
150151
for author_node in paper_node.findall("author"):
@@ -183,7 +184,7 @@ def _apply_changes_to_xml(self, xml_path, anthology_id, changes):
183184
return None
184185

185186
def process_metadata_issues(
186-
self, ids=[], verbose=False, skip_validation=False, dry_run=False
187+
self, ids=[], verbose=False, skip_validation=False, dry_run=False, close_old_issues=False
187188
):
188189
"""Process all metadata issues and create PR with changes."""
189190
# Get all open issues with required labels
@@ -228,8 +229,32 @@ def process_metadata_issues(
228229
# Parse metadata changes from issue
229230
json_block = self._parse_metadata_changes(issue.body)
230231
if not json_block:
231-
if verbose:
232-
print("-> Skipping (no JSON block)", file=sys.stderr)
232+
if close_old_issues:
233+
# for old issues, filed without a JSON block, we append a comment
234+
# alerting them to how to file a new issue using the new format.
235+
# If possible, we first parse the Anthology ID out of the title:
236+
# "Paper Metadata: {anthology_id}". We can then use this to
237+
# post a link to the original paper so they can go through the
238+
# automated process.
239+
anthology_id = None
240+
match = re.search(r"Paper Metadata: [\{]?(.*)[\}]?", issue.title)
241+
if match:
242+
anthology_id = match[1]
243+
if anthology_id:
244+
print(
245+
f"-> Closing issue {issue.number} with a link to the new process",
246+
file=sys.stderr,
247+
)
248+
url = f"https://aclanthology.org/{anthology_id}"
249+
issue.create_comment(
250+
f"The Anthology has had difficulty keeping up with the manual process we use for the large number of metadata corrections we receive. We have therefore updated our workflow with a more automatated process. We are closing this issue, and ask that you help us out by recreating your request using this new workflow. You can do this by visiting [the paper page associated with this issue]({url}) and clicking on the yellow 'Fix metadata' button. This will take you through a few steps simple steps."
251+
)
252+
# close the issue as "not planned"
253+
issue.edit(state="closed", state_reason="not_planned")
254+
continue
255+
else:
256+
if verbose:
257+
print("-> Skipping (no JSON block)", file=sys.stderr)
233258
continue
234259

235260
# Skip issues that are not approved by team member
@@ -308,6 +333,12 @@ def process_metadata_issues(
308333
action="store_true",
309334
help="Dry run (do not create PRs)",
310335
)
336+
parser.add_argument(
337+
"--close-old-issues",
338+
action="store_true",
339+
help="Close old metadata requests with a comment (those without a JSON block)",
340+
)
341+
311342
args = parser.parse_args()
312343

313344
if not github_token:
@@ -319,4 +350,5 @@ def process_metadata_issues(
319350
verbose=args.verbose,
320351
skip_validation=args.skip_validation,
321352
dry_run=args.dry_run,
353+
close_old_issues=args.close_old_issues,
322354
)

0 commit comments

Comments
 (0)