-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathtest.py
133 lines (110 loc) · 5.19 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/python3
from datetime import date
import re
import sys
import rtyaml
# Module-level failure flag: error() sets it to True, and the end of the
# script exits with status 1 when it is set.
has_error = False
def error(*args):
    """Record a validation failure and describe it on standard error.

    Accepted call shapes (the message always comes last):

        error(message)
        error(incident, message)
        error(incident, consequence, message)

    Each truthy context object is shown as the first 64 characters of its
    ``rtyaml.dump`` output, with newlines replaced by " --- ".  The message
    follows, then a blank line.

    Side effect: the module-level ``has_error`` flag is set to True on every
    call, including calls that go on to raise.

    Raises:
        ValueError: if called with zero or more than three arguments.
    """
    global has_error
    has_error = True

    if not 1 <= len(args) <= 3:
        raise ValueError(args)

    # Split the trailing message from the (zero, one or two) context objects.
    *context, message = args
    incident = context[0] if context else None
    consequence = context[1] if len(context) > 1 else None

    for prefix, obj in (("In <", incident), ("... <", consequence)):
        if obj:
            excerpt = rtyaml.dump(obj)[:64].replace("\n", " --- ")
            print(prefix, excerpt, ">", file=sys.stderr)

    print(message, file=sys.stderr)
    print(file=sys.stderr)
def remove_markdown_link_urls(s):
    """Return *s* with every parenthesised ``(http...)`` span removed.

    The match is non-greedy: it runs from "(http" to the first following
    ")" on the same line, so the bracketed label of a Markdown link such
    as "[label](http://example.com)" is kept and only the URL part is
    dropped.
    """
    parenthesised_url = re.compile(r"\(http.*?\)")
    return parenthesised_url.sub("", s)
# Tags permitted in an incident's space-separated 'tags' string.
_INCIDENT_TAGS = {
    "elections", "corruption", "sexual-harassment-abuse", "crime",
    "ethics", "resolved", "unresolved"}

# Tags permitted in a consequence's space-separated 'tags' string.
_CONSEQUENCE_TAGS = {
    "expulsion", "censure", "contempt", "reprimand", "fined", "resignation",
    "exclusion", "settlement", "conviction", "plea", "confirmation"}


def _check_consequence(incident, cons):
    """Validate one entry of an incident's 'consequences' list.

    Every problem found is reported through error(); nothing is returned.
    """
    if not isinstance(cons, dict):
        error(incident, cons, "Consequence should be a dict.")
        # Every remaining check reads keys from the mapping, so stop here
        # instead of failing with AttributeError on .get().
        return

    when = cons.get("date")
    if isinstance(when, date):
        pass  # good, a full date
    elif not isinstance(when, (int, str)):
        error(incident, cons, "Consequence is missing or has an invalid date.")
    elif not re.match(r"(\d\d\d\d)(-(\d\d)(-(\d\d))?)?$", str(when)):
        error(incident, cons, "Consequence has an invalid date.")

    # Exactly one of 'body' and 'text' must be present.
    has_body = "body" in cons
    has_text = "text" in cons
    if not has_body and not has_text:
        error(incident, cons, "Consequence should have either 'body' or 'text'.")
    elif has_body and has_text:
        error(incident, cons, "Consequence cannot have both 'body' and 'text'.")
    elif has_text:
        text = cons["text"]
        if not isinstance(text, str):
            error(incident, cons, "Consequence 'text' should be a string.")
        # An empty string is reported as "not a full sentence" rather than
        # raising IndexError. The first-character test rejects anything that
        # equals its own lowercase form, which includes digits and
        # punctuation as well as lowercase letters.
        elif not text or text[0] == text[0].lower() or text[-1] != ".":
            error(incident, cons, "Consequence text should be a full sentence starting with a capital letter and ending in a period.")
    else:
        if not isinstance(cons["body"], str):
            error(incident, cons, "Consequence 'body' should be a string.")
        if not isinstance(cons.get("action"), str):
            error(incident, cons, "In consequence with body, 'action' should be a string.")

    for field in ("text", "action"):
        value = cons.get(field)
        # Only strings can contain a Markdown link; wrong types are already
        # reported above and must not raise TypeError on the 'in' test.
        if isinstance(value, str) and "](" in value:
            error(incident, cons, "Consequence looks like it has a Markdown link in {} that should be in the link field instead.".format(field))

    # 'link' is optional and may be a single string or a list of strings.
    link = cons.get("link")
    if not isinstance(link, (type(None), str, list)):
        error(incident, cons, "Consequence has an invalid 'link' value.")
    if isinstance(link, list):
        for item in link:
            if not isinstance(item, str):
                error(incident, cons, "Consequence has an invalid 'link' value.")

    # 'tags' is optional on a consequence.
    if "tags" in cons:
        if not isinstance(cons["tags"], str):
            error(incident, cons, "Consequence has invalid 'tags', should be a string.")
        else:
            bad_tags = set(cons["tags"].split(" ")) - _CONSEQUENCE_TAGS
            if bad_tags:
                error(incident, cons, "Consequence has invalid 'tags': {}.".format(bad_tags))


def _check_incident(incident):
    """Validate one top-level entry of misconduct.yaml.

    Every problem found is reported through error(); nothing is returned.
    """
    if not isinstance(incident, dict):
        error(incident, "Incident is not a dict.")
        # Every remaining check reads keys from the mapping.
        return

    if not isinstance(incident.get("person"), int):
        error(incident, "Incident is missing or has invalid 'person', should be an integer.")
    # TODO: Check ID is a real GovTrack person ID.

    if not isinstance(incident.get("name"), str):
        error(incident, "Incident is missing or has invalid 'name', should be a string.")

    text = incident.get("text")
    if not isinstance(text, str):
        error(incident, "Incident is missing or has invalid 'text', should be a string.")

    allegation = incident.get("allegation")
    if not isinstance(allegation, str):
        error(incident, "Incident is missing or has invalid 'allegation', should be a string.")

    consequences = incident.get("consequences")
    if not isinstance(consequences, list):
        error(incident, "Incident is missing or has invalid 'consequences', should be a list.")
        return

    # Unlike consequence tags, incident 'tags' are required.
    tags = incident.get("tags")
    if not isinstance(tags, str):
        error(incident, "Incident is missing or has invalid 'tags', should be a string.")
        return
    bad_tags = set(tags.split(" ")) - _INCIDENT_TAGS
    if bad_tags:
        error(incident, "Incident has invalid 'tags': {}".format(bad_tags))

    for cons in consequences:
        _check_consequence(incident, cons)

    # Suggest incidents whose allegation or text fields probably could be shortened.
    # The heuristics run only on fields that passed their type check above,
    # so an already-reported missing/invalid field cannot raise here.
    if isinstance(allegation, str) and len(allegation) > 700:
        error(incident, "'allegation' could probably be shorter.")

    # Person 456921 is exempt from the text-length heuristic (the reason is
    # not recorded in this file).
    if (incident.get("person") != 456921
            and isinstance(text, str)
            and len(consequences) > 2):
        text_length = len(remove_markdown_link_urls(text))
        if text_length > 1200:
            error(incident, "'text' could probably be shorter.")
        elif len(text) > 400 and isinstance(allegation, str):
            # Flag text longer than 80% of the allegation plus the rendered
            # consequences, with link URLs removed from both sides.
            rendered = " ".join(
                remove_markdown_link_urls(str(cons)) for cons in consequences)
            if text_length > .8 * (len(allegation) + len(rendered)):
                error(incident, "'text' could probably be shorter.")


# Load the data file. Any failure to read or parse it is reported and is fatal.
try:
    with open("misconduct.yaml", encoding="utf-8") as f:
        misconduct = rtyaml.load(f)
except Exception as e:
    error(str(e))
    sys.exit(1)

if not isinstance(misconduct, list):
    error("misconduct.yaml is not a list.")
    # There is nothing sensible to iterate over.
    sys.exit(1)

for incident in misconduct:
    _check_incident(incident)

if has_error:
    sys.exit(1)