-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfetch_character_weapons.py
41 lines (38 loc) · 1.19 KB
/
fetch_character_weapons.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python3
import json
from lxml import html
from lxml.cssselect import CSSSelector
import requests
# Local cache of the wiki page, so repeated runs do not re-download it.
html_filename = 'character_weapons.html'
html_content = None
try:
    # Read the cache as bytes: it is written as bytes below, and handing
    # lxml the raw bytes avoids decoding with the platform's locale encoding.
    with open(html_filename, 'rb') as f:
        html_content = f.read()
except OSError:
    # No readable cache: download the page and store it for next time.
    response = requests.get(
        'https://gbf.wiki/Module:Sandbox/Botanist/CharacterWeapons/doc',
        timeout=30,
    )
    # Fail loudly instead of caching (and later parsing) an HTTP error page.
    response.raise_for_status()
    html_content = response.content
    with open(html_filename, 'wb') as f:
        f.write(html_content)
tree = html.fromstring(html_content)
def extract_row(row):
    """Build a character record from one table row.

    ``row`` is indexed by cell position and each cell's ``.text`` is read
    (presumably an lxml ``<tr>`` element -- confirm against the caller).

    The name comes from cell 5, unless that text starts with ``'Sandbox/'``,
    in which case it is taken from the first child of cell 4 instead.
    Cells 6-10 supply rarity, element, type, race and join weapon.

    Returns a dict with the keys ``name``, ``rarity``, ``element``, ``type``,
    ``race`` and ``join_weapon``.
    """
    label = row[5].text
    name = row[4][0].text if label.startswith('Sandbox/') else label

    record = {'name': name}
    # The remaining fields sit in consecutive cells starting at index 6.
    columns = ('rarity', 'element', 'type', 'race', 'join_weapon')
    for position, key in enumerate(columns, start=6):
        record[key] = row[position].text
    return record
def keep_row(row):
    """Return True when an extracted record should appear in the output.

    A record is dropped when its ``join_weapon`` is ``None``, ``'???'`` or
    the empty string, or when its ``name`` is the literal ``'name'``
    (presumably the table's header row -- confirm against the page).

    ``row`` is a dict with at least the keys ``join_weapon`` and ``name``.
    """
    join_weapon = row['join_weapon']
    name = row['name']
    # Identity test for the None singleton, per PEP 8.
    if join_weapon is None or join_weapon in ('???', ''):
        return False
    return name != 'name'
# Turn every <tr> under any <table> into a record, drop the ones keep_row
# rejects, and order what is left by character name.
table_rows = CSSSelector('table tr')(tree)
data = sorted(
    (record for record in map(extract_row, table_rows) if keep_row(record)),
    key=lambda record: record['name'],
)
# Emit the result as pretty-printed JSON on stdout.
print(json.dumps({'characters': data}, indent=4, sort_keys=True))