forked from OwseiWasTaken/devaps
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsonar.py
executable file
·51 lines (42 loc) · 1.3 KB
/
sonar.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#! /bin/python3
from sys import argv, stderr
from typing import Any
import requests
from bs4 import BeautifulSoup
# Command line: sonar.py SITE [KEY VALUE]...
# SITE is the first positional argument; every following KEY that names an
# entry of `config` consumes the next token as its new value.
argv.pop(0)  # discard the script name
if not argv:
    # Without this guard the next pop() dies with a bare IndexError.
    raise SystemExit("usage: sonar.py SITE [KEY VALUE]...")
SITE = argv.pop(0)

# Defaults for the recognised options. Only keys already present here can be
# overridden from the command line; "path" is appended to SITE to form the
# first URL that gets fetched and "cookies" is handed to requests.
config: dict[str, Any] = {
    "headers": {},
    "usehttp": False,
    "depth": 2,
    "path": "/",
    "cookies": {},
}

while argv:
    arg = argv.pop(0)
    if arg not in config:
        # Tokens that are not known option names are skipped.
        continue
    if not argv:
        # A trailing option name with nothing after it used to raise IndexError.
        raise SystemExit(f"missing value for option {arg!r}")
    value = argv.pop(0)
    try:
        # SECURITY: eval() executes arbitrary Python taken from the command
        # line. It is kept so existing invocations (e.g. dict literals for
        # headers/cookies, ints for depth) behave as before, but it must never
        # be fed untrusted input.
        # NOTE(review): ast.literal_eval would cover literal values safely.
        config[arg] = eval(value)
    except (NameError, SyntaxError):
        # Not a valid Python expression (e.g. "/docs"): keep the raw string.
        config[arg] = value
def scrape(url: str) -> set[str]:
    """Fetch *url* and return the ``href`` of every ``<a>`` tag on the page.

    The request is sent with the headers and cookies stored in the
    module-level ``config``. The ``href`` values are returned exactly as
    they appear in the markup; relative links are not resolved against
    *url*.

    Any exception raised by ``requests.get`` propagates to the caller.
    """
    # The "headers" option used to be accepted on the command line but was
    # never sent; pass it alongside the cookies. An empty dict leaves the
    # library's default headers untouched.
    # NOTE(review): no timeout is given, so a stalled server blocks the call.
    response = requests.get(
        url,
        headers=config["headers"],
        cookies=config["cookies"],
    )
    page = BeautifulSoup(response.text, "html.parser")
    # href=True restricts the search to anchors that carry an href attribute.
    return {anchor.get("href") for anchor in page.find_all("a", href=True)}
def keep_from(links: set[str], site: str) -> set[str]:
    """Return the subset of *links* that belong to *site*.

    A link belongs to *site* when it equals *site* or continues it at a URL
    boundary (``/``, ``?`` or ``#``). A bare prefix test is not enough:
    ``https://example.com`` is also a prefix of
    ``https://example.com.evil.org/``, which is a different host.

    When *site* is empty or already ends with ``/`` there is no ambiguous
    boundary, so a plain prefix match is used.
    """
    if not site or site.endswith("/"):
        return {link for link in links if link.startswith(site)}
    # str.startswith accepts a tuple, so one call covers every boundary.
    continuations = tuple(site + boundary for boundary in "/?#")
    return {
        link
        for link in links
        if link == site or link.startswith(continuations)
    }
# Crawl every page reachable from the start URL without leaving SITE.
# `links` holds the pages still to fetch; `seen` maps each fetched page to the
# same-site links found on it and is printed to stdout once the queue is empty.
# NOTE(review): config["depth"] is not consulted in this loop, so the crawl is
# bounded only by the number of same-site links — confirm whether a depth
# limit was intended.
links: set[str] = keep_from(scrape(SITE + config["path"]), SITE)
seen: dict[str, set[str]] = {}

while links:
    link = links.pop()  # set.pop() returns an arbitrary pending page
    if link in seen:
        # Defensive: only unseen links are ever queued below.
        continue
    found = keep_from(scrape(link), SITE)
    seen[link] = found
    # Queue only pages that have not been fetched yet. `link` itself is already
    # in `seen`, so a page linking to itself is not re-queued. Filtering here
    # keeps `links` disjoint from `seen`, which is why the whole queue no
    # longer needs to be rebuilt on every iteration.
    newlinks = found.difference(seen)
    links.update(newlinks)
    print(f"scanning: {link} // {len(newlinks)} to go\n", newlinks, file=stderr)

print(seen)