Skip to content

Commit e979388

Browse files
committed
GH-6 # clean up init_db script and add download option
1 parent 798a97c commit e979388

File tree

5 files changed

+71
-27
lines changed

5 files changed

+71
-27
lines changed

.dockerignore

+11-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,14 @@
11
src/static/*.bundle.js
22
package-lock.json
33

4-
node_modules/
4+
node_modules/
5+
6+
.git/
7+
.idea/
8+
.venv/
9+
10+
__pycache__/
11+
**/__pycache__/
12+
13+
*.py[cod]
14+
*$py.class

README.md

+4-2
Original file line numberDiff line numberDiff line change
@@ -91,13 +91,15 @@ docker compose up
9191

9292
Dans un nouveau terminal, si vous avez déjà une base de mots de passe type rockyou.txt en local:
9393
```shell
94-
docker compose run -v <path_to_directory_having_rockyou.txt>:/rockdir fastapi python src/init_scripts/init_db_with_password_file.py /rockdir/rockyou.txt
94+
docker compose run --rm -v <path_to_directory_having_rockyou.txt>:/rockdir fastapi python src/init_scripts/init_db_with_password_file.py /rockdir/rockyou.txt
9595
```
9696
si vous souhaitez le télécharger automatiquement:
9797
```shell
98-
TODO
98+
docker compose run --rm fastapi python src/init_scripts/init_db_with_password_file.py --download
9999
```
100100

101+
La base de données utilise un volume Docker qui peut être supprimé avec `docker volume remove hibp_kvrocks_data_volume`.
102+
101103
### Setup dev
102104

103105
`poetry sync` permet d'installer les dépendances. L'outil justfile (voir [ici pour l'installation](https://github.com/casey/just?tab=readme-ov-file#cross-platform)) permet de faire tourner les tests et le linter.

poetry.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ build-backend = "poetry.core.masonry.api"
1818
pytest = "^8.3.4"
1919
ruff = "^0.9.4"
2020
testcontainers = {extras = ["generic"], version = "^4.9.1"}
21-
httpx = "^0.28.1" # required to test fastapi
2221

2322

2423
[tool.poetry.dependencies]
@@ -27,6 +26,8 @@ docopt = "^0.6.2"
2726
xxhash = "^3.5.0"
2827
fastapi = {extras = ["standard"], version = "^0.115.8"}
2928
pydantic-settings = "^2.7.1"
29+
httpx = "^0.28.1"
30+
rich = "^13.9.4"
3031

3132
[tool.ruff]
3233
line-length = 120

src/init_scripts/init_db_with_password_file.py

+53-22
Original file line numberDiff line numberDiff line change
@@ -12,45 +12,76 @@
1212
1313
"""
1414

15-
import os
15+
import tempfile
1616
import time
1717

18+
import httpx
19+
import rich
1820
from docopt import docopt
1921
from redis import Redis
22+
from rich import print
23+
from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn
2024

2125
from src.common import PasswordHasher, PasswordStorage, Settings
2226

27+
ROCKYOU_DOWNLOAD_URL = "https://github.com/brannondorsey/naive-hashcat/releases/download/data/rockyou.txt"
2328

24-
def init_db(file_path: str, db_client: Redis) -> int:
29+
30+
def init_db(file_path: str, db_client: Redis, password_count: int) -> int:
2531
"""Return the number of password inserted in db"""
26-
print("") # allow print below to clear this line return
32+
processed = 0
2733
hasher = PasswordHasher()
2834
storage = PasswordStorage(client=db_client)
29-
processed = 0
30-
with open(file_path, encoding="latin-1") as file:
31-
for line in file:
32-
password = line.strip()
33-
prefix = hasher.prefix(password)
34-
storage.add_password(prefix=prefix, password=password)
35-
processed += 1
36-
if processed % 100 == 0:
37-
print(f"\r{processed=}", end="")
35+
36+
with Progress(
37+
SpinnerColumn(),
38+
*Progress.get_default_columns(),
39+
MofNCompleteColumn(),
40+
) as progress:
41+
rich_task = progress.add_task("[green]Inserting in database...", total=password_count)
42+
# Actual work
43+
with open(file_path, encoding="latin-1") as file:
44+
for line in file:
45+
password = line.strip()
46+
prefix = hasher.prefix(password)
47+
storage.add_password(prefix=prefix, password=password)
48+
49+
processed += 1
50+
progress.update(rich_task, advance=1)
51+
3852
storage.flush()
39-
print("\ndone")
53+
print("[green]done")
4054
return processed
4155

4256

4357
if __name__ == "__main__":
58+
start = time.time()
4459
args = docopt(__doc__)
4560
if args.get("--download"):
46-
print("not implemented yet")
47-
exit(0)
61+
print("Initializing DB with rockyou passwords from internet")
62+
# Create a temporary file that is deleted automatically on program close
63+
download_file = tempfile.NamedTemporaryFile(delete=True, delete_on_close=False)
64+
with httpx.stream("GET", ROCKYOU_DOWNLOAD_URL, follow_redirects=True) as r:
65+
for data in r.iter_bytes():
66+
download_file.write(data)
67+
download_file.close()
68+
print("download complete")
69+
password_path = download_file.name
4870
elif password_path := args.get("<passwords-path>"):
4971
print(f"Initializing db with {password_path}")
50-
start = time.time()
51-
settings = Settings()
52-
redis_client = Redis.from_url(str(settings.kvrocks_url))
53-
init_db(password_path, db_client=redis_client)
54-
duration = time.time() - start
55-
print(f"Took {duration:.2f}s")
56-
# else docopt will show help
72+
else:
73+
print("Missing arguments")
74+
exit(1)
75+
76+
settings = Settings()
77+
password_count = 0
78+
with rich.progress.open(
79+
password_path, encoding="latin-1", description="[orange3]Determining password count...", transient=True,
80+
) as file:
81+
for line in file:
82+
password_count += 1
83+
84+
redis_client = Redis.from_url(str(settings.kvrocks_url))
85+
init_db(password_path, db_client=redis_client, password_count=password_count)
86+
duration = time.time() - start
87+
print(f"Took {duration:.2f}s")

0 commit comments

Comments
 (0)