Skip to content

Commit

Permalink
Added archive problems
Browse files Browse the repository at this point in the history
Custom proxy
zstd and deflate encoding
Account rating prediction
Common rating history chart
profile_url simple_tag
  • Loading branch information
aropan committed Aug 26, 2024
1 parent 86fbc2d commit d8b5b27
Show file tree
Hide file tree
Showing 56 changed files with 997 additions and 350 deletions.
3 changes: 2 additions & 1 deletion config/cron
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ DJANGO_ENV_FILE=.env.prod
*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SENDING_NOTIFICATIONS /usr/src/clist/run-manage.bash sendout_tasks
*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_PARSING_STATISTICS /usr/src/clist/run-manage.bash parse_statistic
*/5 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_PARSING_ACCOUNTS /usr/src/clist/run-manage.bash parse_accounts_infos
30 * * * * /usr/src/clist/run-manage.bash parse_archive_problems
*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CHECKING_LOGS /usr/src/clist/run-manage.bash check_logs
# 32 1 * * wed env MONITOR_NAME=SENTRY_CRON_MONITOR_REINDEX /usr/src/clist/run-manage.bash reindex
*/15 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SET_ACCOUNT_RANK /usr/src/clist/run-manage.bash set_account_rank
*/20 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SET_COUNTRY_RANK /usr/src/clist/run-manage.bash set_country_rank
15 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_UPDATE_AUTO_RATING /usr/src/clist/run-manage.bash update_auto_rating

# 32 1 * * wed env MONITOR_NAME=SENTRY_CRON_MONITOR_REINDEX /usr/src/clist/run-manage.bash reindex
# # 58 3 14-20 * * [ "$(date '+\%u')" -eq 4 ] && cd $PROJECT_DIR && run-one ./manage.py runscript calculate_account_contests >logs/command/calculate_account_contests.log 2>&1
# 58 4 * * 4 cd $PROJECT_DIR && run-one ./manage.py runscript calculate_coder_n_accounts_and_coder_n_contests >logs/command/calculate_coder_n_accounts_and_coder_n_contests.log 2>&1
# 58 5 * * 4 cd $PROJECT_DIR && run-one ./manage.py runscript calculate_resource_contests >logs/command/calculate_resource_contests.log 2>&1
27 changes: 27 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ services:
build:
context: .
target: prod
networks:
- clist_network
volumes:
- static_files:/usr/src/clist/staticfiles/
- media_files:/usr/src/clist/mediafiles/
Expand All @@ -22,6 +24,8 @@ services:
build:
context: .
target: dev
networks:
- clist_network
volumes:
- ./src/:/usr/src/clist/
- ./legacy/api/:/usr/src/clist/legacy/api/
Expand All @@ -40,9 +44,13 @@ services:
restart: unless-stopped
memcached:
image: memcached
networks:
- clist_network
restart: unless-stopped
legacy:
build: ./legacy
networks:
- clist_network
volumes:
- ./legacy:/usr/src/legacy/
depends_on:
Expand All @@ -53,6 +61,8 @@ services:
restart: unless-stopped
pgadmin:
image: dpage/pgadmin4:latest
networks:
- clist_network
volumes:
- pgadmin_data:/var/lib/pgadmin
env_file:
Expand All @@ -62,6 +72,8 @@ services:
restart: unless-stopped
db:
image: postgres:14.3-alpine
networks:
- clist_network
volumes:
- postgres_data:/var/lib/postgresql/data/
env_file:
Expand All @@ -83,6 +95,8 @@ services:
build:
context: .
target: nginx
networks:
- clist_network
volumes:
- static_files:/staticfiles/
- media_files:/mediafiles/
Expand All @@ -98,6 +112,8 @@ services:
restart: unless-stopped
certbot:
image: certbot/certbot:latest
networks:
- clist_network
volumes:
- ./logs/letsencrypt:/var/log/letsencrypt
- certbot_www:/var/www/certbot/
Expand All @@ -110,13 +126,17 @@ services:
build:
context: .
target: loggly
networks:
- clist_network
volumes:
- ./logs/:/logs/
secrets:
- loggly_token
restart: unless-stopped
netdata:
image: netdata/netdata:latest
networks:
- clist_network
hostname: netdata.clist.by
pid: host
cap_add:
Expand All @@ -140,6 +160,13 @@ services:
- ./.env.netdata
restart: unless-stopped

networks:
clist_network:
driver: bridge
driver_opts:
com.docker.network.bridge.name: br-clist
name: clist

secrets:
db_conf:
file: ./.env.db
Expand Down
11 changes: 11 additions & 0 deletions legacy/module/hackerearth.com/index.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
<?php
require_once dirname(__FILE__) . '/../../config.php';

$proxy_file = dirname(__FILE__) . "/../../logs/hackerearth.proxy";
$proxy = file_exists($proxy_file)? json_decode(file_get_contents($proxy_file)) : false;
if ($proxy) {
echo " (proxy)";
curl_setopt($CID, CURLOPT_PROXY, $proxy->addr . ':' . $proxy->port);
}

$data = curlexec($URL, NULL, array('json_output' => 1));
if (!isset($data['response'])) {
echo "No response, data = " . debug_content($data);
Expand Down Expand Up @@ -103,4 +110,8 @@
if ($RID === -1) {
print_r($contests);
}

if ($proxy) {
curl_setopt($CID, CURLOPT_PROXY, null);
}
?>
4 changes: 2 additions & 2 deletions legacy/module/hackerrank.com/index.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
for (;;) {
$url = "$base_url?limit=$limit&offset=$offset";
$json = curlexec($url, NULL, array('json_output' => true));
if (!is_array($json['models'])) {
print_r($json);
if (!is_array($json) || !is_array($json['models'])) {
var_dump($json);
trigger_error("Expected array ['models']", E_USER_WARNING);
break;
}
Expand Down
6 changes: 5 additions & 1 deletion legacy/module/highload.fun/index.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@
require_once dirname(__FILE__) . '/../../config.php';

$data = curlexec($URL, null, array("json_output" => 1));
if (!$data || !is_array($data)) {
trigger_error("Fetch data = '{$data}'", E_USER_WARNING);
return;
}
if ($data['result'] !== 'OK') {
trigger_error("Failed to fetch data, result = '{$data['result']}', error = '{$data['error']}'", E_USER_WARNING);
trigger_error("Fetch result = '{$data['result']}', error = '{$data['error']}'", E_USER_WARNING);
return;
}

Expand Down
2 changes: 0 additions & 2 deletions legacy/module/projecteuler.net/index.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
if ($proxy) {
echo " (proxy)";
curl_setopt($CID, CURLOPT_PROXY, $proxy->addr . ':' . $proxy->port);
} else {
return;
}

$urls = array('https://projecteuler.net/recent');
Expand Down
7 changes: 4 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Django==5.0.7
Django==5.1
django-add-default-value==0.10.0
django-brotli==0.2.1
django-cache-machine==1.2.0
Expand Down Expand Up @@ -84,12 +84,12 @@ tailslide==0.1.1
feedgen==1.0.0
pycountry==24.6.1
multiset==3.1.0
twisted[tls,http2]==24.3.0
twisted[tls,http2]==24.7.0
uWSGI==2.0.26
supervisor==4.2.5
daphne==4.1.2
tabulate==0.9.0
sentry-sdk==2.5.1
sentry-sdk==2.8.0
chardet==5.2.0
python-magic==0.4.27
prettytable==3.10.0
Expand All @@ -103,6 +103,7 @@ numba==0.59.1
ipwhois==1.2.0
fontawesomefree==6.5.1
brotli==1.0.9
zstandard==0.23.0
geoip2==4.8.0
xgboost==2.1.1
scikit-learn==1.5.1
Expand Down
9 changes: 5 additions & 4 deletions src/clist/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ def parse_statistic(self, request, queryset):
'with_medals', 'related', 'merging_contests', 'series',
'allow_updating_statistics_for_participants',
'set_matched_coders_to_members']}],
['Timing', {'fields': ['parsed_time', 'wait_for_successful_update_timing',
['Timing', {'fields': ['statistics_update_required', 'parsed_time', 'wait_for_successful_update_timing',
'statistic_timing', 'notification_timing',
'rating_prediction_timing', 'created', 'modified', 'updated']}],
['Rating', {'fields': ['rating_prediction_hash', 'has_fixed_rating_prediction_field',
'rating_prediction_fields']}],
['Problem', {'fields': ['n_problems', 'problem_rating_hash']}],
['Problem', {'fields': ['n_problems', 'problem_rating_hash', 'problem_rating_update_required']}],
['Submission', {'fields': ['has_submissions', 'has_submissions_tests']}],
]
list_display = ['title', 'host', 'start_time', 'url', 'is_rated', 'invisible', 'key', 'standings_url',
Expand Down Expand Up @@ -142,8 +142,9 @@ def queryset(self, request, queryset):
['Account information', {'fields': ['has_accounts_infos_update', 'n_accounts_to_update', 'has_multi_account',
'has_account_verification', 'has_standings_renamed_account',
'skip_for_contests_chart', 'accounts_fields']}],
['Problem information', {'fields': ['has_problem_rating', 'has_problem_update', 'has_new_problems',
'has_upsolving', 'problems_fields', 'problem_rating_predictor']}],
['Problem information', {'fields': ['has_problem_rating', 'has_problem_update', 'has_problem_archive',
'problem_archive_update_time', 'has_upsolving', 'problems_fields',
'problem_rating_predictor']}],
['Statistics information', {'fields': ['statistics_fields']}],
['Other information', {'fields': ['info']}],
]
Expand Down
142 changes: 142 additions & 0 deletions src/clist/management/commands/parse_archive_problems.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#!/usr/bin/env python3


import operator
from collections import defaultdict
from copy import deepcopy
from datetime import timedelta
from functools import reduce
from logging import getLogger

from django.core.management.base import BaseCommand
from django.db.models import Q
from django.utils import timezone
from tqdm import tqdm

from clist.models import Problem, Resource
from clist.templatetags.extras import canonize
from logify.models import EventLog, EventStatus
from logify.utils import failed_on_exception
from utils.attrdict import AttrDict
from utils.timetools import parse_duration


class Command(BaseCommand):
    """Fetch archive (non-contest) problems from resource plugins and upsert them.

    For each eligible resource, calls the plugin's ``Statistic.get_archive_problems``
    and either merges the returned data into existing contest problems or creates
    standalone archive problems; optionally predicts ratings for the processed
    problems with the resource's trained predictor model.
    """

    # NOTE(review): help text looks copy-pasted from another command;
    # consider 'Parsing archive problems'.
    help = 'Parsing problems infos'

    def __init__(self, *args, **kw):
        super(Command, self).__init__(*args, **kw)
        # Dedicated logger channel for this command's run summaries.
        self.logger = getLogger('clist.parse.archive_problems')

    def add_arguments(self, parser):
        """Register CLI options: resource filter, force, per-resource limit, delay."""
        parser.add_argument('-r', '--resources', metavar='HOST', nargs='*', help='host name for update')
        parser.add_argument('-f', '--force', action='store_true', help='force update problems')
        parser.add_argument('-l', '--limit', default=None, type=int, help='limit problems for one resource')
        parser.add_argument('-d', '--delay', default='1 day', type=str, help='modified field delay')

    def handle(self, *args, **options):
        """Entry point: select resources and parse their archive problems."""
        self.stdout.write(str(options))
        args = AttrDict(options)

        # Resource selection: explicit hosts from -r (prefix or short-host
        # match, OR-combined), otherwise all updatable resources flagged as
        # having a problem archive.
        if args.resources:
            filters = [Q(host__startswith=r) | Q(short_host=r) for r in args.resources]
            resources = Resource.objects.filter(reduce(operator.or_, filters))
        else:
            resources = Resource.available_for_update_objects.filter(has_problem_archive=True)

        # Throttling: skip resources updated more recently than --delay,
        # unless --force is given. Never-updated resources always pass.
        if args.delay and not args.force:
            delay = timezone.now() - parse_duration(args.delay)
            resource_filter = Q(problem_archive_update_time__lte=delay) | Q(problem_archive_update_time__isnull=True)
            resources = resources.filter(resource_filter)

        for resource in resources:
            # Track each resource's run in the event log; failed_on_exception
            # presumably marks the entry failed if anything below raises —
            # TODO confirm against logify.utils.
            event_log = EventLog.objects.create(name='parse_archive_problems',
                                                related=resource,
                                                status=EventStatus.IN_PROGRESS)
            with failed_on_exception(event_log):
                counter = defaultdict(int)
                now = timezone.now()
                # Plugin returns a list of dicts, each with at least a 'key';
                # may also carry 'info', submission counts, 'archive_url', etc.
                archive_problems = resource.plugin.Statistic.get_archive_problems(resource=resource, limit=args.limit)
                problem_keys = [archive_problem['key'] for archive_problem in archive_problems]
                # Keys already attached to a contest are merged into the
                # existing problem rather than created as archive entries.
                contest_problems = Problem.objects.filter(resource=resource, key__in=problem_keys)
                contest_problems = contest_problems.filter(Q(contest__isnull=False) | Q(contests__isnull=False))
                contest_problem_keys = set(contest_problems.values_list('key', flat=True))
                # Running timestamp assigned to new archive problems; updated
                # to just before each contest problem seen, so created entries
                # keep the list's relative ordering by time.
                problem_time = now
                problem_ids = []
                for archive_problem in tqdm(archive_problems):
                    problem_key = archive_problem['key']
                    # Pop tags out of 'info' so they are not stored inside the
                    # info dict; they are applied via update_tags() for newly
                    # created archive problems only.
                    tags = archive_problem.get('info', {}).pop('tags', [])

                    if problem_key in contest_problem_keys:
                        # Existing contest problem: merge archive info and
                        # submission counters, saving only changed fields.
                        updated_fields = []
                        problem = Problem.objects.get(resource=resource, key=problem_key)
                        info = deepcopy(problem.info)
                        archive_info = archive_problem.get('info', {})
                        info.update(archive_info)
                        # canonize() gives an order-independent comparison of
                        # the two info structures.
                        if canonize(problem.info) != canonize(info):
                            problem.info = info
                            updated_fields.append('info')
                            counter['updated'] += 1
                        else:
                            counter['not_changed'] += 1
                        for field in 'n_accepted_submissions', 'n_total_submissions':
                            if field in archive_problem:
                                setattr(problem, field, archive_problem[field])
                                updated_fields.append(field)
                        if updated_fields:
                            problem.save(update_fields=updated_fields)
                        if problem.time is not None:
                            # Shift the running timestamp to 1ms before this
                            # contest problem for subsequent created entries.
                            problem_time = problem.time - timedelta(milliseconds=1)
                    else:
                        # Standalone archive problem: derive the URL from the
                        # resource template when the plugin did not supply one.
                        if 'archive_url' not in archive_problem and resource.problem_url:
                            archive_problem['archive_url'] = resource.problem_url.format(**archive_problem)
                        archive_problem['is_archive'] = True
                        problem, created = Problem.objects.update_or_create(resource=resource,
                                                                            key=problem_key,
                                                                            defaults=archive_problem)
                        problem.update_tags(tags, replace=True)
                        updated_fields = []
                        # Only backfill time when unset; problem_time is always
                        # truthy here (initialized to `now`).
                        if problem.time is None and problem_time:
                            problem.time = problem_time
                            updated_fields.append('time')

                        if updated_fields:
                            problem.save(update_fields=updated_fields)

                        if created:
                            counter['created'] += 1
                        counter['done'] += 1

                    problem_ids.append(problem.id)

                # Optional rating prediction pass over the problems touched
                # above, using the resource's pre-trained predictor model.
                if resource.problem_rating_predictor and (predictor_model := resource.load_problem_rating_predictor()):
                    rating_changes = []
                    problems = resource.problem_set.filter(id__in=problem_ids)
                    problem_filter = resource.problem_rating_predictor['filter']
                    if problem_filter:
                        problems = problems.filter(**problem_filter)
                    df = resource.problem_rating_predictor_data(problems)
                    # Drop the target column; the model predicts it.
                    df = df.drop(['rating'], axis=1)
                    ratings = predictor_model.predict(df)
                    # NOTE(review): zip(problems, ratings) assumes the queryset
                    # iterates in the same order as the rows fed to
                    # problem_rating_predictor_data — confirm.
                    for problem, rating in zip(problems, ratings):
                        rating = round(rating)
                        if problem.rating is not None:
                            rating_changes.append(problem.rating - rating)
                        if problem.rating != rating:
                            problem.rating = rating
                            problem.save(update_fields=['rating'])
                            counter['rating'] += 1
                        else:
                            counter['rating_not_changed'] += 1
                    if rating_changes:
                        # Log mean signed and mean absolute drift of predicted
                        # ratings versus previous values, for monitoring.
                        n_rating_changes = len(rating_changes)
                        self.logger.info(f'Rating change = {sum(rating_changes) / n_rating_changes}')
                        self.logger.info(f'Rating abs change = {sum(map(abs, rating_changes)) / n_rating_changes}')

                # Record completion time so --delay throttling applies on the
                # next run; skipped under --force so forced runs stay repeatable.
                if not args.force:
                    resource.problem_archive_update_time = now
                    resource.save(update_fields=['problem_archive_update_time'])

                message = f'counter = {dict(counter)}'
                self.logger.info(f'{resource}: {message}')
                event_log.update_status(EventStatus.COMPLETED, message=message)
Loading

0 comments on commit d8b5b27

Please sign in to comment.