Skip to content

Commit

Permalink
added subscriptions
Browse files Browse the repository at this point in the history
added forms
live parse statistics
repack database
parsing fixes
minor improvements
  • Loading branch information
aropan committed Nov 17, 2024
1 parent cb201e4 commit 9ae2d34
Show file tree
Hide file tree
Showing 129 changed files with 4,987 additions and 1,863 deletions.
1 change: 1 addition & 0 deletions .env.sentry.template
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ SENTRY_CRON_MONITOR_CHECKING_LOGS=
SENTRY_CRON_MONITOR_REINDEX=
SENTRY_CRON_MONITOR_SET_ACCOUNT_RANK=
SENTRY_CRON_MONITOR_UPDATE_AUTO_RATING=
SENTRY_CRON_MONITOR_PARSE_ARCHIVE_PROBLEMS=
51 changes: 49 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ RUN wget https://github.com/stunnel/static-curl/releases/download/8.6.0-1/curl-l
mv /tmp/curl /usr/local/bin/curl && \
rm /tmp/curl.tar.xz

# psql
RUN apt install -y postgresql-client

RUN apt update --fix-missing

ENV APPDIR=/usr/src/clist
Expand All @@ -45,7 +48,7 @@ WORKDIR $APPDIR
FROM base as dev
ENV DJANGO_ENV_FILE .env.dev
RUN apt install -y redis-server
CMD sh -c 'redis-server --daemonize yes; scripts/watchdog.bash "python manage.py rqworker" "*.py"; python manage.py runserver 0.0.0.0:10042'
CMD sh -c 'redis-server --daemonize yes; scripts/watchdog.bash "python manage.py rqworker system default" "*.py"; python manage.py runserver 0.0.0.0:10042'

COPY config/ipython_config.py .
RUN ipython profile create
Expand All @@ -71,6 +74,7 @@ RUN mkdir /run/daphne
COPY config/redis.conf /etc/redis/redis.conf

COPY config/supervisord.conf /etc/supervisord.conf

CMD supervisord -c /etc/supervisord.conf


Expand All @@ -82,7 +86,50 @@ COPY config/loggly/60-loggly.conf /etc/rsyslog.d/60-loggly.conf
ENTRYPOINT /entrypoint.sh


FROM nginx:alpine as nginx
FROM nginx:stable-alpine as nginx
# logrotate
RUN apk add --no-cache logrotate
COPY config/nginx/logrotate.d/nginx /etc/logrotate.d/nginx
RUN chmod 0644 /etc/logrotate.d/nginx
# cron
RUN apk add --no-cache logrotate dcron
COPY config/nginx/cron /etc/cron.d/nginx
RUN chmod 0644 /etc/cron.d/nginx
RUN crontab /etc/cron.d/nginx

CMD crond && nginx -g "daemon off;"


FROM postgres:14.3-alpine as postgres
# pg_repack
RUN apk add --no-cache --virtual .build-deps \
gcc \
g++ \
make \
musl-dev \
postgresql-dev \
git \
lz4-dev \
zlib-dev \
bash \
util-linux \
gawk \
&& cd /tmp \
&& git clone https://github.com/reorg/pg_repack.git \
&& cd pg_repack \
&& make \
&& make install \
&& apk del .build-deps \
&& rm -rf /tmp/pg_repack
# numfmt
RUN apk add --no-cache coreutils
# cron
RUN apk add --no-cache dcron
COPY config/postgres/cron /etc/cron.d/postgres
RUN chmod 0644 /etc/cron.d/postgres
RUN crontab /etc/cron.d/postgres
# supervisord
RUN apk add --no-cache supervisor
COPY config/postgres/supervisord.conf /etc/supervisord.conf

CMD supervisord -c /etc/supervisord.conf
22 changes: 11 additions & 11 deletions config/cron
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
PATH=/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
DJANGO_ENV_FILE=.env.prod

20,35,55 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CALENDAR_UPDATE /usr/src/clist/run-manage.bash update_google_calendars
*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CREATING_NOTIFICATIONS /usr/src/clist/run-manage.bash notification_to_task
*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SENDING_NOTIFICATIONS /usr/src/clist/run-manage.bash sendout_tasks
*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_PARSING_STATISTICS /usr/src/clist/run-manage.bash parse_statistic
*/3 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_PARSING_ACCOUNTS /usr/src/clist/run-manage.bash parse_accounts_infos
30 * * * * /usr/src/clist/run-manage.bash parse_archive_problems
*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CHECKING_LOGS /usr/src/clist/run-manage.bash check_logs
*/15 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SET_ACCOUNT_RANK /usr/src/clist/run-manage.bash set_account_rank
*/20 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SET_COUNTRY_RANK /usr/src/clist/run-manage.bash set_country_rank
15 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_UPDATE_AUTO_RATING /usr/src/clist/run-manage.bash update_auto_rating
20,35,55 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CALENDAR_UPDATE /usr/src/clist/run-manage.bash update_google_calendars
*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CREATING_NOTIFICATIONS /usr/src/clist/run-manage.bash notification_to_task
*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SENDING_NOTIFICATIONS /usr/src/clist/run-manage.bash sendout_tasks
*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_PARSING_STATISTICS /usr/src/clist/run-manage.bash parse_statistic
*/1 * * * * /usr/src/clist/run-manage.bash parse_live_statistics
*/3 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_PARSING_ACCOUNTS /usr/src/clist/run-manage.bash parse_accounts_infos
30 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_PARSE_ARCHIVE_PROBLEMS /usr/src/clist/run-manage.bash parse_archive_problems
*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CHECKING_LOGS /usr/src/clist/run-manage.bash check_logs
*/15 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SET_ACCOUNT_RANK /usr/src/clist/run-manage.bash set_account_rank
*/20 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SET_COUNTRY_RANK /usr/src/clist/run-manage.bash set_country_rank
15 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_UPDATE_AUTO_RATING /usr/src/clist/run-manage.bash update_auto_rating

# 32 1 * * wed env MONITOR_NAME=SENTRY_CRON_MONITOR_REINDEX /usr/src/clist/run-manage.bash reindex
# # 58 3 14-20 * * [ "$(date '+\%u')" -eq 4 ] && cd $PROJECT_DIR && run-one ./manage.py runscript calculate_account_contests >logs/command/calculate_account_contests.log 2>&1
# 58 4 * * 4 cd $PROJECT_DIR && run-one ./manage.py runscript calculate_coder_n_accounts_and_coder_n_contests >logs/command/calculate_coder_n_accounts_and_coder_n_contests.log 2>&1
# 58 5 * * 4 cd $PROJECT_DIR && run-one ./manage.py runscript calculate_resource_contests >logs/command/calculate_resource_contests.log 2>&1
4 changes: 2 additions & 2 deletions config/loggly/60-loggly.conf
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
$ModLoad imfile
$InputFilePollInterval 5

$InputFileName /logs/nginx/clist-prod-access.log
$InputFileName /logs/nginx/nginx/clist-prod-access.log
$InputFileTag nginx-prod-access:
$InputFileStateFile /logs/loggly/nginx-prod-access
$InputFileSeverity info
$InputFileReadMode 0
$InputRunFileMonitor

$InputFileName /logs/nginx/clist-prod-error.log
$InputFileName /logs/nginx/nginx/clist-prod-error.log
$InputFileTag nginx-prod-error:
$InputFileStateFile /logs/loggly/nginx-prod-error
$InputFileSeverity error
Expand Down
2 changes: 2 additions & 0 deletions config/nginx/cron
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
0 */6 * * * /usr/sbin/logrotate /etc/logrotate.conf >/var/log/logrotate.log 2>&1
0 0 */9 * * nginx -s reload >/var/log/nginx-reload.log 2>&1
1 change: 1 addition & 0 deletions config/postgres/cron
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
5 0 * * thu /usr/src/clist/scripts/repack-database.bash >/var/log/repack_database.log 2>&1
28 changes: 28 additions & 0 deletions config/postgres/supervisord.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
; supervisord configuration for the postgres container.
; Defines two supervised programs: the PostgreSQL server and a cron daemon.

[supervisord]
logfile=/var/log/supervisord.log
loglevel=info
; Stay in the foreground instead of daemonizing.
nodaemon=true
; Rotate supervisord's own log at 1MB, keeping 5 backups.
logfile_maxbytes=1MB
logfile_backups=5

; PostgreSQL server, started through the image's docker-entrypoint.sh with
; server settings passed as `-c name=value` flags on the command line.
[program:postgres]
command=/usr/local/bin/docker-entrypoint.sh postgres -c max_connections=50 -c checkpoint_timeout=60min -c track_activity_query_size=4096 -c shared_buffers=1GB -c effective_cache_size=3GB -c work_mem=64MB -c maintenance_work_mem=500MB
autostart=true
autorestart=true
stdout_logfile=/var/log/postgres_stdout.log
stderr_logfile=/var/log/postgres_stderr.log
; Each captured stream rotates at 10MB, keeping 5 backups.
stdout_logfile_maxbytes=10MB
stderr_logfile_maxbytes=10MB
stdout_logfile_backups=5
stderr_logfile_backups=5

; Cron daemon run in the foreground (-f) so supervisord can supervise it.
; NOTE(review): presumably this executes the crontab installed for this image
; (e.g. the database repack job) - confirm against the Dockerfile.
[program:cron]
command=/usr/sbin/crond -f
autostart=true
autorestart=true
stdout_logfile=/var/log/cron_stdout.log
stderr_logfile=/var/log/cron_stderr.log
stdout_logfile_maxbytes=10MB
stderr_logfile_maxbytes=10MB
stdout_logfile_backups=5
stderr_logfile_backups=5
4 changes: 2 additions & 2 deletions config/supervisord.conf
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ stdout_logfile=logs/redis.log
stderr_logfile=logs/redis.err

[program:rqworker]
command=python manage.py rqworker
command=python manage.py rqworker system default
directory=/usr/src/clist/
numprocs=3
numprocs=4
process_name=%(program_name)s%(process_num)d
user=root
autostart=true
Expand Down
34 changes: 18 additions & 16 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ services:
context: .
target: prod
networks:
- clist_network
clist_network:
ipv4_address: 172.42.0.102
volumes:
- static_files:/usr/src/clist/staticfiles/
- media_files:/usr/src/clist/mediafiles/
Expand All @@ -25,7 +26,8 @@ services:
context: .
target: dev
networks:
- clist_network
clist_network:
ipv4_address: 172.42.0.103
volumes:
- ./src/:/usr/src/clist/
- ./legacy/api/:/usr/src/clist/legacy/api/
Expand All @@ -50,9 +52,11 @@ services:
legacy:
build: ./legacy
networks:
- clist_network
clist_network:
ipv4_address: 172.42.0.104
volumes:
- ./legacy:/usr/src/legacy/
- shared_files:/sharedfiles/
depends_on:
- db
secrets:
Expand All @@ -71,22 +75,17 @@ services:
- db
restart: unless-stopped
db:
image: postgres:14.3-alpine
build:
context: .
target: postgres
networks:
- clist_network
volumes:
- postgres_data:/var/lib/postgresql/data/
- ./src/scripts/:/usr/src/clist/scripts/
- ./logs/postgres/:/var/log/
env_file:
- ./.env.db
command: >
postgres
-c max_connections=50
-c checkpoint_timeout=60min
-c track_activity_query_size=4096
-c shared_buffers=1GB
-c effective_cache_size=3GB
-c work_mem=64MB
-c maintenance_work_mem=500MB
shm_size: 4GB
ports:
- ${CLIST_DB_PORT:-5432}:5432
Expand All @@ -96,19 +95,19 @@ services:
context: .
target: nginx
networks:
- clist_network
clist_network:
ipv4_address: 172.42.0.101
volumes:
- static_files:/staticfiles/
- media_files:/mediafiles/
- ./legacy:/usr/src/legacy/
- ./config/nginx/conf.d:/etc/nginx/conf.d/
- certbot_www:/var/www/certbot/
- certbot_conf:/etc/letsencrypt/
- ./logs/nginx:/var/log/nginx/
- ./logs/nginx:/var/log/
ports:
- 80:80
- 443:443
command: "/bin/sh -c 'while :; do logrotate /etc/logrotate.conf; sleep 6h & wait $${!}; nginx -s reload; done & nginx -g \"daemon off;\"'"
restart: unless-stopped
certbot:
image: certbot/certbot:latest
Expand Down Expand Up @@ -166,6 +165,9 @@ networks:
driver_opts:
com.docker.network.bridge.name: br-clist
name: clist
ipam:
config:
- subnet: 172.42.0.0/24

secrets:
db_conf:
Expand Down
51 changes: 50 additions & 1 deletion legacy/helper.php
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,12 @@ function curlexec(&$url, $postfields = NULL, $params = array())
curl_setopt($CID, CURLOPT_HEADER, true);
}

if (isset($params["no_body"])) {
curl_setopt($CID, CURLOPT_NOBODY, true);
} else {
curl_setopt($CID, CURLOPT_NOBODY, false);
}

$cachefile = CACHEDIR . "/" . parse_url($url, PHP_URL_HOST) . "-" . md5(preg_replace("#/?timeMin=[^&]*#", "", $url)) . ".html";
if ($postfields !== NULL)
{
Expand Down Expand Up @@ -217,7 +223,7 @@ function curlexec(&$url, $postfields = NULL, $params = array())
} else {
$page = curl_exec($CID);
}
if (preg_match('#charset=["\']?([-a-z0-9]+)#i', $page, $match))
if (preg_match('#charset=["\']?([-a-z0-9]+)#i', $page, $match) && !isset($params['no_convert_charset']))
{
$charset = $match[1];
if (!preg_match('#^utf.*8$#i', $charset))
Expand Down Expand Up @@ -686,4 +692,47 @@ function pop_item(&$array, $path, $default = null) {
unset($last_result[$key]);
return $result;
}

/**
 * Parse an HTML table into a list of associative rows.
 *
 * Header names are taken from every <th> element in the markup (in document
 * order) and passed through slugify(). Each <tr> that contains <td> cells and
 * whose cell count equals the header count becomes one row; rows without
 * <td> cells or with a different cell count are skipped.
 *
 * For every cell the trimmed text content is stored under the header slug.
 * If the cell contains at least one <a>, the href of the first one is stored
 * under "<slug>:url".
 *
 * @param string $table_html HTML markup containing the table.
 * @return array List of rows (slug => text, optionally "slug:url" => href).
 *               Empty when the input is empty or cannot be parsed.
 */
function parsed_table($table_html) {
    // DOMDocument::loadHTML() rejects empty input (ValueError on PHP 8,
    // warning on older versions), so there is nothing to parse here.
    if (!is_string($table_html) || trim($table_html) === '') {
        return array();
    }

    $dom = new DOMDocument();

    // Scraped markup is rarely valid; collect libxml parse errors internally
    // instead of emitting a PHP warning for each one, then restore the
    // caller's libxml error mode.
    $previous_errors_mode = libxml_use_internal_errors(true);
    // NOTE(review): without a charset declaration libxml does not assume
    // UTF-8 for the fragment - confirm callers do not pass non-ASCII tables,
    // or prepend an encoding hint (this would change the produced slugs).
    $loaded = $dom->loadHTML($table_html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_errors_mode);
    if (!$loaded) {
        return array();
    }

    $header = array();
    foreach ($dom->getElementsByTagName('th') as $col) {
        $header[] = slugify($col->nodeValue);
    }
    $n_header = count($header);

    $data = array();
    foreach ($dom->getElementsByTagName('tr') as $row) {
        $cols = $row->getElementsByTagName('td');
        // Skip header-only rows and rows that do not line up with the header.
        if ($cols->length == 0 || $cols->length != $n_header) {
            continue;
        }

        $row_data = array();
        foreach (array_combine($header, iterator_to_array($cols)) as $field => $col) {
            $row_data[$field] = trim($col->nodeValue);
            $links = $col->getElementsByTagName('a');
            if ($links->length > 0) {
                // item() is the documented accessor for DOMNodeList.
                $row_data[$field . ':url'] = $links->item(0)->getAttribute('href');
            }
        }
        $data[] = $row_data;
    }
    return $data;
}

/**
 * Return the starting year of the season that contains the given moment.
 *
 * A season starts in September: for months January..August the season year
 * is the previous calendar year, otherwise it is the current calendar year.
 *
 * @param int|null $timestamp Unix timestamp to evaluate; defaults to now.
 * @return int Season year (always an integer).
 */
function current_season_year($timestamp = null) {
    // Read the clock once so year and month always refer to the same instant.
    if ($timestamp === null) {
        $timestamp = time();
    }
    // date() returns strings; cast so both branches return an int.
    $year = (int) date('Y', $timestamp);
    $month = (int) date('n', $timestamp);
    if ($month <= 8) {
        return $year - 1;
    }
    return $year;
}
?>
28 changes: 23 additions & 5 deletions legacy/module/acmp.ru/index.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
$url = $change_url;
$page = curlexec($url, "period=$year", array('http_header' => array("Content-Type: application/x-www-form-urlencoded", "Referer: $referer")));

preg_match('#<h1>(?P<title>[^<]*)</h1>#', $page, $match);
if (!preg_match('#<h1>(?P<title>[^<]*)</h1>#', $page, $match)) {
trigger_error("Can't find title", E_USER_WARNING);
break;
}
if (strpos($match['title'], "$year") === false) {
break;
}
Expand All @@ -32,14 +35,29 @@
$page = str_replace("&nbsp;", " ", $page);
$page = replace_russian_moths_to_number($page);

preg_match('#<h1>Содержание олимпиады "(?P<title>.*?)"</h1>#', $page, $m);
if (!preg_match('#<h1>Содержание олимпиады "(?P<title>.*?)"</h1>#', $page, $m)) {
trigger_error("Can't find title", E_USER_WARNING);
continue;
}
$title = $m['title'];
preg_match('#<b[^>]*>Начало олимпиады:</b>(?P<start_time>[^<]*)<#', $page, $m);

if (!preg_match('#<b[^>]*>Начало олимпиады:</b>(?P<start_time>[^<]*)<#', $page, $m)) {
trigger_error("Can't find start time", E_USER_WARNING);
continue;
}
$start_time = $m['start_time'];
$start_time = preg_replace('#\s*г\.\s*#', ' ', $start_time);
preg_match('#<b[^>]*>Продолжительность:</b>(?P<duration>[^<]*)<#', $page, $m);

if (!preg_match('#<b[^>]*>Продолжительность:</b>(?P<duration>[^<]*)<#', $page, $m)) {
trigger_error("Can't find duration", E_USER_WARNING);
continue;
}
$duration = $m['duration'];
preg_match('#id_stage=(?P<id>[0-9]+)#', $url, $m);

if (!preg_match('#id_stage=(?P<id>[0-9]+)#', $url, $m)) {
trigger_error("Can't find id", E_USER_WARNING);
continue;
}
$key = $m['id'];

$contests[] = array(
Expand Down
Loading

0 comments on commit 9ae2d34

Please sign in to comment.