diff --git a/.env.sentry.template b/.env.sentry.template index 720db38a..adc82df0 100644 --- a/.env.sentry.template +++ b/.env.sentry.template @@ -11,3 +11,4 @@ SENTRY_CRON_MONITOR_CHECKING_LOGS= SENTRY_CRON_MONITOR_REINDEX= SENTRY_CRON_MONITOR_SET_ACCOUNT_RANK= SENTRY_CRON_MONITOR_UPDATE_AUTO_RATING= +SENTRY_CRON_MONITOR_PARSE_ARCHIVE_PROBLEMS= diff --git a/Dockerfile b/Dockerfile index f91c8cd0..b31fe275 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,6 +36,9 @@ RUN wget https://github.com/stunnel/static-curl/releases/download/8.6.0-1/curl-l mv /tmp/curl /usr/local/bin/curl && \ rm /tmp/curl.tar.xz +# psql +RUN apt install -y postgresql-client + RUN apt update --fix-missing ENV APPDIR=/usr/src/clist @@ -45,7 +48,7 @@ WORKDIR $APPDIR FROM base as dev ENV DJANGO_ENV_FILE .env.dev RUN apt install -y redis-server -CMD sh -c 'redis-server --daemonize yes; scripts/watchdog.bash "python manage.py rqworker" "*.py"; python manage.py runserver 0.0.0.0:10042' +CMD sh -c 'redis-server --daemonize yes; scripts/watchdog.bash "python manage.py rqworker system default" "*.py"; python manage.py runserver 0.0.0.0:10042' COPY config/ipython_config.py . 
RUN ipython profile create @@ -71,6 +74,7 @@ RUN mkdir /run/daphne COPY config/redis.conf /etc/redis/redis.conf COPY config/supervisord.conf /etc/supervisord.conf + CMD supervisord -c /etc/supervisord.conf @@ -82,7 +86,50 @@ COPY config/loggly/60-loggly.conf /etc/rsyslog.d/60-loggly.conf ENTRYPOINT /entrypoint.sh -FROM nginx:alpine as nginx +FROM nginx:stable-alpine as nginx +# logrotate RUN apk add --no-cache logrotate COPY config/nginx/logrotate.d/nginx /etc/logrotate.d/nginx RUN chmod 0644 /etc/logrotate.d/nginx +# cron +RUN apk add --no-cache logrotate dcron +COPY config/nginx/cron /etc/cron.d/nginx +RUN chmod 0644 /etc/cron.d/nginx +RUN crontab /etc/cron.d/nginx + +CMD crond && nginx -g "daemon off;" + + +FROM postgres:14.3-alpine as postgres +# pg_repack +RUN apk add --no-cache --virtual .build-deps \ + gcc \ + g++ \ + make \ + musl-dev \ + postgresql-dev \ + git \ + lz4-dev \ + zlib-dev \ + bash \ + util-linux \ + gawk \ + && cd /tmp \ + && git clone https://github.com/reorg/pg_repack.git \ + && cd pg_repack \ + && make \ + && make install \ + && apk del .build-deps \ + && rm -rf /tmp/pg_repack +# numfmt +RUN apk add --no-cache coreutils +# cron +RUN apk add --no-cache dcron +COPY config/postgres/cron /etc/cron.d/postgres +RUN chmod 0644 /etc/cron.d/postgres +RUN crontab /etc/cron.d/postgres +# supervisord +RUN apk add --no-cache supervisor +COPY config/postgres/supervisord.conf /etc/supervisord.conf + +CMD supervisord -c /etc/supervisord.conf diff --git a/config/cron b/config/cron index 0e4d0218..a15cc986 100644 --- a/config/cron +++ b/config/cron @@ -1,18 +1,18 @@ PATH=/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin DJANGO_ENV_FILE=.env.prod -20,35,55 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CALENDAR_UPDATE /usr/src/clist/run-manage.bash update_google_calendars -*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CREATING_NOTIFICATIONS /usr/src/clist/run-manage.bash notification_to_task -*/1 * * * * env 
MONITOR_NAME=SENTRY_CRON_MONITOR_SENDING_NOTIFICATIONS /usr/src/clist/run-manage.bash sendout_tasks -*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_PARSING_STATISTICS /usr/src/clist/run-manage.bash parse_statistic -*/3 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_PARSING_ACCOUNTS /usr/src/clist/run-manage.bash parse_accounts_infos -30 * * * * /usr/src/clist/run-manage.bash parse_archive_problems -*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CHECKING_LOGS /usr/src/clist/run-manage.bash check_logs -*/15 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SET_ACCOUNT_RANK /usr/src/clist/run-manage.bash set_account_rank -*/20 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SET_COUNTRY_RANK /usr/src/clist/run-manage.bash set_country_rank -15 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_UPDATE_AUTO_RATING /usr/src/clist/run-manage.bash update_auto_rating +20,35,55 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CALENDAR_UPDATE /usr/src/clist/run-manage.bash update_google_calendars +*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CREATING_NOTIFICATIONS /usr/src/clist/run-manage.bash notification_to_task +*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SENDING_NOTIFICATIONS /usr/src/clist/run-manage.bash sendout_tasks +*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_PARSING_STATISTICS /usr/src/clist/run-manage.bash parse_statistic +*/1 * * * * /usr/src/clist/run-manage.bash parse_live_statistics +*/3 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_PARSING_ACCOUNTS /usr/src/clist/run-manage.bash parse_accounts_infos +30 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_PARSE_ARCHIVE_PROBLEMS /usr/src/clist/run-manage.bash parse_archive_problems +*/1 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_CHECKING_LOGS /usr/src/clist/run-manage.bash check_logs +*/15 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SET_ACCOUNT_RANK /usr/src/clist/run-manage.bash set_account_rank +*/20 * * * * env MONITOR_NAME=SENTRY_CRON_MONITOR_SET_COUNTRY_RANK /usr/src/clist/run-manage.bash set_country_rank +15 * * * 
* env MONITOR_NAME=SENTRY_CRON_MONITOR_UPDATE_AUTO_RATING /usr/src/clist/run-manage.bash update_auto_rating -# 32 1 * * wed env MONITOR_NAME=SENTRY_CRON_MONITOR_REINDEX /usr/src/clist/run-manage.bash reindex # # 58 3 14-20 * * [ "$(date '+\%u')" -eq 4 ] && cd $PROJECT_DIR && run-one ./manage.py runscript calculate_account_contests >logs/command/calculate_account_contests.log 2>&1 # 58 4 * * 4 cd $PROJECT_DIR && run-one ./manage.py runscript calculate_coder_n_accounts_and_coder_n_contests >logs/command/calculate_coder_n_accounts_and_coder_n_contests.log 2>&1 # 58 5 * * 4 cd $PROJECT_DIR && run-one ./manage.py runscript calculate_resource_contests >logs/command/calculate_resource_contests.log 2>&1 diff --git a/config/loggly/60-loggly.conf b/config/loggly/60-loggly.conf index 0a27780e..431a6fa8 100644 --- a/config/loggly/60-loggly.conf +++ b/config/loggly/60-loggly.conf @@ -1,14 +1,14 @@ $ModLoad imfile $InputFilePollInterval 5 -$InputFileName /logs/nginx/clist-prod-access.log +$InputFileName /logs/nginx/nginx/clist-prod-access.log $InputFileTag nginx-prod-access: $InputFileStateFile /logs/loggly/nginx-prod-access $InputFileSeverity info $InputFileReadMode 0 $InputRunFileMonitor -$InputFileName /logs/nginx/clist-prod-error.log +$InputFileName /logs/nginx/nginx/clist-prod-error.log $InputFileTag nginx-prod-error: $InputFileStateFile /logs/loggly/nginx-prod-error $InputFileSeverity error diff --git a/config/nginx/cron b/config/nginx/cron new file mode 100644 index 00000000..2ce92484 --- /dev/null +++ b/config/nginx/cron @@ -0,0 +1,2 @@ +0 */6 * * * /usr/sbin/logrotate /etc/logrotate.conf >/var/log/logrotate.log 2>&1 +0 0 */9 * * nginx -s reload >/var/log/nginx-reload.log 2>&1 diff --git a/config/postgres/cron b/config/postgres/cron new file mode 100644 index 00000000..e604aede --- /dev/null +++ b/config/postgres/cron @@ -0,0 +1 @@ +5 0 * * thu /usr/src/clist/scripts/repack-database.bash >/var/log/repack_database.log 2>&1 diff --git a/config/postgres/supervisord.conf 
b/config/postgres/supervisord.conf new file mode 100644 index 00000000..f8a8cb6a --- /dev/null +++ b/config/postgres/supervisord.conf @@ -0,0 +1,28 @@ +[supervisord] +logfile=/var/log/supervisord.log +loglevel=info +nodaemon=true +logfile_maxbytes=1MB +logfile_backups=5 + +[program:postgres] +command=/usr/local/bin/docker-entrypoint.sh postgres -c max_connections=50 -c checkpoint_timeout=60min -c track_activity_query_size=4096 -c shared_buffers=1GB -c effective_cache_size=3GB -c work_mem=64MB -c maintenance_work_mem=500MB +autostart=true +autorestart=true +stdout_logfile=/var/log/postgres_stdout.log +stderr_logfile=/var/log/postgres_stderr.log +stdout_logfile_maxbytes=10MB +stderr_logfile_maxbytes=10MB +stdout_logfile_backups=5 +stderr_logfile_backups=5 + +[program:cron] +command=/usr/sbin/crond -f +autostart=true +autorestart=true +stdout_logfile=/var/log/cron_stdout.log +stderr_logfile=/var/log/cron_stderr.log +stdout_logfile_maxbytes=10MB +stderr_logfile_maxbytes=10MB +stdout_logfile_backups=5 +stderr_logfile_backups=5 diff --git a/config/supervisord.conf b/config/supervisord.conf index 99476e98..e54cdbc4 100644 --- a/config/supervisord.conf +++ b/config/supervisord.conf @@ -49,9 +49,9 @@ stdout_logfile=logs/redis.log stderr_logfile=logs/redis.err [program:rqworker] -command=python manage.py rqworker +command=python manage.py rqworker system default directory=/usr/src/clist/ -numprocs=3 +numprocs=4 process_name=%(program_name)s%(process_num)d user=root autostart=true diff --git a/docker-compose.yml b/docker-compose.yml index 8ce3aeb6..ec8d2eb3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,7 +6,8 @@ services: context: . target: prod networks: - - clist_network + clist_network: + ipv4_address: 172.42.0.102 volumes: - static_files:/usr/src/clist/staticfiles/ - media_files:/usr/src/clist/mediafiles/ @@ -25,7 +26,8 @@ services: context: . 
target: dev networks: - - clist_network + clist_network: + ipv4_address: 172.42.0.103 volumes: - ./src/:/usr/src/clist/ - ./legacy/api/:/usr/src/clist/legacy/api/ @@ -50,9 +52,11 @@ services: legacy: build: ./legacy networks: - - clist_network + clist_network: + ipv4_address: 172.42.0.104 volumes: - ./legacy:/usr/src/legacy/ + - shared_files:/sharedfiles/ depends_on: - db secrets: @@ -71,22 +75,17 @@ services: - db restart: unless-stopped db: - image: postgres:14.3-alpine + build: + context: . + target: postgres networks: - clist_network volumes: - postgres_data:/var/lib/postgresql/data/ + - ./src/scripts/:/usr/src/clist/scripts/ + - ./logs/postgres/:/var/log/ env_file: - ./.env.db - command: > - postgres - -c max_connections=50 - -c checkpoint_timeout=60min - -c track_activity_query_size=4096 - -c shared_buffers=1GB - -c effective_cache_size=3GB - -c work_mem=64MB - -c maintenance_work_mem=500MB shm_size: 4GB ports: - ${CLIST_DB_PORT:-5432}:5432 @@ -96,7 +95,8 @@ services: context: . target: nginx networks: - - clist_network + clist_network: + ipv4_address: 172.42.0.101 volumes: - static_files:/staticfiles/ - media_files:/mediafiles/ @@ -104,11 +104,10 @@ services: - ./config/nginx/conf.d:/etc/nginx/conf.d/ - certbot_www:/var/www/certbot/ - certbot_conf:/etc/letsencrypt/ - - ./logs/nginx:/var/log/nginx/ + - ./logs/nginx:/var/log/ ports: - 80:80 - 443:443 - command: "/bin/sh -c 'while :; do logrotate /etc/logrotate.conf; sleep 6h & wait $${!}; nginx -s reload; done & nginx -g \"daemon off;\"'" restart: unless-stopped certbot: image: certbot/certbot:latest @@ -166,6 +165,9 @@ networks: driver_opts: com.docker.network.bridge.name: br-clist name: clist + ipam: + config: + - subnet: 172.42.0.0/24 secrets: db_conf: diff --git a/legacy/helper.php b/legacy/helper.php index 53691a82..b2ba5a34 100755 --- a/legacy/helper.php +++ b/legacy/helper.php @@ -184,6 +184,12 @@ function curlexec(&$url, $postfields = NULL, $params = array()) curl_setopt($CID, CURLOPT_HEADER, true); } 
+    if (isset($params["no_body"])) {
+        curl_setopt($CID, CURLOPT_NOBODY, true);
+    } else {
+        curl_setopt($CID, CURLOPT_NOBODY, false);
+    }
+
     $cachefile = CACHEDIR . "/" . parse_url($url, PHP_URL_HOST) . "-" . md5(preg_replace("#/?timeMin=[^&]*#", "", $url)) . ".html";
 
     if ($postfields !== NULL) {
@@ -217,7 +223,7 @@ function curlexec(&$url, $postfields = NULL, $params = array())
     } else {
         $page = curl_exec($CID);
     }
-    if (preg_match('#charset=["\']?([-a-z0-9]+)#i', $page, $match))
+    if (preg_match('#charset=["\']?([-a-z0-9]+)#i', $page, $match) && !isset($params['no_convert_charset']))
     {
         $charset = $match[1];
         if (!preg_match('#^utf.*8$#i', $charset))
@@ -686,4 +692,84 @@ function pop_item(&$array, $path, $default = null) {
     unset($last_result[$key]);
     return $result;
 }
+
+    /**
+     * Parses HTML table markup into a list of associative rows.
+     *
+     * Every th element in the markup contributes one column key (its text
+     * passed through slugify()). Each tr element whose td count equals the
+     * number of keys becomes one row: key => trimmed cell text, plus "key:url"
+     * holding the href of the first a element in the cell when there is one.
+     * Rows without td cells or with a different cell count are skipped.
+     * Duplicate keys collapse because array_combine() keeps only the last
+     * value for a repeated key.
+     *
+     * @param string $table_html HTML fragment or document containing the table.
+     * @return array List of rows; empty when the input is empty or unparsable.
+     */
+    function parsed_table($table_html) {
+        // DOMDocument::loadHTML() refuses an empty string, so short-circuit.
+        if (trim((string) $table_html) === '') {
+            return array();
+        }
+
+        // Markup that is not strictly valid makes libxml raise one PHP warning
+        // per problem: buffer those errors, then restore the previous setting.
+        // NOTE(review): without a charset declaration libxml treats the input as
+        // ISO-8859-1 - confirm callers pass markup that declares its encoding.
+        $previous = libxml_use_internal_errors(true);
+        $dom = new DOMDocument();
+        $loaded = $dom->loadHTML($table_html);
+        libxml_clear_errors();
+        libxml_use_internal_errors($previous);
+        if (!$loaded) {
+            return array();
+        }
+
+        $header = array();
+        foreach ($dom->getElementsByTagName('th') as $col) {
+            $header[] = slugify($col->nodeValue);
+        }
+
+        $data = array();
+        foreach ($dom->getElementsByTagName('tr') as $row) {
+            $cols = $row->getElementsByTagName('td');
+            if ($cols->length == 0 || $cols->length != count($header)) {
+                continue;
+            }
+            $headered_cols = array_combine($header, iterator_to_array($cols));
+            $row_data = array();
+            foreach ($headered_cols as $field => $col) {
+                $row_data[$field] = trim($col->nodeValue);
+                $a = $col->getElementsByTagName('a');
+                if ($a->length > 0) {
+                    $row_data[$field . ':url'] = $a->item(0)->getAttribute('href');
+                }
+            }
+            $data[] = $row_data;
+        }
+        return $data;
+    }
+
+    /**
+     * Returns the season year for a point in time.
+     *
+     * Months 1-8 map to the previous calendar year, months 9-12 to the calendar
+     * year itself.
+     *
+     * @param int|null $timestamp Unix timestamp to evaluate; null means "now".
+     * @return int
+     */
+    function current_season_year($timestamp = null) {
+        // Read the clock once so year and month cannot straddle New Year.
+        if ($timestamp === null) {
+            $timestamp = time();
+        }
+        // date() returns strings; cast so the result is an int on every path.
+        $year = (int) date('Y', $timestamp);
+        $month = (int) date('n', $timestamp);
+        if ($month <= 8) {
+            return $year - 1;
+        }
+        return $year;
+    }
 ?>
diff --git a/legacy/module/acmp.ru/index.php b/legacy/module/acmp.ru/index.php
index 8716abe2..033dd6f9 100644
--- a/legacy/module/acmp.ru/index.php
+++ b/legacy/module/acmp.ru/index.php
@@ -14,7 +14,10 @@ $url = $change_url;
 $page = curlexec($url, "period=$year", array('http_header' => array("Content-Type: application/x-www-form-urlencoded", "Referer: $referer")));
- preg_match('#