Update sync_status_readme.py
muxin-web3 authored Feb 13, 2025
1 parent 0753293 commit 10ef497
Showing 1 changed file with 160 additions and 119 deletions.
279 changes: 160 additions & 119 deletions sync_status_readme.py
@@ -8,19 +8,21 @@

# Constants
START_DATE = datetime.fromisoformat(os.environ.get(
'START_DATE', '2024-06-24T00:00:00+00:00')).replace(tzinfo=pytz.UTC)
'START_DATE', '2025-01-06T00:00:00+00:00')).replace(tzinfo=pytz.UTC)
END_DATE = datetime.fromisoformat(os.environ.get(
'END_DATE', '2024-07-14T23:59:59+00:00')).replace(tzinfo=pytz.UTC)
'END_DATE', '2025-01-26T23:59:59+00:00')).replace(tzinfo=pytz.UTC)
DEFAULT_TIMEZONE = 'Asia/Shanghai'
FILE_SUFFIX = os.environ.get('FILE_SUFFIX', '.md')
FILE_SUFFIX = '.md'
README_FILE = 'README.md'
FIELD_NAME = os.environ.get('FIELD_NAME', 'Name')
FIELD_NAME = 'Name'
Content_START_MARKER = "<!-- Content_START -->"
Content_END_MARKER = "<!-- Content_END -->"
TABLE_START_MARKER = "<!-- START_COMMIT_TABLE -->"
TABLE_END_MARKER = "<!-- END_COMMIT_TABLE -->"
GITHUB_REPOSITORY_OWNER = os.environ.get('GITHUB_REPOSITORY_OWNER')
GITHUB_REPOSITORY = os.environ.get('GITHUB_REPOSITORY')
STATS_START_MARKER = "<!-- STATISTICALDATA_START -->"
STATS_END_MARKER = "<!-- STATISTICALDATA_END -->"
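
For reference, the ISO strings above parse cleanly with the standard library; a quick sketch using the new default window from this diff:

from datetime import datetime
import pytz

start = datetime.fromisoformat('2025-01-06T00:00:00+00:00').replace(tzinfo=pytz.UTC)
end = datetime.fromisoformat('2025-01-26T23:59:59+00:00').replace(tzinfo=pytz.UTC)
print((end - start).days)  # 20 (the window spans 21 calendar days)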

# Configure logging
logging.basicConfig(level=logging.INFO,
@@ -70,13 +72,27 @@ def get_date_range():


def get_user_timezone(file_content):
"""
Extracts the timezone from the file content, supporting IANA timezone names
(e.g., 'Asia/Shanghai') and UTC offsets (e.g., 'UTC+8').
If no valid timezone is found, defaults to DEFAULT_TIMEZONE.
"""
yaml_match = re.search(r'---\s*\ntimezone:\s*(\S+)\s*\n---', file_content)
if yaml_match:
timezone_str = yaml_match.group(1)
try:
return pytz.timezone(yaml_match.group(1))
# Attempt to interpret as a named timezone (e.g., "Asia/Shanghai")
return pytz.timezone(timezone_str)
except pytz.exceptions.UnknownTimeZoneError:
logging.warning(
f"Unknown timezone: {yaml_match.group(1)}. Using default {DEFAULT_TIMEZONE}.")
# If named timezone fails, attempt to interpret as a UTC offset
try:
# Convert UTC offset string to a fixed offset timezone
offset = int(timezone_str[3:])  # signed hour offset after the "UTC" prefix, e.g. "+8"
return pytz.FixedOffset(offset * 60) # Offset in minutes
except ValueError:
logging.warning(
f"Invalid timezone format: {timezone_str}. Using default {DEFAULT_TIMEZONE}.")
return pytz.timezone(DEFAULT_TIMEZONE)
return pytz.timezone(DEFAULT_TIMEZONE)
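
A minimal sketch of the two branches above, assuming front matter shaped like the participants' files:

content_iana = "---\ntimezone: Asia/Shanghai\n---\n# notes"
content_offset = "---\ntimezone: UTC+8\n---\n# notes"
print(get_user_timezone(content_iana))    # Asia/Shanghai (named pytz timezone)
print(get_user_timezone(content_offset))  # pytz.FixedOffset(480), i.e. UTC+08:00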


@@ -91,13 +107,25 @@ def extract_content_between_markers(file_content):

def find_date_in_content(content, local_date):
date_patterns = [
r'#\s*' + local_date.strftime("%Y.%m.%d"),
r'##\s*' + local_date.strftime("%Y.%m.%d"),
r'###\s*' + local_date.strftime("%Y.%m.%d"),
r'#\s*' + local_date.strftime("%Y.%m.%d").replace('.0', '.'),
r'##\s*' + local_date.strftime("%Y.%m.%d").replace('.0', '.'),
r'###\s*' + local_date.strftime("%Y.%m.%d").replace('.0', '.'),
r'#\s*' + local_date.strftime("%m.%d").lstrip('0').replace('.0', '.'),
r'##\s*' + local_date.strftime("%m.%d").lstrip('0').replace('.0', '.'),
r'###\s*' +
local_date.strftime("%m.%d").lstrip('0').replace('.0', '.'),
r'#\s*' + local_date.strftime("%Y/%m/%d"),
r'##\s*' + local_date.strftime("%Y/%m/%d"),
r'###\s*' + local_date.strftime("%Y/%m/%d"),
r'#\s*' + local_date.strftime("%m/%d").lstrip('0').replace('/0', '/'),
r'##\s*' + local_date.strftime("%m/%d").lstrip('0').replace('/0', '/'),
r'###\s*' +
local_date.strftime("%m/%d").lstrip('0').replace('/0', '/'),
r'#\s*' + local_date.strftime("%m.%d").zfill(5),
r'##\s*' + local_date.strftime("%m.%d").zfill(5),
r'###\s*' + local_date.strftime("%m.%d").zfill(5)
]
combined_pattern = '|'.join(date_patterns)
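
The tail of this function is collapsed in the diff; assuming it returns the re.search result over combined_pattern, the expanded pattern list should now accept all of these heading styles for Jan 6, 2025 (a sketch):

from datetime import datetime

local_date = datetime(2025, 1, 6)
for heading in ("# 2025.01.06", "## 2025.1.6", "### 1.6",
                "# 2025/01/06", "## 1/6", "# 01.06"):
    print(bool(find_date_in_content(heading, local_date)))  # expected: True each time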
@@ -200,10 +228,25 @@ def check_weekly_status(user_status, date, user_tz):
def get_all_user_files():
exclude_prefixes = ('template', 'readme')
return [f[:-len(FILE_SUFFIX)] for f in os.listdir('.')
if f.lower().endswith(FILE_SUFFIX.lower())
and not f.lower().startswith(exclude_prefixes)]
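
For example (a sketch with hypothetical filenames), a checkout containing README.md, template.md, alice.md, and bob.md yields:

print(sorted(get_all_user_files()))  # ['alice', 'bob']; README and template are excluded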


def extract_name_from_row(row):
"""
Extracts the username from a table row, handling Markdown links.
"""
match = re.match(r'\|\s*\[([^\]]+)\]\([^)]+\)\s*\|', row)
if match:
return match.group(1).strip() # Extract the name from the link
else:
# Not a Markdown link: return the raw text of the first cell
parts = row.split('|')
if len(parts) > 1:
return parts[1].strip()
return None
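
Sample rows this helper should handle (usernames are hypothetical):

print(extract_name_from_row("| [alice](https://github.com/owner/repo/blob/main/alice.md) | ✅ |"))  # 'alice'
print(extract_name_from_row("| bob | ⭕️ |"))  # 'bob' (plain cell, no Markdown link)
print(extract_name_from_row("not a table row"))  # None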


def update_readme(content):
try:
start_index = content.find(TABLE_START_MARKER)
@@ -227,21 +270,16 @@ def update_readme(content):
len(TABLE_START_MARKER):end_index].strip().split('\n')[2:]

for row in table_rows:
match = re.match(r'\|\s*([^|]+)\s*\|', row)
if match:
display_name = match.group(1).strip()
if display_name:  # check that display_name is non-empty
existing_users.add(display_name)
new_table.append(generate_user_row(display_name))
else:
logging.warning(
f"Skipping empty display name in row: {row}")
user_name = extract_name_from_row(row)
if user_name:
existing_users.add(user_name)
new_table.append(generate_user_row(user_name))
else:
logging.warning(f"Skipping invalid row: {row}")

new_users = set(get_all_user_files()) - existing_users
for user in new_users:
if user.strip():  # make sure the username is not empty or whitespace-only
if user.strip():
new_table.append(generate_user_row(user))
logging.info(f"Added new user: {user}")
else:
@@ -255,14 +293,31 @@

def generate_user_row(user):
user_status = get_user_study_status(user)
with open(f"{user}{FILE_SUFFIX}", 'r', encoding='utf-8') as file:
file_content = file.read()
user_tz = get_user_timezone(file_content)
new_row = f"| {user} |"
owner, repo = get_repo_info()
if owner and repo:
repo_url = f"https://github.com/{owner}/{repo}/blob/main/{user}{FILE_SUFFIX}"
else:
# Fall back to a local relative link if repo info is unavailable
repo_url = f"{user}{FILE_SUFFIX}"
# Render the username as a Markdown link
user_link = f"[{user}]({repo_url})"
new_row = f"| {user_link} |"
is_eliminated = False
absent_count = 0
current_week = None

file_name_to_open = f"{user}{FILE_SUFFIX}"

try:
with open(file_name_to_open, 'r', encoding='utf-8') as file:
file_content = file.read()
except FileNotFoundError as e:
logging.error(f"Error: Could not find file {file_name_to_open}")
# Return a default row of "⭕️" marks (or handle the error differently)
return "| " + user_link + " | " + " ⭕️ |" * len(get_date_range()) + "\n"

user_tz = get_user_timezone(file_content)

user_current_day = datetime.now(user_tz).replace(
hour=0, minute=0, second=0, microsecond=0)
for date in get_date_range():
@@ -337,112 +392,98 @@ def get_fork_count():


def calculate_statistics(content):
start_index = content.find(TABLE_START_MARKER)
end_index = content.find(TABLE_END_MARKER)
start_index = content.find(STATS_START_MARKER)
end_index = content.find(STATS_END_MARKER)

if start_index == -1 or end_index == -1:
logging.error("Error: Couldn't find the table markers in README.md")
logging.error("Error: Couldn't find the stats markers in README.md")
return None

table_content = content[start_index +
len(TABLE_START_MARKER):end_index].strip()
rows = table_content.split('\n')[2:] # Skip header and separator rows

total_participants = len(rows)
eliminated_participants = 0
completed_participants = 0
perfect_attendance_users = []
completed_users = []

for row in rows:
user_name = row.split('|')[1].strip()
# Exclude first and last empty elements
statuses = [status.strip() for status in row.split('|')[2:-1]]

if '❌' in statuses:
eliminated_participants += 1
elif all(status == '✅' for status in statuses):
completed_participants += 1
completed_users.append(user_name)
perfect_attendance_users.append(user_name)
elif all(status in ['✅', '⭕️', ' '] for status in statuses):
completed_participants += 1
completed_users.append(user_name)

elimination_rate = (eliminated_participants /
total_participants) * 100 if total_participants > 0 else 0
fork_count = get_fork_count()

return {
'total_participants': total_participants,
'completed_participants': completed_participants,
'eliminated_participants': eliminated_participants,
'elimination_rate': elimination_rate,
'fork_count': fork_count,
'perfect_attendance_users': perfect_attendance_users,
'completed_users': completed_users
stats_content = content[start_index +
len(STATS_START_MARKER):end_index].strip()

# Initialize variables to store statistics
stats = {
"total_participants": 0,
"eliminated_participants": 0,
"completed_participants": 0,
"perfect_attendance_users": [],
"completed_users": [],
"fork_count": 0
}

# Use regular expressions to extract the data. Handle missing data gracefully.
total_match = re.search(r"- 总参与人数:\s*(\d+)", stats_content)
if total_match:
stats["total_participants"] = int(total_match.group(1))

completed_match = re.search(r"- 完成人数:\s*(\d+)", stats_content)
if completed_match:
stats["completed_participants"] = int(completed_match.group(1))

completed_users_match = re.search(r"- 完成用户:\s*([\w\s,]+)", stats_content)
if completed_users_match:
stats["completed_users"] = [x.strip()
for x in completed_users_match.group(1).split(',') if x.strip()]

perfect_attendance_users_match = re.search(
r"- 全勤用户:\s*([\w\s,]+)", stats_content)
if perfect_attendance_users_match:
stats["perfect_attendance_users"] = [
x.strip() for x in perfect_attendance_users_match.group(1).split(',') if x.strip()]

eliminated_match = re.search(r"- 淘汰人数:\s*(\d+)", stats_content)
if eliminated_match:
stats["eliminated_participants"] = int(eliminated_match.group(1))

fork_count_match = re.search(r"- Fork人数:\s*(\d+)", stats_content)
if fork_count_match:
stats["fork_count"] = int(fork_count_match.group(1))

return stats
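
A round-trip sketch of the parser with hypothetical numbers and usernames; note that the bullet labels and fullwidth colons must match what update_statistics writes below, and that \s in the user-list regexes spans newlines, so only the next bullet's leading "-" terminates the match:

sample = '''<!-- STATISTICALDATA_START -->
## 统计数据
- 总参与人数: 10
- 完成人数: 7
- 完成用户: alice, bob
- 全勤用户: alice
- 淘汰人数: 3
- Fork人数: 42
<!-- STATISTICALDATA_END -->'''
print(calculate_statistics(sample)["completed_users"])  # expected: ['alice', 'bob']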


def update_statistics(content, stats):
start_index = content.find(STATS_START_MARKER)
end_index = content.find(STATS_END_MARKER)

if start_index == -1 or end_index == -1:
logging.error("Error: Couldn't find the stats markers in README.md")
return content

stats_text = f"""{STATS_START_MARKER}
## 统计数据
- 总参与人数: {stats["total_participants"]}
- 完成人数: {stats["completed_participants"]}
- 完成用户: {', '.join(stats['completed_users'])}
- 全勤用户: {', '.join(stats['perfect_attendance_users'])}
- 淘汰人数: {stats["eliminated_participants"]}
- 淘汰率: {(stats["eliminated_participants"] / stats["total_participants"] if stats["total_participants"] else 0):.2%}
- Fork人数: {stats["fork_count"]}
{STATS_END_MARKER}"""

return content[:start_index] + stats_text + content[end_index + len(STATS_END_MARKER):]
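
Design note: calculate_statistics and update_statistics are coupled through this exact template, so renaming a bullet label or marker requires changing both. A minimal usage sketch with a hypothetical stats dict:

stats = {"total_participants": 10, "completed_participants": 7,
         "completed_users": ["alice", "bob"], "perfect_attendance_users": ["alice"],
         "eliminated_participants": 3, "fork_count": 42}
readme = "<!-- STATISTICALDATA_START -->\nstale\n<!-- STATISTICALDATA_END -->"
print(update_statistics(readme, stats))  # markers preserved, body rewritten in place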


def main():
try:
print_variables(
'START_DATE', 'END_DATE', 'DEFAULT_TIMEZONE',
GITHUB_REPOSITORY_OWNER=GITHUB_REPOSITORY_OWNER,
GITHUB_REPOSITORY=GITHUB_REPOSITORY,
FILE_SUFFIX=FILE_SUFFIX,
README_FILE=README_FILE,
FIELD_NAME=FIELD_NAME,
Content_START_MARKER=Content_START_MARKER,
Content_END_MARKER=Content_END_MARKER,
TABLE_START_MARKER=TABLE_START_MARKER,
TABLE_END_MARKER=TABLE_END_MARKER
)
with open(README_FILE, 'r', encoding='utf-8') as file:
content = file.read()
new_content = update_readme(content)
current_date = datetime.now(pytz.UTC)
if current_date > END_DATE:
stats = calculate_statistics(new_content)
if stats:
stats_content = f"\n\n## 统计数据\n\n"
stats_content += f"- 总参与人数: {stats['total_participants']}\n"
stats_content += f"- 完成人数: {stats['completed_participants']}\n"
stats_content += f"- 完成用户: {', '.join(stats['completed_users'])}\n"
stats_content += f"- 全勤用户: {', '.join(stats['perfect_attendance_users'])}\n"
stats_content += f"- 淘汰人数: {stats['eliminated_participants']}\n"
stats_content += f"- 淘汰率: {stats['elimination_rate']:.2f}%\n"
stats_content += f"- Fork人数: {stats['fork_count']}\n"
# Append the statistics to the file
# Insert them right after the <!-- END_COMMIT_TABLE --> marker
stats_start = new_content.find(
"<!-- STATISTICALDATA_START -->")
stats_end = new_content.find("<!-- STATISTICALDATA_END -->")

if stats_start != -1 and stats_end != -1:
# Replace existing statistical data
new_content = new_content[:stats_start] + "<!-- STATISTICALDATA_START -->\n" + stats_content + \
"<!-- STATISTICALDATA_END -->" + \
new_content[stats_end +
len("<!-- STATISTICALDATA_END -->"):]
else:
# Add new statistical data after <!-- END_COMMIT_TABLE -->
end_table_marker = "<!-- END_COMMIT_TABLE -->"
end_table_index = new_content.find(end_table_marker)
if end_table_index != -1:
insert_position = end_table_index + \
len(end_table_marker)
new_content = new_content[:insert_position] + "\n\n<!-- STATISTICALDATA_START -->\n" + \
stats_content + "<!-- STATISTICALDATA_END -->" + \
new_content[insert_position:]
else:
logging.warning(
"<!-- END_COMMIT_TABLE --> marker not found. Appending stats to the end.")
new_content += "\n\n<!-- STATISTICALDATA_START -->\n" + \
stats_content + "<!-- STATISTICALDATA_END -->"
with open(README_FILE, 'w', encoding='utf-8') as file:
file.write(new_content)
logging.info("README.md has been successfully updated.")
except Exception as e:
logging.error(f"An error occurred in main function: {str(e)}")
except FileNotFoundError:
logging.error(f"Error: Could not find file {README_FILE}")
return

content = update_readme(content)
stats = calculate_statistics(content)

if stats:
content = update_statistics(content, stats)

with open(README_FILE, 'w', encoding='utf-8') as file:
file.write(content)

logging.info(f"Successfully updated {README_FILE}")


if __name__ == "__main__":
