Reuse server_1_watched history to avoid duplication

Keeps the server_1_watched history so that the same results do not need to be fetched again each time server 1 is synced to another server.

Signed-off-by: Luis Garcia <git@luigi311.com>
luigi311 · Feb 22, 2025 · e2a0950 · e2a0950
1 parent ac5be47
commit e2a0950
Show file tree

Hide file tree

Showing 4 changed files with 199 additions and 16 deletions.
diff --git a/src/jellyfin_emby.py b/src/jellyfin_emby.py
@@ -349,12 +349,19 @@ def get_user_library_watched(
             return {}
 
     def get_watched(
-        self, users: dict[str, str], sync_libraries: list[str]
+        self,
+        users: dict[str, str],
+        sync_libraries: list[str],
+        users_watched: dict[str, UserData] = None,
     ) -> dict[str, UserData]:
         try:
-            users_watched: dict[str, UserData] = {}
+            if not users_watched:
+                users_watched: dict[str, UserData] = {}
 
             for user_name, user_id in users.items():
+                if user_name.lower() not in users_watched:
+                    users_watched[user_name.lower()] = UserData()
+
                 libraries = []
 
                 all_libraries = self.query(f"/Users/{user_id}/Views", "get")
@@ -365,6 +372,12 @@ def get_watched(
                     if library_title not in sync_libraries:
                         continue
 
+                    if library_title in users_watched:
+                        logger.debug(
+                            f"{self.server_type}: {user_name} {library_title} watched history has already been gathered, skipping"
+                        )
+                        continue
+
                     identifiers: dict[str, str] = {
                         "library_id": library_id,
                         "library_title": library_title,
@@ -404,9 +417,6 @@ def get_watched(
                             library_title,
                         )
 
-                        if user_name.lower() not in users_watched:
-                            users_watched[user_name.lower()] = UserData()
-
                         users_watched[user_name.lower()].libraries[library_title] = (
                             library_data
                         )

diff --git a/src/main.py b/src/main.py
@@ -17,6 +17,7 @@
 from src.users import setup_users
 from src.watched import (
     cleanup_watched,
+    merge_server_watched,
 )
 from src.black_white import setup_black_white_lists
 from src.connection import generate_server_connections
@@ -144,6 +145,9 @@ def main_loop():
         if server_1 == servers[-1]:
             break
 
+        # Store a copy of server_1_watched so that it can be reused for every server_2 without having to regather everyone's watch history each time
+        server_1_watched = None
+
         # Start server_2 at the next server in the list
         for server_2 in servers[servers.index(server_1) + 1 :]:
             # Check if server 1 and server 2 are going to be synced in either direction, skip if not
@@ -174,14 +178,16 @@ def main_loop():
             logger.info(f"Server 2 syncing libraries: {server_2_libraries}")
 
             logger.info("Creating watched lists", 1)
-            server_1_watched = server_1.get_watched(server_1_users, server_1_libraries)
+            server_1_watched = server_1.get_watched(
+                server_1_users, server_1_libraries, server_1_watched
+            )
             logger.info("Finished creating watched list server 1")
 
             server_2_watched = server_2.get_watched(server_2_users, server_2_libraries)
             logger.info("Finished creating watched list server 2")
 
-            logger.debug(f"Server 1 watched: {server_1_watched}")
-            logger.debug(f"Server 2 watched: {server_2_watched}")
+            logger.trace(f"Server 1 watched: {server_1_watched}")
+            logger.trace(f"Server 2 watched: {server_2_watched}")
 
             logger.info("Cleaning Server 1 Watched", 1)
             server_1_watched_filtered = cleanup_watched(
@@ -202,6 +208,16 @@ def main_loop():
 
             if should_sync_server(server_2, server_1):
                 logger.info(f"Syncing {server_2.info()} -> {server_1.info()}")
+
+                # Add server_2_watched_filtered to server_1_watched so that the stored version isn't stale for the next server
+                if not dryrun:
+                    server_1_watched = merge_server_watched(
+                        server_1_watched,
+                        server_2_watched_filtered,
+                        user_mapping,
+                        library_mapping,
+                    )
+
                 server_1.update_watched(
                     server_2_watched_filtered,
                     user_mapping,

diff --git a/src/plex.py b/src/plex.py
@@ -366,9 +366,15 @@ def get_user_library_watched(self, user, user_plex, library) -> LibraryData:
             )
             return LibraryData(title=library.title)
 
-    def get_watched(self, users, sync_libraries) -> dict[str, UserData]:
+    def get_watched(
+        self,
+        users: list[MyPlexAccount],
+        sync_libraries: list[str],
+        users_watched: dict[str, UserData] = None,
+    ) -> dict[str, UserData]:
         try:
-            users_watched: dict[str, UserData] = {}
+            if not users_watched:
+                users_watched: dict[str, UserData] = {}
 
             for user in users:
                 if self.admin_user == user:
@@ -386,26 +392,32 @@ def get_watched(self, users, sync_libraries) -> dict[str, UserData]:
                         )
                         continue
 
+                if user.title.lower() not in users_watched:
+                    users_watched[user.title.lower()] = UserData()
+
                 libraries = user_plex.library.sections()
 
                 for library in libraries:
                     if library.title not in sync_libraries:
                         continue
 
+                    if library.title in users_watched[user.title.lower()].libraries:
+                        logger.debug(
+                            f"Plex: {user.title} {library.title} watched history has already been gathered, skipping"
+                        )
+                        continue
+
                     library_data = self.get_user_library_watched(
                         user, user_plex, library
                     )
 
-                    if user.title.lower() not in users_watched:
-                        users_watched[user.title.lower()] = UserData()
-
                     users_watched[user.title.lower()].libraries[library.title] = (
                         library_data
                     )
 
             return users_watched
         except Exception as e:
-            logger.error(f"Plex: Failed to get watched, Error: {e}")
+            logger.error(f"Plex: Failed to get users watched, Error: {e}")
             raise Exception(e)
 
     def update_watched(

diff --git a/src/watched.py b/src/watched.py
@@ -42,6 +42,151 @@ class UserData(BaseModel):
     libraries: dict[str, LibraryData] = {}
 
 
+def merge_mediaitem_data(ep1: MediaItem, ep2: MediaItem) -> MediaItem:
+    """
+    Pick whichever of two matching MediaItems has progressed further.
+    A completed item always beats an incomplete one; when completion is the
+    same on both sides the larger status.time wins, and ep1 wins exact ties.
+    """
+    first_status, second_status = ep1.status, ep2.status
+    if bool(first_status.completed) != bool(second_status.completed):
+        # Exactly one side is completed: that side wins outright.
+        return ep1 if first_status.completed else ep2
+    return ep1 if first_status.time >= second_status.time else ep2
+
+
+def merge_series_data(series1: Series, series2: Series) -> Series:
+    """
+    Return a copy of series1 with the episodes of series2 folded into it.
+    Episodes that check_same_identifiers pairs up are resolved through
+    merge_mediaitem_data; unmatched series2 episodes are appended as copies.
+    """
+    combined = copy.deepcopy(series1)
+    episodes = combined.episodes
+
+    for incoming in series2.episodes:
+        # Scan the combined list, which also holds episodes appended earlier.
+        for position, existing in enumerate(episodes):
+            if check_same_identifiers(incoming.identifiers, existing.identifiers):
+                episodes[position] = merge_mediaitem_data(existing, incoming)
+                break
+        else:
+            # No break means nothing matched, so keep an independent copy.
+            episodes.append(copy.deepcopy(incoming))
+    return combined
+
+
+def merge_library_data(lib1: LibraryData, lib2: LibraryData) -> LibraryData:
+    """
+    Return a copy of lib1 with the movies and series of lib2 folded into it.
+    Entries paired up by check_same_identifiers are merged (movies through
+    merge_mediaitem_data, series through merge_series_data); the rest are copied in.
+    """
+    combined = copy.deepcopy(lib1)
+    movies, shows = combined.movies, combined.series
+
+    # Movies: resolve each duplicate in place, append anything unseen.
+    for incoming in lib2.movies:
+        for position, existing in enumerate(movies):
+            if check_same_identifiers(incoming.identifiers, existing.identifiers):
+                movies[position] = merge_mediaitem_data(existing, incoming)
+                break
+        else:
+            # The inner loop finished without a match.
+            movies.append(copy.deepcopy(incoming))
+
+    # Series: same walk, but a match merges the two episode lists.
+    for incoming in lib2.series:
+        for position, existing in enumerate(shows):
+            if check_same_identifiers(existing.identifiers, incoming.identifiers):
+                shows[position] = merge_series_data(existing, incoming)
+                break
+        else:
+            # The inner loop finished without a match.
+            shows.append(copy.deepcopy(incoming))
+
+    return combined
+
+
+def merge_user_data(user1: UserData, user2: UserData) -> UserData:
+    """
+    Build a new UserData holding the union of both users' libraries.
+    A library title present on both sides is combined with merge_library_data;
+    a title found only in user2 is carried over as a deep copy.
+    """
+    combined = copy.deepcopy(user1.libraries)
+    for title, incoming in user2.libraries.items():
+        if title not in combined:
+            # Only user2 has this library, so take an independent copy.
+            combined[title] = copy.deepcopy(incoming)
+            continue
+        # Both users have it: fold user2's entries into user1's copy.
+        combined[title] = merge_library_data(combined[title], incoming)
+    return UserData(libraries=combined)
+
+
+def merge_server_watched(
+    watched_list_1: dict[str, UserData],
+    watched_list_2: dict[str, UserData],
+    user_mapping: dict[str, str] | None = None,
+    library_mapping: dict[str, str] | None = None,
+) -> dict[str, UserData]:
+    """
+    Return a deep copy of watched_list_1 with watched_list_2 folded into it.
+
+    Keys of watched_list_2 are translated with user_mapping / library_mapping
+    (a plain .get() on the list-2 key) and fall back to the untranslated key;
+    a candidate only counts when it is already a key of watched_list_1.
+    Neither input dictionary is modified.
+
+      - User with no counterpart: copied in whole under its list-2 name.
+      - Library with no counterpart: copied into the matched user under its list-2 title.
+      - Library with a counterpart: combined through merge_library_data.
+    """
+    result = copy.deepcopy(watched_list_1)
+
+    for name_2, data_2 in watched_list_2.items():
+        # Prefer the mapped name, then the literal name, else no match.
+        mapped_name = user_mapping.get(name_2) if user_mapping else None
+        if mapped_name in watched_list_1:
+            name_1 = mapped_name
+        elif name_2 in watched_list_1:
+            name_1 = name_2
+        else:
+            name_1 = None
+
+        # Unknown user: bring the whole record across and move on.
+        if name_1 is None:
+            result[name_2] = copy.deepcopy(data_2)
+            continue
+
+        # Lookups read the untouched input; writes go to the copy.
+        libraries_1 = watched_list_1[name_1].libraries
+        for title_2, library_2 in data_2.libraries.items():
+            # Same preference order as for users: mapped, literal, none.
+            mapped_title = (
+                library_mapping.get(title_2) if library_mapping else None
+            )
+            if mapped_title in libraries_1:
+                title_1 = mapped_title
+            elif title_2 in libraries_1:
+                title_1 = title_2
+            else:
+                title_1 = None
+
+            # Unknown library: add it to the matched user as a copy.
+            if title_1 is None:
+                result[name_1].libraries[title_2] = copy.deepcopy(library_2)
+                continue
+
+            # Known library: merge the two and store under the list-1 title.
+            result[name_1].libraries[title_1] = merge_library_data(
+                libraries_1[title_1], library_2
+            )
+
+    return result
+
+
 def check_same_identifiers(item1: MediaIdentifiers, item2: MediaIdentifiers) -> bool:
     # Check for duplicate based on file locations:
     if item1.locations and item2.locations:
@@ -103,8 +248,8 @@ def check_remove_entry(item1: MediaItem, item2: MediaItem) -> bool:
 def cleanup_watched(
     watched_list_1: dict[str, UserData],
     watched_list_2: dict[str, UserData],
-    user_mapping=None,
-    library_mapping=None,
+    user_mapping: dict[str, str] | None = None,
+    library_mapping: dict[str, str] | None = None,
 ) -> dict[str, UserData]:
     modified_watched_list_1 = copy.deepcopy(watched_list_1)