fix: Add logs and continue summarizer on error

arslanashraf7 · arslanashraf7 · commit 0caed6833b1d · 2025-04-30T20:47:47.000+05:00
diff --git a/docker-compose.services.yml b/docker-compose.services.yml
@@ -101,6 +101,7 @@ services:
     environment:
       - KEYCLOAK_ADMIN=${KEYCLOAK_SVC_ADMIN:-admin}
       - KEYCLOAK_ADMIN_PASSWORD=${KEYCLOAK_SVC_ADMIN_PASSWORD:-admin}
+      - "_JAVA_OPTIONS=${JAVA_OPTIONS:-}" # Load _JAVA_OPTIONS from env, fallback to empty string
     networks:
       default:
         aliases:
diff --git a/learning_resources/content_summarizer.py b/learning_resources/content_summarizer.py
@@ -7,6 +7,10 @@
 from langchain_community.chat_models import ChatLiteLLM
 from typing_extensions import TypedDict
 
+from learning_resources.exceptions import (
+    FlashcardsGenerationError,
+    SummaryGenerationError,
+)
 from learning_resources.models import (
     ContentFile,
     ContentSummarizerConfiguration,
@@ -134,21 +138,26 @@ def summarize_content_files_by_ids(
         Returns:
             - None
         """
+        status_messages = []
         for content_file_id in content_file_ids:
-            self.summarize_single_content_file(content_file_id, overwrite=overwrite)
+            status_msg = self.summarize_single_content_file(
+                content_file_id, overwrite=overwrite
+            )
+            status_messages.append(status_msg)
+        return status_messages
 
     def summarize_single_content_file(
         self,
         content_file_id: int,
         overwrite,
-    ) -> None:
+    ) -> tuple[bool, str]:
         """Process a single content file
         Args:
             - content_file_id (int): Id of the content file to process
             - overwrite (bool): Whether to overwrite existing summary and flashcards
 
         Returns:
-            - None
+            - str: A string message indicating the status of the summarization
         """
         try:
             with transaction.atomic():
@@ -175,10 +184,20 @@ def summarize_single_content_file(
 
                     if updated:
                         content_file.save()
+                    return f"Content file summarization succeeded for CONTENT_FILE_ID: {content_file_id}"  # noqa: E501
+                return f"Content file summarization skipped for CONTENT_FILE_ID: {content_file_id}"  # noqa: E501
+
+        except SummaryGenerationError as exc:
+            return f"Content file summary generation failed for CONTENT_FILE_ID: {content_file_id}\nError: {exc.args[0]}\n\n"  # noqa: E501
 
-        except Exception:
+        except FlashcardsGenerationError as exc:
+            return f"Content file flashcards generation failed for CONTENT_FILE_ID: {content_file_id}\nError: {exc.args[0]}\n\n"  # noqa: E501
+        except Exception as exc:
             logger.exception("Error processing content: %d", content_file.id)
-            raise
+            return (
+                False,
+                f"Content file summarization failed for CONTENT_FILE_ID: {content_file_id}\nError: {exc.args[0]}\n\n",  # noqa: E501
+            )
 
     def _get_llm(self, model=None, temperature=0.0, max_tokens=1000) -> ChatLiteLLM:
         """Get the ChatLiteLLM instance"""
@@ -216,13 +235,14 @@ def _generate_summary(self, content: str, llm_model: str) -> str:
             generated_summary = response.content
             logger.info("Generated summary: %s", generated_summary)
 
-        except Exception:
+        except Exception as exc:
             logger.exception(
                 "An error occurred while generating summary using model: %s", llm_model
             )
-            raise
+            raise SummaryGenerationError(exc) from exc
+
         else:
-            return generated_summary
+            return True, generated_summary
 
     def _generate_flashcards(
         self, content: str, llm_model: str
@@ -243,12 +263,12 @@ def _generate_flashcards(
             )
             generated_flashcards = response.get("flashcards")
             logger.info("Generated flashcards: %s", generated_flashcards)
-
-        except Exception:
+        except Exception as exc:
             logger.exception(
                 "An error occurred while generating flashcards using model: %s",
                 llm_model,
             )
-            raise
+            raise FlashcardsGenerationError(exc) from exc
+
         else:
             return generated_flashcards
diff --git a/learning_resources/exceptions.py b/learning_resources/exceptions.py
@@ -13,3 +13,11 @@ class PostHogAuthenticationError(Exception):
 
 class PostHogQueryError(Exception):
     """Raised if the PostHog query API returns a non-authentication error."""
+
+
+class SummaryGenerationError(Exception):
+    """Raised if the summary generation fails for a content file."""
+
+
+class FlashcardsGenerationError(Exception):
+    """Raised if the flashcards generation fails for a content file."""
diff --git a/learning_resources/management/commands/generate_summary_flashcards.py b/learning_resources/management/commands/generate_summary_flashcards.py
@@ -1,5 +1,7 @@
 """Management command to run the content summarizer"""
 
+import itertools
+
 from django.conf import settings
 from django.core.management import BaseCommand
 
@@ -91,9 +93,16 @@ def handle(self, *args, **options):  # noqa: ARG002
             self.stdout.write("Waiting on task...")
 
             start = now_in_utc()
-            summarizer_task.get()
+            results = summarizer_task.get()
+
+            # Log the summarization stats
+            flat_results = list(itertools.chain(*results))
+            for result in flat_results:
+                self.stdout.write(f"{result}")
 
             total_seconds = (now_in_utc() - start).total_seconds()
             self.stdout.write(
-                f"Content file summarizer finished, took {total_seconds} seconds"
+                self.style.SUCCESS(
+                    f"Content file summarizer finished, took {total_seconds} seconds"
+                )
             )
diff --git a/learning_resources/tasks.py b/learning_resources/tasks.py
@@ -424,7 +424,7 @@ def summarize_content_files_task(
         - None
     """
     summarizer = ContentSummarizer()
-    summarizer.summarize_content_files_by_ids(content_file_ids, overwrite)
+    return summarizer.summarize_content_files_by_ids(content_file_ids, overwrite)
 
 
 @app.task(bind=True, acks_late=True)