diff --git a/backend/platform_settings_v2/migrations/__init__.py b/backend/platform_settings_v2/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/platform_settings_v2/models.py b/backend/platform_settings_v2/models.py index e69de29bb2..ff2f13f858 100644 --- a/backend/platform_settings_v2/models.py +++ b/backend/platform_settings_v2/models.py @@ -0,0 +1,73 @@ +import uuid + +from adapter_processor_v2.models import AdapterInstance +from django.db import models +from utils.models.base_model import BaseModel +from utils.models.organization_mixin import ( + DefaultOrganizationManagerMixin, + DefaultOrganizationMixin, +) + + +class PlatformSettingsModelManager(DefaultOrganizationManagerMixin, models.Manager): + """Manager for PlatformSettings model.""" + + pass + + +class PlatformSettings(DefaultOrganizationMixin, BaseModel): + """Platform-level settings for an organization. + + This model stores organization-wide settings including the system LLM + adapter that will be used for platform operations like vibe extractor + prompt generation. + """ + + id = models.UUIDField( + primary_key=True, + default=uuid.uuid4, + editable=False, + db_comment="Unique identifier for the platform settings", + ) + + # System LLM for platform operations (e.g., vibe extractor, prompt generation) + system_llm_adapter = models.ForeignKey( + AdapterInstance, + on_delete=models.SET_NULL, + null=True, + blank=True, + related_name="platform_system_llm", + db_comment="System LLM adapter used for platform-level AI operations like prompt generation", + ) + + objects = PlatformSettingsModelManager() + + class Meta: + verbose_name = "Platform Setting" + verbose_name_plural = "Platform Settings" + db_table = "platform_settings" + constraints = [ + models.UniqueConstraint( + fields=["organization"], + name="unique_organization_platform_settings", + ), + ] + + def __str__(self) -> str: + return f"PlatformSettings({self.organization})" + + @classmethod + def get_for_organization(cls, organization): + """Get or create platform settings for an organization. 
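+ + Illustrative usage (assuming ``org`` is an Organization instance): + + >>> ps = PlatformSettings.get_for_organization(org) + >>> ps.system_llm_adapter is None # True until a system LLM is configured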
+ + Args: + organization: Organization instance + + Returns: + PlatformSettings instance + """ + settings, created = cls.objects.get_or_create( + organization=organization, + defaults={}, + ) + return settings diff --git a/backend/platform_settings_v2/serializers.py b/backend/platform_settings_v2/serializers.py index 24bd93ec5c..3212b4d550 100644 --- a/backend/platform_settings_v2/serializers.py +++ b/backend/platform_settings_v2/serializers.py @@ -1,7 +1,16 @@ from account_v2.models import PlatformKey +from adapter_processor_v2.models import AdapterInstance from rest_framework import serializers +from rest_framework.exceptions import ValidationError from backend.serializers import AuditSerializer +from platform_settings_v2.models import PlatformSettings +from unstract.flags.feature_flag import check_feature_flag_status + +if check_feature_flag_status("sdk1"): + from unstract.sdk1.adapters.enums import AdapterTypes +else: + from unstract.sdk.adapters.enums import AdapterTypes class PlatformKeySerializer(AuditSerializer): @@ -22,3 +31,50 @@ class PlatformKeyIDSerializer(serializers.Serializer): key_name = serializers.CharField() key = serializers.CharField() is_active = serializers.BooleanField() + + +class PlatformSettingsSerializer(AuditSerializer): + """Serializer for PlatformSettings model.""" + + system_llm_adapter = serializers.PrimaryKeyRelatedField( + queryset=AdapterInstance.objects.all(), + required=False, + allow_null=True, + ) + + class Meta: + model = PlatformSettings + fields = [ + "id", + "organization", + "system_llm_adapter", + "created_at", + "modified_at", + ] + read_only_fields = ["id", "organization", "created_at", "modified_at"] + + def validate_system_llm_adapter(self, value): + """Validate that the adapter type is LLM and is accessible to the user.""" + if value is None: + return value + + # Check if user has access to this adapter + request = self.context.get("request") + if request and hasattr(request, "user"): + try: + adapter = AdapterInstance.objects.for_user(request.user).get(id=value.id) + # Validate that the adapter type is LLM + if adapter.adapter_type != AdapterTypes.LLM.value: + raise ValidationError("Only LLM adapters are allowed for system LLM") + + # Validate that adapter is usable and active + if not adapter.is_usable: + raise ValidationError("Selected LLM adapter is not usable") + + if not adapter.is_active: + raise ValidationError("Selected LLM adapter is not active") + + except AdapterInstance.DoesNotExist: + raise ValidationError("Selected LLM adapter not found or not accessible") + + return value diff --git a/backend/platform_settings_v2/urls.py b/backend/platform_settings_v2/urls.py index feb1f5cc1e..ccad3f935c 100644 --- a/backend/platform_settings_v2/urls.py +++ b/backend/platform_settings_v2/urls.py @@ -1,7 +1,7 @@ from django.urls import path from rest_framework.urlpatterns import format_suffix_patterns -from .views import PlatformKeyViewSet +from .views import PlatformKeyViewSet, PlatformSettingsViewSet platform_key_list = PlatformKeyViewSet.as_view( {"post": "create", "put": "refresh", "get": "list"} @@ -10,6 +10,12 @@ {"put": "toggle_platform_key", "delete": "destroy"} ) +platform_settings_view = PlatformSettingsViewSet.as_view( + {"get": "list", "put": "update", "patch": "update"} +) + +platform_settings_system_llm = PlatformSettingsViewSet.as_view({"get": "system_llm"}) + urlpatterns = format_suffix_patterns( [ path( @@ -22,5 +28,15 @@ platform_key_update, name="update_platform_key", ), + path( + "settings/", + platform_settings_view, + 
name="platform_settings", + ), + path( + "settings/system-llm/", + platform_settings_system_llm, + name="platform_settings_system_llm", + ), ] ) diff --git a/backend/platform_settings_v2/views.py b/backend/platform_settings_v2/views.py index 41610b60c5..1674f8e51c 100644 --- a/backend/platform_settings_v2/views.py +++ b/backend/platform_settings_v2/views.py @@ -5,17 +5,20 @@ from account_v2.models import Organization, PlatformKey from rest_framework import status, viewsets +from rest_framework.decorators import action from rest_framework.request import Request from rest_framework.response import Response from utils.user_context import UserContext from platform_settings_v2.constants import PlatformServiceConstants +from platform_settings_v2.models import PlatformSettings from platform_settings_v2.platform_auth_helper import PlatformAuthHelper from platform_settings_v2.platform_auth_service import PlatformAuthenticationService from platform_settings_v2.serializers import ( PlatformKeyGenerateSerializer, PlatformKeyIDSerializer, PlatformKeySerializer, + PlatformSettingsSerializer, ) logger = logging.getLogger(__name__) @@ -123,3 +126,79 @@ def create(self, request: Request) -> Response: status=status.HTTP_201_CREATED, data=serialized_data, ) + + +class PlatformSettingsViewSet(viewsets.ModelViewSet): + """ViewSet for managing platform settings.""" + + serializer_class = PlatformSettingsSerializer + + def get_queryset(self): + """Get platform settings for the user's organization.""" + organization = UserContext.get_organization() + return PlatformSettings.objects.filter(organization=organization) + + def get_object(self): + """Get or create platform settings for the user's organization.""" + organization = UserContext.get_organization() + settings, created = PlatformSettings.objects.get_or_create( + organization=organization + ) + return settings + + def list( + self, request: Request, *args: tuple[Any], **kwargs: dict[str, Any] + ) -> Response: + """List platform settings for the organization.""" + settings = self.get_object() + serializer = self.get_serializer(settings) + return Response(serializer.data) + + def retrieve( + self, request: Request, *args: tuple[Any], **kwargs: dict[str, Any] + ) -> Response: + """Retrieve platform settings for the organization.""" + settings = self.get_object() + serializer = self.get_serializer(settings) + return Response(serializer.data) + + def update( + self, request: Request, *args: tuple[Any], **kwargs: dict[str, Any] + ) -> Response: + """Update platform settings.""" + settings = self.get_object() + serializer = self.get_serializer( + settings, data=request.data, partial=True, context={"request": request} + ) + serializer.is_valid(raise_exception=True) + serializer.save() + return Response(serializer.data) + + @action(detail=False, methods=["get"]) + def system_llm(self, request: Request) -> Response: + """Get the configured system LLM adapter for the organization. 
+ + Returns: + Response with system LLM adapter details or null if not configured + """ + settings = self.get_object() + if settings.system_llm_adapter: + from adapter_processor_v2.serializers import AdapterInstanceSerializer + + adapter_serializer = AdapterInstanceSerializer(settings.system_llm_adapter) + return Response( + { + "system_llm_adapter": adapter_serializer.data, + "is_configured": True, + }, + status=status.HTTP_200_OK, + ) + else: + return Response( + { + "system_llm_adapter": None, + "is_configured": False, + "message": "No system LLM adapter configured for this organization", + }, + status=status.HTTP_200_OK, + ) diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/__init__.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/admin.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/admin.py new file mode 100644 index 0000000000..b083210e1c --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/admin.py @@ -0,0 +1,30 @@ +from django.contrib import admin + +from prompt_studio.prompt_studio_vibe_extractor_v2.models import ( + VibeExtractorProject, +) + + +@admin.register(VibeExtractorProject) +class VibeExtractorProjectAdmin(admin.ModelAdmin): + """Admin interface for VibeExtractorProject.""" + + list_display = [ + "project_id", + "document_type", + "status", + "tool_id", + "created_at", + "modified_at", + ] + list_filter = ["status", "created_at"] + search_fields = ["document_type", "project_id"] + readonly_fields = [ + "project_id", + "generation_output_path", + "generation_progress", + "created_by", + "modified_by", + "created_at", + "modified_at", + ] diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/apps.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/apps.py new file mode 100644 index 0000000000..dc03bc3a2a --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/apps.py @@ -0,0 +1,7 @@ +from django.apps import AppConfig + + +class PromptStudioVibeExtractorV2Config(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "prompt_studio.prompt_studio_vibe_extractor_v2" + verbose_name = "Prompt Studio Vibe Extractor V2" diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/constants.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/constants.py new file mode 100644 index 0000000000..1fd5b19fee --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/constants.py @@ -0,0 +1,44 @@ +"""Constants for Vibe Extractor.""" + + +class VibeExtractorKeys: + """Keys for Vibe Extractor API requests and responses.""" + + PROJECT_ID = "project_id" + DOCUMENT_TYPE = "document_type" + STATUS = "status" + GENERATION_OUTPUT_PATH = "generation_output_path" + ERROR_MESSAGE = "error_message" + GENERATION_PROGRESS = "generation_progress" + TOOL_ID = "tool_id" + + +class VibeExtractorFileNames: + """File names for generated files.""" + + METADATA_YAML = "metadata.yaml" + EXTRACTION_YAML = "extraction.yaml" + PAGE_EXTRACTION_SYSTEM_MD = "page-extraction-system.md" + PAGE_EXTRACTION_USER_MD = "page-extraction-user.md" + SCALARS_EXTRACTION_SYSTEM_MD = "extraction-scalars-system.md" + SCALARS_EXTRACTION_USER_MD = "extraction-scalars-user.md" + TABLES_EXTRACTION_SYSTEM_MD = "extraction-table-system.md" + TABLES_EXTRACTION_USER_MD = "extraction-table-user.md" + + +class VibeExtractorPaths: + """Path constants for Vibe 
Extractor.""" + + PROMPTS_DIR = "prompts" + STAGING_DIR = "staging" + REFERENCE_DIR = "reference" + + +class GenerationSteps: + """Steps in the generation process.""" + + METADATA = "metadata" + EXTRACTION_FIELDS = "extraction_fields" + PAGE_EXTRACTION_PROMPTS = "page_extraction_prompts" + SCALARS_EXTRACTION_PROMPTS = "scalars_extraction_prompts" + TABLES_EXTRACTION_PROMPTS = "tables_extraction_prompts" diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/exceptions.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/exceptions.py new file mode 100644 index 0000000000..f521afc1fd --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/exceptions.py @@ -0,0 +1,31 @@ +"""Exceptions for Vibe Extractor.""" + + +class VibeExtractorError(Exception): + """Base exception for Vibe Extractor errors.""" + + pass + + +class ProjectNotFoundError(VibeExtractorError): + """Raised when a project is not found.""" + + pass + + +class GenerationError(VibeExtractorError): + """Raised when generation fails.""" + + pass + + +class FileReadError(VibeExtractorError): + """Raised when reading a generated file fails.""" + + pass + + +class InvalidDocumentTypeError(VibeExtractorError): + """Raised when document type is invalid.""" + + pass diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/migrations/__init__.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/migrations/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/models.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/models.py new file mode 100644 index 0000000000..452e065bb4 --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/models.py @@ -0,0 +1,65 @@ +import uuid + +from account_v2.models import User +from adapter_processor_v2.models import AdapterInstance +from django.db import models +from utils.models.base_model import BaseModel + +from prompt_studio.prompt_studio_core_v2.models import CustomTool + + +class VibeExtractorProject(BaseModel): + """Model to store Vibe Extractor project metadata. + + This stores the document type and tracks the generation process. + All generated content (metadata.yaml, extraction.yaml, prompts) + will be stored as files in the repository. 
+ """ + + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + document_type = models.TextField( + blank=False, + db_comment="Document type name (e.g., invoice, receipt)", + ) + llm_adapter = models.ForeignKey( + AdapterInstance, + on_delete=models.SET_NULL, + related_name="vibe_extractor_projects_llm", + null=True, + blank=True, + db_comment="LLM adapter used for generation (from platform system LLM)", + ) + tool_id = models.ForeignKey( + CustomTool, + on_delete=models.SET_NULL, + related_name="vibe_extractor_projects", + null=True, + blank=True, + db_comment="Associated custom tool", + ) + created_by = models.ForeignKey( + User, + on_delete=models.SET_NULL, + related_name="vibe_extractor_projects_created", + null=True, + blank=True, + editable=False, + ) + modified_by = models.ForeignKey( + User, + on_delete=models.SET_NULL, + related_name="vibe_extractor_projects_modified", + null=True, + blank=True, + editable=False, + ) + + class Meta: + verbose_name = "Vibe Extractor Project" + verbose_name_plural = "Vibe Extractor Projects" + db_table = "vibe_extractor_project" + indexes = [ + models.Index(fields=["document_type"]), + models.Index(fields=["status"]), + models.Index(fields=["tool_id"]), + ] diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/serializers.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/serializers.py new file mode 100644 index 0000000000..548b724e3b --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/serializers.py @@ -0,0 +1,137 @@ +from rest_framework import serializers + +from prompt_studio.prompt_studio_vibe_extractor_v2.models import ( + VibeExtractorProject, +) + + +class VibeExtractorProjectSerializer(serializers.ModelSerializer): + """Serializer for VibeExtractorProject model.""" + + class Meta: + model = VibeExtractorProject + fields = [ + "id", + "document_type", + "llm_adapter", + "tool_id", + "created_by", + "modified_by", + "created_at", + "modified_at", + ] + read_only_fields = [ + "id", + "created_by", + "modified_by", + "created_at", + "modified_at", + ] + + +class VibeExtractorProjectCreateSerializer(serializers.Serializer): + """Serializer for creating a new Vibe Extractor project.""" + + document_type = serializers.CharField( + required=True, + help_text="Document type name (e.g., invoice, receipt)", + ) + tool_id = serializers.UUIDField( + required=False, + allow_null=True, + help_text="Associated custom tool ID", + ) + + +class VibeExtractorGenerateSerializer(serializers.Serializer): + """Serializer for triggering generation for a project.""" + + regenerate = serializers.BooleanField( + default=False, + help_text="Whether to regenerate if files already exist", + ) + + +class VibeExtractorFileReadSerializer(serializers.Serializer): + """Serializer for reading generated files.""" + + file_type = serializers.ChoiceField( + choices=[ + "metadata", + "extraction", + "page_extraction_system", + "page_extraction_user", + "scalars_extraction_system", + "scalars_extraction_user", + "tables_extraction_system", + "tables_extraction_user", + ], + required=True, + help_text="Type of file to read", + ) + + +class VibeExtractorGenerateMetadataSerializer(serializers.Serializer): + """Serializer for generating metadata only.""" + + regenerate = serializers.BooleanField( + default=False, + help_text="Whether to regenerate if metadata already exists", + ) + + +class VibeExtractorGenerateExtractionFieldsSerializer(serializers.Serializer): + """Serializer for generating extraction fields.""" + + 
metadata = serializers.JSONField( + required=True, + help_text="Metadata dictionary to use for generation", + ) + + +class VibeExtractorGeneratePagePromptsSerializer(serializers.Serializer): + """Serializer for generating page extraction prompts.""" + + metadata = serializers.JSONField( + required=True, + help_text="Metadata dictionary to use for generation", + ) + + +class VibeExtractorGenerateScalarPromptsSerializer(serializers.Serializer): + """Serializer for generating scalar extraction prompts.""" + + metadata = serializers.JSONField( + required=True, + help_text="Metadata dictionary to use for generation", + ) + extraction_yaml = serializers.CharField( + required=True, + help_text="Extraction YAML content", + ) + + +class VibeExtractorGenerateTablePromptsSerializer(serializers.Serializer): + """Serializer for generating table extraction prompts.""" + + metadata = serializers.JSONField( + required=True, + help_text="Metadata dictionary to use for generation", + ) + extraction_yaml = serializers.CharField( + required=True, + help_text="Extraction YAML content", + ) + + +class VibeExtractorGuessDocumentTypeSerializer(serializers.Serializer): + """Serializer for guessing document type from file.""" + + file_name = serializers.CharField( + required=True, + help_text="Name of the file in permanent storage", + ) + tool_id = serializers.UUIDField( + required=True, + help_text="Tool ID to construct the file path", + ) diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/services/__init__.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/services/__init__.py new file mode 100644 index 0000000000..662d45ff25 --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/services/__init__.py @@ -0,0 +1 @@ +"""Services for Vibe Extractor V2.""" diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/services/adapter_helper.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/services/adapter_helper.py new file mode 100644 index 0000000000..f9acb7a68b --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/services/adapter_helper.py @@ -0,0 +1,180 @@ +"""Adapter Helper for Vibe Extractor. + +This module converts a platform AdapterInstance to an autogen-compatible LLM configuration. +""" + +import logging +from typing import Any + +from adapter_processor_v2.models import AdapterInstance + +logger = logging.getLogger(__name__) + + +class AdapterHelper: + """Helper to convert AdapterInstance to LLM configuration.""" + + # Mapping of adapter_id to autogen adapter_id. Ordered most-specific + # first because get_autogen_adapter_id falls back to substring matching + # ("azure-openai" must not be matched by the plain "openai" key). + ADAPTER_ID_MAPPING = { + # Azure OpenAI adapters + "azure-openai": "azureopenai", + "azureopenai": "azureopenai", + # OpenAI adapters + "openai": "openai", + "openai-llm": "openai", + # Anthropic adapters + "anthropic": "anthropic", + "claude": "anthropic", + # Bedrock adapters + "bedrock": "bedrock", + "aws-bedrock": "bedrock", + } + + @staticmethod + def get_autogen_adapter_id(adapter_id: str) -> str: + """Get autogen-compatible adapter ID. + + Args: + adapter_id: Platform adapter ID + + Returns: + Autogen adapter ID (openai, azureopenai, anthropic, bedrock) + """ + # Normalize adapter_id + normalized_id = adapter_id.lower().strip() + + # Exact match first + if normalized_id in AdapterHelper.ADAPTER_ID_MAPPING: + return AdapterHelper.ADAPTER_ID_MAPPING[normalized_id] + + # Fall back to substring matching (most-specific keys first) + for key, value in AdapterHelper.ADAPTER_ID_MAPPING.items(): + if key in normalized_id: + return value + + # Default to openai if not found + logger.warning(f"Unknown adapter_id: {adapter_id}. 
Defaulting to 'openai'") + return "openai" + + @staticmethod + def convert_to_llm_config(adapter: AdapterInstance) -> dict[str, Any]: + """Convert AdapterInstance to autogen LLM configuration. + + Args: + adapter: AdapterInstance from platform + + Returns: + LLM configuration dictionary for autogen + + Raises: + ValueError: If adapter type is not LLM + """ + # Validate adapter type + if adapter.adapter_type != "LLM": + raise ValueError(f"Adapter must be of type LLM, got: {adapter.adapter_type}") + + # Get decrypted metadata + metadata = adapter.metadata + + # Get autogen adapter ID + autogen_adapter_id = AdapterHelper.get_autogen_adapter_id(adapter.adapter_id) + + # Base configuration + llm_config = { + "adapter_id": autogen_adapter_id, + "model": metadata.get("model", metadata.get("deployment", "gpt-4")), + "temperature": float(metadata.get("temperature", 0.7)), + "max_tokens": int(metadata.get("max_tokens", 4096)), + } + + # Provider-specific configuration + if autogen_adapter_id == "openai": + llm_config["api_key"] = metadata.get("api_key", "") + if "api_base" in metadata: + llm_config["api_base"] = metadata["api_base"] + if "timeout" in metadata: + llm_config["timeout"] = int(metadata["timeout"]) + if "max_retries" in metadata: + llm_config["max_retries"] = int(metadata["max_retries"]) + + elif autogen_adapter_id == "azureopenai": + llm_config["api_key"] = metadata.get("api_key", "") + llm_config["api_base"] = metadata.get( + "azure_endpoint", metadata.get("api_base", "") + ) + llm_config["api_version"] = metadata.get("api_version", "2024-02-15-preview") + llm_config["deployment"] = metadata.get("deployment", metadata.get("model")) + if "timeout" in metadata: + llm_config["timeout"] = int(metadata["timeout"]) + + elif autogen_adapter_id == "anthropic": + llm_config["api_key"] = metadata.get("api_key", "") + if "api_base" in metadata: + llm_config["api_base"] = metadata["api_base"] + + elif autogen_adapter_id == "bedrock": + llm_config["aws_access_key_id"] = metadata.get("aws_access_key_id", "") + llm_config["aws_secret_access_key"] = metadata.get( + "aws_secret_access_key", "" + ) + llm_config["region_name"] = metadata.get("region_name", "us-east-1") + if "max_retries" in metadata: + llm_config["max_retries"] = int(metadata["max_retries"]) + if "budget_tokens" in metadata: + llm_config["budget_tokens"] = int(metadata["budget_tokens"]) + if "timeout" in metadata: + llm_config["timeout"] = int(metadata["timeout"]) + + # Add provider for tracking + llm_config["provider"] = adapter.adapter_id + + return llm_config + + @staticmethod + def validate_llm_adapter(adapter: AdapterInstance) -> tuple[bool, str]: + """Validate that adapter is suitable for vibe extraction. + + Args: + adapter: AdapterInstance to validate + + Returns: + Tuple of (is_valid, error_message) + """ + # Check adapter type + if adapter.adapter_type != "LLM": + return False, f"Adapter must be of type LLM, got: {adapter.adapter_type}" + + # Check if adapter is usable + if not adapter.is_usable: + return False, "Adapter is not usable" + + # Check if adapter is active + if not adapter.is_active: + return ( + False, + "Adapter is not active. 
Please activate it in platform settings.", + ) + + # Try to get metadata + try: + metadata = adapter.metadata + if not metadata: + return False, "Adapter metadata is empty" + except Exception as e: + return False, f"Error reading adapter metadata: {str(e)}" + + # Check for required fields + required_fields = ["model"] + autogen_adapter_id = AdapterHelper.get_autogen_adapter_id(adapter.adapter_id) + + if autogen_adapter_id in ["openai", "azureopenai", "anthropic"]: + required_fields.append("api_key") + elif autogen_adapter_id == "bedrock": + required_fields.extend( + ["aws_access_key_id", "aws_secret_access_key", "region_name"] + ) + + missing_fields = [field for field in required_fields if not metadata.get(field)] + if missing_fields: + return ( + False, + f"Missing required fields in adapter metadata: {', '.join(missing_fields)}", + ) + + return True, "" diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/services/generator_service.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/services/generator_service.py new file mode 100644 index 0000000000..b0751db2b1 --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/services/generator_service.py @@ -0,0 +1,275 @@ +"""Generator Service Integration. + +This module integrates with the prompt service to generate +document extraction components. +""" + +import asyncio +import logging +from typing import Any + +from adapter_processor_v2.models import AdapterInstance +from platform_settings_v2.models import PlatformSettings +from utils.user_context import UserContext + +from prompt_studio.prompt_studio_vibe_extractor_v2.models import ( + VibeExtractorProject, +) +from prompt_studio.prompt_studio_vibe_extractor_v2.services.adapter_helper import ( + AdapterHelper, +) +from prompt_studio.prompt_studio_vibe_extractor_v2.vibe_extractor_helper import ( + VibeExtractorHelper, +) + +logger = logging.getLogger(__name__) + + +class GeneratorService: + """Service to integrate with prompt service for generation.""" + + @staticmethod + def _get_system_llm_adapter() -> AdapterInstance: + """Get system LLM adapter from platform settings. + + Returns: + AdapterInstance configured as system LLM + + Raises: + ValueError: If system LLM is not configured + """ + try: + organization = UserContext.get_organization() + platform_settings = PlatformSettings.get_for_organization(organization) + + if not platform_settings.system_llm_adapter: + raise ValueError( + "No system LLM adapter configured for this organization. " + "Please configure a system LLM in platform settings." + ) + + # Validate the adapter + is_valid, error_msg = AdapterHelper.validate_llm_adapter( + platform_settings.system_llm_adapter + ) + if not is_valid: + raise ValueError(f"System LLM adapter is invalid: {error_msg}") + + return platform_settings.system_llm_adapter + + except Exception as e: + logger.error("Failed to get system LLM adapter: %s", str(e)) + raise ValueError(f"Failed to get system LLM adapter: {str(e)}") from e + + @staticmethod + def _get_llm_config( + project: VibeExtractorProject = None, + ) -> dict[str, Any]: + """Get LLM configuration from platform system LLM or project. 
+ + Args: + project: Optional VibeExtractorProject to get LLM from + + Returns: + LLM configuration dictionary + + Raises: + ValueError: If LLM configuration is missing or invalid + """ + # If project has an LLM adapter, use it + if project and project.llm_adapter: + adapter = project.llm_adapter + else: + # Otherwise, get system LLM from platform settings + adapter = GeneratorService._get_system_llm_adapter() + + # Convert adapter to LLM config + try: + llm_config = AdapterHelper.convert_to_llm_config(adapter) + logger.info( + "Using LLM adapter: %s (model: %s)", + adapter.adapter_name, + llm_config.get("model"), + ) + return llm_config + except Exception as e: + error_msg = f"Failed to convert adapter to LLM config: {str(e)}" + logger.error(error_msg) + raise ValueError(error_msg) from e + + @staticmethod + def _get_reference_template() -> str: + """Get reference metadata.yaml template. + + Returns: + Reference template content + """ + try: + reference_template = VibeExtractorHelper.get_reference_template( + "metadata.yaml" + ) + return reference_template + except Exception as e: + logger.warning(f"Could not load reference template: {e}") + # Return default template + return """--- +name_identifier: example +name: Example Document +description: | + Example document description. +description_seo: | + SEO optimized description. +html_meta_description: | + HTML meta description. +tags: + - example +version: 1.0.0 +status: beta +visibility: public +author: Zipstack Inc +release_date: 2025-07-01 +price_multiplier: 1.0 +llm_model: claude-sonnet-1-7 +extraction_features: + locate_pages: true + rolling_window: false + challenge: false +""" + + @staticmethod + def _create_progress_callback(project: VibeExtractorProject): + """Create a progress callback for updating project status. + + Args: + project: VibeExtractorProject instance + + Returns: + Callback function + """ + + def progress_callback(step: str, status: str, message: str = ""): + """Update project progress. + + Args: + step: Generation step name + status: Status (in_progress, completed, failed) + message: Optional message + """ + try: + VibeExtractorHelper.update_generation_progress( + project, step, status, message + ) + + # Update project status based on step + if status == "failed": + project.status = VibeExtractorProject.Status.FAILED + project.error_message = message + project.save(update_fields=["status", "error_message", "modified_at"]) + elif step == "generating_metadata": + project.status = VibeExtractorProject.Status.GENERATING_METADATA + project.save(update_fields=["status", "modified_at"]) + elif step == "generating_extraction_fields": + project.status = VibeExtractorProject.Status.GENERATING_FIELDS + project.save(update_fields=["status", "modified_at"]) + elif step == "generating_page_prompts" or step.startswith("generating_"): + project.status = VibeExtractorProject.Status.GENERATING_PROMPTS + project.save(update_fields=["status", "modified_at"]) + + except Exception as e: + logger.error(f"Error in progress callback: {e}") + + return progress_callback + + @staticmethod + async def generate_all_async( + project: VibeExtractorProject, + ) -> dict[str, Any]: + """Generate all components for a project asynchronously. 
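+ + Resolves the LLM configuration (the project's adapter, falling back to + the platform system LLM), wires up a progress callback, delegates the + actual generation to the prompt service and persists the final status + on the project.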
+ + Args: + project: VibeExtractorProject instance + + Returns: + Dictionary containing generation result + """ + try: + # Import here to avoid circular imports and ensure prompt service is available + from unstract.prompt_service.services.vibe_extractor.api_helper import ( + generate_document_extraction_components_sync, + ) + + # Get system LLM adapter if not already set on project + if not project.llm_adapter: + system_llm = GeneratorService._get_system_llm_adapter() + project.llm_adapter = system_llm + project.save(update_fields=["llm_adapter"]) + + # Get LLM configuration + llm_config = GeneratorService._get_llm_config(project) + + # Get reference template + reference_template = GeneratorService._get_reference_template() + + # Get output directory + output_dir = VibeExtractorHelper.get_project_output_path(project) + + # Create progress callback + progress_callback = GeneratorService._create_progress_callback(project) + + # Generate all components + result = generate_document_extraction_components_sync( + doc_type=project.document_type, + output_dir=str(output_dir.parent), + llm_config=llm_config, + reference_template=reference_template, + progress_callback=progress_callback, + ) + + # Update project status based on result + if result["status"] == "success": + project.status = VibeExtractorProject.Status.COMPLETED + project.generation_output_path = result["output_path"] + project.error_message = "" + project.save( + update_fields=[ + "status", + "generation_output_path", + "error_message", + "modified_at", + ] + ) + else: + project.status = VibeExtractorProject.Status.FAILED + project.error_message = result.get("error", "Unknown error") + project.save(update_fields=["status", "error_message", "modified_at"]) + + return result + + except Exception as e: + error_msg = f"Error during generation: {str(e)}" + logger.error(error_msg, exc_info=True) + + project.status = VibeExtractorProject.Status.FAILED + project.error_message = error_msg + project.save(update_fields=["status", "error_message", "modified_at"]) + + return {"status": "error", "error": error_msg} + + @staticmethod + def generate_all(project: VibeExtractorProject) -> dict[str, Any]: + """Generate all components for a project (sync wrapper). + + Args: + project: VibeExtractorProject instance + + Returns: + Dictionary containing generation result + """ + # Run the async function in a new event loop + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + return loop.run_until_complete(GeneratorService.generate_all_async(project)) diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/services/prompt_service_helper.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/services/prompt_service_helper.py new file mode 100644 index 0000000000..89fa7d2a74 --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/services/prompt_service_helper.py @@ -0,0 +1,234 @@ +"""Helper to communicate with prompt-service for vibe extractor operations. + +This module provides a helper class that uses the SDK's PromptTool +to communicate with the prompt-service, following Unstract's standards. 
+""" + +import logging +from typing import Any + +from account_v2.constants import Common +from django.conf import settings +from utils.local_context import StateStore + +from prompt_studio.prompt_studio_core_v2.prompt_ide_base_tool import ( + PromptIdeBaseTool, +) +from unstract.flags.feature_flag import check_feature_flag_status + +if check_feature_flag_status("sdk1"): + from unstract.sdk1.constants import LogLevel + from unstract.sdk1.prompt import PromptTool +else: + from unstract.sdk.constants import LogLevel + from unstract.sdk.prompt import PromptTool + +logger = logging.getLogger(__name__) + + +class VibeExtractorPromptServiceHelper: + """Helper class to communicate with prompt-service for vibe extractor. + + This class follows Unstract's standard pattern of using PromptIdeBaseTool + with the SDK's PromptTool to make HTTP calls to the prompt-service. + """ + + @staticmethod + def _get_prompt_tool(org_id: str) -> PromptTool: + """Get configured PromptTool instance. + + Args: + org_id: Organization ID + + Returns: + Configured PromptTool instance + """ + # Create PromptIdeBaseTool (standard tool used in backend) + util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) + + # Create PromptTool instance + prompt_tool = PromptTool( + tool=util, + prompt_host=settings.PROMPT_HOST, + prompt_port=settings.PROMPT_PORT, + request_id=StateStore.get(Common.REQUEST_ID), + ) + + return prompt_tool + + @staticmethod + def guess_document_type( + file_content: str, + llm_config: dict[str, Any], + org_id: str, + ) -> dict[str, Any]: + """Guess document type from file content. + + Args: + file_content: Extracted text content from document + llm_config: LLM configuration dictionary + org_id: Organization ID + + Returns: + Dictionary with status, document_type, confidence, etc. + """ + prompt_tool = VibeExtractorPromptServiceHelper._get_prompt_tool(org_id) + + payload = { + "file_content": file_content, + "llm_config": llm_config, + } + + return prompt_tool.guess_document_type(payload=payload) + + @staticmethod + def generate_metadata( + doc_type: str, + llm_config: dict[str, Any], + reference_template: str, + org_id: str, + ) -> dict[str, Any]: + """Generate metadata for a document type. + + Args: + doc_type: Document type name + llm_config: LLM configuration dictionary + reference_template: Reference metadata template + org_id: Organization ID + + Returns: + Dictionary with status and metadata + """ + prompt_tool = VibeExtractorPromptServiceHelper._get_prompt_tool(org_id) + + payload = { + "doc_type": doc_type, + "llm_config": llm_config, + "reference_template": reference_template, + } + + return prompt_tool.generate_metadata(payload=payload) + + @staticmethod + def generate_extraction_fields( + doc_type: str, + metadata_description: str, + llm_config: dict[str, Any], + org_id: str, + ) -> dict[str, Any]: + """Generate extraction fields YAML. 
+ + Args: + doc_type: Document type name + metadata_description: Description from metadata + llm_config: LLM configuration dictionary + org_id: Organization ID + + Returns: + Dictionary with status and extraction_yaml + """ + prompt_tool = VibeExtractorPromptServiceHelper._get_prompt_tool(org_id) + + payload = { + "doc_type": doc_type, + "metadata_description": metadata_description, + "llm_config": llm_config, + } + + return prompt_tool.generate_extraction_fields(payload=payload) + + @staticmethod + def generate_page_prompts( + doc_type: str, + metadata_description: str, + llm_config: dict[str, Any], + org_id: str, + ) -> dict[str, Any]: + """Generate page extraction prompts. + + Args: + doc_type: Document type name + metadata_description: Description from metadata + llm_config: LLM configuration dictionary + org_id: Organization ID + + Returns: + Dictionary with status, system_prompt, user_prompt + """ + prompt_tool = VibeExtractorPromptServiceHelper._get_prompt_tool(org_id) + + payload = { + "doc_type": doc_type, + "metadata_description": metadata_description, + "llm_config": llm_config, + } + + return prompt_tool.generate_page_prompts(payload=payload) + + @staticmethod + def generate_scalar_prompts( + doc_type: str, + metadata_description: str, + extraction_yaml: str, + scalar_fields: list, + llm_config: dict[str, Any], + org_id: str, + ) -> dict[str, Any]: + """Generate scalar extraction prompts. + + Args: + doc_type: Document type name + metadata_description: Description from metadata + extraction_yaml: Extraction YAML string + scalar_fields: List of scalar field names + llm_config: LLM configuration dictionary + org_id: Organization ID + + Returns: + Dictionary with status, system_prompt, user_prompt + """ + prompt_tool = VibeExtractorPromptServiceHelper._get_prompt_tool(org_id) + + payload = { + "doc_type": doc_type, + "metadata_description": metadata_description, + "extraction_yaml": extraction_yaml, + "scalar_fields": scalar_fields, + "llm_config": llm_config, + } + + return prompt_tool.generate_scalar_prompts(payload=payload) + + @staticmethod + def generate_table_prompts( + doc_type: str, + metadata_description: str, + extraction_yaml: str, + list_fields: list, + llm_config: dict[str, Any], + org_id: str, + ) -> dict[str, Any]: + """Generate table extraction prompts. 
+ + Args: + doc_type: Document type name + metadata_description: Description from metadata + extraction_yaml: Extraction YAML string + list_fields: List of list/table field names + llm_config: LLM configuration dictionary + org_id: Organization ID + + Returns: + Dictionary with status, system_prompt, user_prompt + """ + prompt_tool = VibeExtractorPromptServiceHelper._get_prompt_tool(org_id) + + payload = { + "doc_type": doc_type, + "metadata_description": metadata_description, + "extraction_yaml": extraction_yaml, + "list_fields": list_fields, + "llm_config": llm_config, + } + + return prompt_tool.generate_table_prompts(payload=payload) diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/urls.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/urls.py new file mode 100644 index 0000000000..338d083c8a --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/urls.py @@ -0,0 +1,77 @@ +from django.urls import path +from rest_framework.routers import SimpleRouter + +from prompt_studio.prompt_studio_vibe_extractor_v2.views import ( + VibeExtractorProjectView, +) + +# Create router for standard CRUD operations +router = SimpleRouter() +router.register( + r"vibe-extractor", + VibeExtractorProjectView, + basename="vibe-extractor", +) + +# Get viewset instance for custom actions +viewset = VibeExtractorProjectView.as_view + +# Explicit URL patterns for generation endpoints; detail routes take the +# project primary key as <uuid:pk> +generation_patterns = [ + # Generate all components at once + path( + "vibe-extractor/<uuid:pk>/generate/", + viewset({"post": "generate"}), + name="vibe-extractor-generate", + ), + # Generate metadata only + path( + "vibe-extractor/<uuid:pk>/generate-metadata/", + viewset({"post": "generate_metadata"}), + name="vibe-extractor-generate-metadata", + ), + # Generate extraction fields + path( + "vibe-extractor/<uuid:pk>/generate-extraction-fields/", + viewset({"post": "generate_extraction_fields"}), + name="vibe-extractor-generate-extraction-fields", + ), + # Generate page extraction prompts + path( + "vibe-extractor/<uuid:pk>/generate-page-prompts/", + viewset({"post": "generate_page_prompts"}), + name="vibe-extractor-generate-page-prompts", + ), + # Generate scalar extraction prompts + path( + "vibe-extractor/<uuid:pk>/generate-scalar-prompts/", + viewset({"post": "generate_scalar_prompts"}), + name="vibe-extractor-generate-scalar-prompts", + ), + # Generate table extraction prompts + path( + "vibe-extractor/<uuid:pk>/generate-table-prompts/", + viewset({"post": "generate_table_prompts"}), + name="vibe-extractor-generate-table-prompts", + ), + # Read generated file + path( + "vibe-extractor/<uuid:pk>/read-file/", + viewset({"get": "read_file"}), + name="vibe-extractor-read-file", + ), + # List generated files + path( + "vibe-extractor/<uuid:pk>/list-files/", + viewset({"get": "list_files"}), + name="vibe-extractor-list-files", + ), + # Guess document type from file + path( + "vibe-extractor/guess-document-type/", + viewset({"post": "guess_document_type"}), + name="vibe-extractor-guess-document-type", + ), +] + +# Combine router URLs with explicit generation patterns +urlpatterns = router.urls + generation_patterns diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/vibe_extractor_helper.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/vibe_extractor_helper.py new file mode 100644 index 0000000000..aca8150bbe --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/vibe_extractor_helper.py @@ -0,0 +1,355 @@ +"""Helper functions for Vibe Extractor operations.""" + +import logging +from pathlib import Path +from typing import Any + +import yaml +from 
django.conf import settings +from utils.file_storage.helpers.prompt_studio_file_helper import ( + PromptStudioFileHelper, +) + +from prompt_studio.prompt_studio_vibe_extractor_v2.constants import ( + VibeExtractorFileNames, + VibeExtractorPaths, +) +from prompt_studio.prompt_studio_vibe_extractor_v2.exceptions import ( + FileReadError, + InvalidDocumentTypeError, +) +from prompt_studio.prompt_studio_vibe_extractor_v2.models import ( + VibeExtractorProject, +) + +logger = logging.getLogger(__name__) + + +class VibeExtractorHelper: + """Helper class for Vibe Extractor operations.""" + + @staticmethod + def validate_document_type(doc_type: str) -> str: + """Validate and normalize document type name. + + Args: + doc_type: Document type name + + Returns: + Normalized document type (lowercase with hyphens) + + Raises: + InvalidDocumentTypeError: If document type is invalid + """ + if not doc_type or not doc_type.strip(): + raise InvalidDocumentTypeError("Document type cannot be empty") + + # Convert to lowercase and replace spaces with hyphens + normalized = doc_type.lower().replace(" ", "-").replace("_", "-") + + # Remove special characters except hyphens + normalized = "".join(c for c in normalized if c.isalnum() or c == "-") + + if not normalized: + raise InvalidDocumentTypeError(f"Invalid document type: {doc_type}") + + return normalized + + @staticmethod + def get_project_output_path(project: VibeExtractorProject) -> Path: + """Get the output path for a project. + + Args: + project: VibeExtractorProject instance + + Returns: + Path object for the project output directory + """ + if project.generation_output_path: + return Path(project.generation_output_path) + + # Default to staging directory + base_dir = getattr( + settings, + "VIBE_EXTRACTOR_OUTPUT_DIR", + Path(settings.BASE_DIR).parent / VibeExtractorPaths.STAGING_DIR, + ) + normalized_type = VibeExtractorHelper.validate_document_type( + project.document_type + ) + return Path(base_dir) / normalized_type + + @staticmethod + def ensure_output_directory(project: VibeExtractorProject) -> Path: + """Ensure output directory exists for a project. + + Args: + project: VibeExtractorProject instance + + Returns: + Path object for the created directory + """ + output_path = VibeExtractorHelper.get_project_output_path(project) + output_path.mkdir(parents=True, exist_ok=True) + + # Create prompts subdirectory + prompts_path = output_path / VibeExtractorPaths.PROMPTS_DIR + prompts_path.mkdir(parents=True, exist_ok=True) + + return output_path + + @staticmethod + def read_generated_file(project: VibeExtractorProject, file_type: str) -> str: + """Read a generated file for a project. 
+ + Args: + project: VibeExtractorProject instance + file_type: Type of file to read + + Returns: + Content of the file + + Raises: + FileReadError: If file cannot be read + """ + output_path = VibeExtractorHelper.get_project_output_path(project) + + file_map = { + "metadata": output_path / VibeExtractorFileNames.METADATA_YAML, + "extraction": output_path / VibeExtractorFileNames.EXTRACTION_YAML, + "page_extraction_system": output_path + / VibeExtractorPaths.PROMPTS_DIR + / VibeExtractorFileNames.PAGE_EXTRACTION_SYSTEM_MD, + "page_extraction_user": output_path + / VibeExtractorPaths.PROMPTS_DIR + / VibeExtractorFileNames.PAGE_EXTRACTION_USER_MD, + "scalars_extraction_system": output_path + / VibeExtractorPaths.PROMPTS_DIR + / VibeExtractorFileNames.SCALARS_EXTRACTION_SYSTEM_MD, + "scalars_extraction_user": output_path + / VibeExtractorPaths.PROMPTS_DIR + / VibeExtractorFileNames.SCALARS_EXTRACTION_USER_MD, + "tables_extraction_system": output_path + / VibeExtractorPaths.PROMPTS_DIR + / VibeExtractorFileNames.TABLES_EXTRACTION_SYSTEM_MD, + "tables_extraction_user": output_path + / VibeExtractorPaths.PROMPTS_DIR + / VibeExtractorFileNames.TABLES_EXTRACTION_USER_MD, + } + + file_path = file_map.get(file_type) + if not file_path: + raise FileReadError(f"Unknown file type: {file_type}") + + if not file_path.exists(): + raise FileReadError(f"File not found: {file_path}. Generate the files first.") + + try: + with open(file_path) as f: + return f.read() + except Exception as e: + raise FileReadError(f"Error reading file {file_path}: {str(e)}") from e + + @staticmethod + def update_generation_progress( + project: VibeExtractorProject, + step: str, + status: str, + message: str | None = None, + ) -> None: + """Update generation progress for a project. + + Args: + project: VibeExtractorProject instance + step: Generation step name + status: Status of the step (pending, in_progress, completed, failed) + message: Optional message + """ + if not project.generation_progress: + project.generation_progress = {} + + project.generation_progress[step] = { + "status": status, + "message": message or "", + } + project.save(update_fields=["generation_progress", "modified_at"]) + + @staticmethod + def get_reference_template(template_name: str) -> str: + """Get reference template content. + + Args: + template_name: Name of the template file + + Returns: + Content of the reference template + + Raises: + FileReadError: If template cannot be read + """ + reference_dir = getattr( + settings, + "VIBE_EXTRACTOR_REFERENCE_DIR", + Path(settings.BASE_DIR).parent / VibeExtractorPaths.REFERENCE_DIR, + ) + template_path = Path(reference_dir) / template_name + + if not template_path.exists(): + raise FileReadError(f"Reference template not found: {template_path}") + + try: + with open(template_path) as f: + return f.read() + except Exception as e: + raise FileReadError( + f"Error reading reference template {template_path}: {str(e)}" + ) from e + + @staticmethod + def save_yaml_file(output_path: Path, filename: str, content: dict[str, Any]) -> None: + """Save content as YAML file. + + Args: + output_path: Output directory path + filename: Name of the file + content: Content to save as YAML + """ + file_path = output_path / filename + with open(file_path, "w") as f: + yaml.dump(content, f, default_flow_style=False, sort_keys=False) + + @staticmethod + def save_markdown_file(output_path: Path, filename: str, content: str) -> None: + """Save content as markdown file. 
+ + Args: + output_path: Output directory path + filename: Name of the file + content: Content to save + """ + file_path = output_path / filename + with open(file_path, "w") as f: + f.write(content) + + @staticmethod + def guess_document_type_from_file( + file_name: str, + tool_id: str, + org_id: str, + user_id: str, + ) -> dict[str, Any]: + """Guess document type from file content. + + This method: + 1. Constructs the file path using permanent file storage + 2. Reads the file content using dynamic_extractor + 3. Calls prompt-service to guess the document type using an LLM + + Args: + file_name: Name of the file in permanent storage + tool_id: Tool ID to construct the file path + org_id: Organization ID + user_id: User ID + + Returns: + Dictionary containing: + - status: "success" or "error" + - document_type: Guessed document type (if success) + - confidence: Confidence score (if applicable) + - error: Error message (if error) + """ + try: + # Import here to avoid circular imports + from prompt_studio.prompt_profile_manager_v2.models import ProfileManager + from prompt_studio.prompt_studio_core_v2.models import CustomTool + from prompt_studio.prompt_studio_core_v2.prompt_studio_helper import ( + PromptStudioHelper, + ) + from prompt_studio.prompt_studio_vibe_extractor_v2.services.generator_service import ( + GeneratorService, + ) + + # Get the tool instance to access profile manager + tool = CustomTool.objects.get(pk=tool_id) + + # Get default profile for extraction + default_profile = ProfileManager.get_default_llm_profile(tool) + + # Construct file path using PromptStudioFileHelper + file_path = PromptStudioFileHelper.get_or_create_prompt_studio_subdirectory( + org_id=org_id, + user_id=user_id, + tool_id=tool_id, + is_create=False, + ) + full_file_path = str(Path(file_path) / file_name) + + # Use dynamic_extractor to read and extract text from the file + from utils.file_storage.constants import FileStorageKeys + + from prompt_studio.prompt_studio_core_v2.prompt_ide_base_tool import ( + PromptIdeBaseTool, + ) + from unstract.sdk.constants import LogLevel + from unstract.sdk1.file_storage.constants import StorageType + from unstract.sdk1.file_storage.env_helper import EnvHelper + from unstract.sdk1.utils.indexing import IndexingUtils + + fs_instance = EnvHelper.get_storage( + storage_type=StorageType.PERMANENT, + env_name=FileStorageKeys.PERMANENT_REMOTE_STORAGE, + ) + util = PromptIdeBaseTool(log_level=LogLevel.INFO, org_id=org_id) + + # Generate doc_id for extraction + doc_id = IndexingUtils.generate_index_key( + vector_db=str(default_profile.vector_store.id), + embedding=str(default_profile.embedding_model.id), + x2text=str(default_profile.x2text.id), + chunk_size=str(default_profile.chunk_size), + chunk_overlap=str(default_profile.chunk_overlap), + file_path=full_file_path, + file_hash=None, + fs=fs_instance, + tool=util, + ) + + # Extract text from the file + extracted_text = PromptStudioHelper.dynamic_extractor( + profile_manager=default_profile, + file_path=full_file_path, + org_id=org_id, + document_id=None, # Not needed for this operation + run_id=None, + enable_highlight=False, + doc_id=doc_id, + ) + + if not extracted_text or not extracted_text.strip(): + return { + "status": "error", + "error": "Could not extract text from file", + } + + # Get LLM configuration from system LLM + llm_config = GeneratorService._get_llm_config() + + # Call prompt-service via SDK helper + from prompt_studio.prompt_studio_vibe_extractor_v2.services.prompt_service_helper import ( + 
VibeExtractorPromptServiceHelper, + ) + + result = VibeExtractorPromptServiceHelper.guess_document_type( + file_content=extracted_text, + llm_config=llm_config, + org_id=org_id, + ) + + return result + + except Exception as e: + logger.error(f"Error guessing document type: {str(e)}", exc_info=True) + return { + "status": "error", + "error": str(e), + } diff --git a/backend/prompt_studio/prompt_studio_vibe_extractor_v2/views.py b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/views.py new file mode 100644 index 0000000000..0d0b16ef61 --- /dev/null +++ b/backend/prompt_studio/prompt_studio_vibe_extractor_v2/views.py @@ -0,0 +1,543 @@ +from django.db.models import QuerySet +from rest_framework import status, viewsets +from rest_framework.decorators import action +from rest_framework.request import Request +from rest_framework.response import Response +from rest_framework.versioning import URLPathVersioning +from utils.filtering import FilterHelper + +from prompt_studio.permission import PromptAcesssToUser +from prompt_studio.prompt_studio_vibe_extractor_v2.constants import ( + VibeExtractorKeys, +) +from prompt_studio.prompt_studio_vibe_extractor_v2.exceptions import ( + FileReadError, + InvalidDocumentTypeError, +) +from prompt_studio.prompt_studio_vibe_extractor_v2.models import ( + VibeExtractorProject, +) +from prompt_studio.prompt_studio_vibe_extractor_v2.serializers import ( + VibeExtractorFileReadSerializer, + VibeExtractorGenerateExtractionFieldsSerializer, + VibeExtractorGenerateMetadataSerializer, + VibeExtractorGeneratePagePromptsSerializer, + VibeExtractorGenerateScalarPromptsSerializer, + VibeExtractorGenerateTablePromptsSerializer, + VibeExtractorGuessDocumentTypeSerializer, + VibeExtractorProjectCreateSerializer, + VibeExtractorProjectSerializer, +) +from prompt_studio.prompt_studio_vibe_extractor_v2.services.generator_service import ( + GeneratorService, +) +from prompt_studio.prompt_studio_vibe_extractor_v2.vibe_extractor_helper import ( + VibeExtractorHelper, +) + + +class VibeExtractorProjectView(viewsets.ModelViewSet): + """Viewset to handle Vibe Extractor project CRUD operations. + + Provides endpoints for: + - Creating new extraction projects + - Listing projects + - Retrieving project details + - Updating project settings + - Deleting projects + - Triggering generation + - Reading generated files + """ + + versioning_class = URLPathVersioning + serializer_class = VibeExtractorProjectSerializer + permission_classes: list[type[PromptAcesssToUser]] = [PromptAcesssToUser] + + def get_queryset(self) -> QuerySet: + """Get queryset filtered by tool_id if provided.""" + filter_args = FilterHelper.build_filter_args( + self.request, + VibeExtractorKeys.TOOL_ID, + ) + if filter_args: + queryset = VibeExtractorProject.objects.filter(**filter_args) + else: + queryset = VibeExtractorProject.objects.all() + return queryset + + def create(self, request: Request, *args, **kwargs) -> Response: + """Create a new Vibe Extractor project. 
+ + Args: + request: HTTP request with document_type and optional tool_id + + Returns: + Response with created project data + """ + serializer = VibeExtractorProjectCreateSerializer(data=request.data) + serializer.is_valid(raise_exception=True) + + try: + # Validate and normalize document type + document_type = VibeExtractorHelper.validate_document_type( + serializer.validated_data["document_type"] + ) + + # Create project + project = VibeExtractorProject.objects.create( + document_type=document_type, + tool_id_id=serializer.validated_data.get("tool_id"), + created_by=request.user, + modified_by=request.user, + ) + + # Create output directory + output_path = VibeExtractorHelper.ensure_output_directory(project) + project.generation_output_path = str(output_path) + project.save(update_fields=["generation_output_path"]) + + response_serializer = VibeExtractorProjectSerializer(project) + return Response(response_serializer.data, status=status.HTTP_201_CREATED) + + except InvalidDocumentTypeError as e: + return Response({"error": str(e)}, status=status.HTTP_400_BAD_REQUEST) + except Exception as e: + return Response( + {"error": f"Failed to create project: {str(e)}"}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + @action(detail=True, methods=["post"]) + def generate_metadata(self, request: Request, pk=None) -> Response: + """Generate only metadata for a project. + + Args: + request: HTTP request + pk: Project ID + + Returns: + Response with generated metadata + """ + serializer = VibeExtractorGenerateMetadataSerializer(data=request.data) + serializer.is_valid(raise_exception=True) + + try: + project = self.get_object() + + # Start generation in background + import threading + + def run_generation(): + """Run metadata generation in background thread.""" + try: + GeneratorService.generate_metadata_only(project) + except Exception as e: + import logging + + logger = logging.getLogger(__name__) + logger.error(f"Background generation failed: {e}", exc_info=True) + + thread = threading.Thread(target=run_generation) + thread.daemon = True + thread.start() + + return Response( + { + "message": "Metadata generation started", + "project_id": str(project.project_id), + "status": project.status, + }, + status=status.HTTP_202_ACCEPTED, + ) + + except VibeExtractorProject.DoesNotExist: + return Response( + {"error": "Project not found"}, + status=status.HTTP_404_NOT_FOUND, + ) + except Exception as e: + return Response( + {"error": f"Generation failed: {str(e)}"}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + @action(detail=True, methods=["post"]) + def generate_extraction_fields(self, request: Request, pk=None) -> Response: + """Generate extraction fields for a project. 
+ + Args: + request: HTTP request with metadata + pk: Project ID + + Returns: + Response with generated extraction fields + """ + serializer = VibeExtractorGenerateExtractionFieldsSerializer(data=request.data) + serializer.is_valid(raise_exception=True) + + try: + project = self.get_object() + metadata = serializer.validated_data["metadata"] + + # Start generation in background + import threading + + def run_generation(): + """Run extraction fields generation in background thread.""" + try: + GeneratorService.generate_extraction_fields_only(project, metadata) + except Exception as e: + import logging + + logger = logging.getLogger(__name__) + logger.error(f"Background generation failed: {e}", exc_info=True) + + thread = threading.Thread(target=run_generation) + thread.daemon = True + thread.start() + + return Response( + { + "message": "Extraction fields generation started", + "project_id": str(project.project_id), + "status": project.status, + }, + status=status.HTTP_202_ACCEPTED, + ) + + except VibeExtractorProject.DoesNotExist: + return Response( + {"error": "Project not found"}, + status=status.HTTP_404_NOT_FOUND, + ) + except Exception as e: + return Response( + {"error": f"Generation failed: {str(e)}"}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + @action(detail=True, methods=["post"]) + def generate_page_prompts(self, request: Request, pk=None) -> Response: + """Generate page extraction prompts for a project. + + Args: + request: HTTP request with metadata + pk: Project ID + + Returns: + Response with generated prompts + """ + serializer = VibeExtractorGeneratePagePromptsSerializer(data=request.data) + serializer.is_valid(raise_exception=True) + + try: + project = self.get_object() + metadata = serializer.validated_data["metadata"] + + # Start generation in background + import threading + + def run_generation(): + """Run page prompts generation in background thread.""" + try: + GeneratorService.generate_page_extraction_prompts(project, metadata) + except Exception as e: + import logging + + logger = logging.getLogger(__name__) + logger.error(f"Background generation failed: {e}", exc_info=True) + + thread = threading.Thread(target=run_generation) + thread.daemon = True + thread.start() + + return Response( + { + "message": "Page prompts generation started", + "project_id": str(project.project_id), + "status": project.status, + }, + status=status.HTTP_202_ACCEPTED, + ) + + except VibeExtractorProject.DoesNotExist: + return Response( + {"error": "Project not found"}, + status=status.HTTP_404_NOT_FOUND, + ) + except Exception as e: + return Response( + {"error": f"Generation failed: {str(e)}"}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + @action(detail=True, methods=["post"]) + def generate_scalar_prompts(self, request: Request, pk=None) -> Response: + """Generate scalar extraction prompts for a project. 
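+
+        Example (illustrative payload; both keys are required by the
+        serializer):
+
+            POST .../vibe-extractor/<project_id>/generate_scalar_prompts/
+            {"metadata": {...}, "extraction_yaml": "<extraction.yaml content>"}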
+ + Args: + request: HTTP request with metadata and extraction_yaml + pk: Project ID + + Returns: + Response with generated prompts + """ + serializer = VibeExtractorGenerateScalarPromptsSerializer(data=request.data) + serializer.is_valid(raise_exception=True) + + try: + project = self.get_object() + metadata = serializer.validated_data["metadata"] + extraction_yaml = serializer.validated_data["extraction_yaml"] + + # Start generation in background + import threading + + def run_generation(): + """Run scalar prompts generation in background thread.""" + try: + GeneratorService.generate_scalar_extraction_prompts( + project, metadata, extraction_yaml + ) + except Exception as e: + import logging + + logger = logging.getLogger(__name__) + logger.error(f"Background generation failed: {e}", exc_info=True) + + thread = threading.Thread(target=run_generation) + thread.daemon = True + thread.start() + + return Response( + { + "message": "Scalar prompts generation started", + "project_id": str(project.project_id), + "status": project.status, + }, + status=status.HTTP_202_ACCEPTED, + ) + + except VibeExtractorProject.DoesNotExist: + return Response( + {"error": "Project not found"}, + status=status.HTTP_404_NOT_FOUND, + ) + except Exception as e: + return Response( + {"error": f"Generation failed: {str(e)}"}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + @action(detail=True, methods=["post"]) + def generate_table_prompts(self, request: Request, pk=None) -> Response: + """Generate table extraction prompts for a project. + + Args: + request: HTTP request with metadata and extraction_yaml + pk: Project ID + + Returns: + Response with generated prompts + """ + serializer = VibeExtractorGenerateTablePromptsSerializer(data=request.data) + serializer.is_valid(raise_exception=True) + + try: + project = self.get_object() + metadata = serializer.validated_data["metadata"] + extraction_yaml = serializer.validated_data["extraction_yaml"] + + # Start generation in background + import threading + + def run_generation(): + """Run table prompts generation in background thread.""" + try: + GeneratorService.generate_table_extraction_prompts( + project, metadata, extraction_yaml + ) + except Exception as e: + import logging + + logger = logging.getLogger(__name__) + logger.error(f"Background generation failed: {e}", exc_info=True) + + thread = threading.Thread(target=run_generation) + thread.daemon = True + thread.start() + + return Response( + { + "message": "Table prompts generation started", + "project_id": str(project.project_id), + "status": project.status, + }, + status=status.HTTP_202_ACCEPTED, + ) + + except VibeExtractorProject.DoesNotExist: + return Response( + {"error": "Project not found"}, + status=status.HTTP_404_NOT_FOUND, + ) + except Exception as e: + return Response( + {"error": f"Generation failed: {str(e)}"}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + @action(detail=True, methods=["get"]) + def read_file(self, request: Request, pk=None) -> Response: + """Read a generated file for a project. 
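+
+        Example (illustrative; see ``list_files`` for the supported
+        ``file_type`` values):
+
+            GET .../vibe-extractor/<project_id>/read_file/?file_type=metadata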
+ + Args: + request: HTTP request with file_type parameter + pk: Project ID + + Returns: + Response with file content + """ + file_type = request.query_params.get("file_type") + if not file_type: + return Response( + {"error": "file_type parameter is required"}, + status=status.HTTP_400_BAD_REQUEST, + ) + + serializer = VibeExtractorFileReadSerializer(data={"file_type": file_type}) + if not serializer.is_valid(): + return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) + + try: + project = self.get_object() + content = VibeExtractorHelper.read_generated_file(project, file_type) + + return Response( + { + "file_type": file_type, + "content": content, + "project_id": str(project.project_id), + }, + status=status.HTTP_200_OK, + ) + + except VibeExtractorProject.DoesNotExist: + return Response( + {"error": "Project not found"}, + status=status.HTTP_404_NOT_FOUND, + ) + except FileReadError as e: + return Response({"error": str(e)}, status=status.HTTP_404_NOT_FOUND) + except Exception as e: + return Response( + {"error": f"Failed to read file: {str(e)}"}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + @action(detail=True, methods=["get"]) + def list_files(self, request: Request, pk=None) -> Response: + """List all generated files for a project. + + Args: + request: HTTP request + pk: Project ID + + Returns: + Response with list of available files + """ + try: + project = self.get_object() + output_path = VibeExtractorHelper.get_project_output_path(project) + + files = [] + file_types = [ + "metadata", + "extraction", + "page_extraction_system", + "page_extraction_user", + "scalars_extraction_system", + "scalars_extraction_user", + "tables_extraction_system", + "tables_extraction_user", + ] + + for file_type in file_types: + try: + VibeExtractorHelper.read_generated_file(project, file_type) + files.append({"file_type": file_type, "exists": True}) + except FileReadError: + files.append({"file_type": file_type, "exists": False}) + + return Response( + { + "project_id": str(project.project_id), + "files": files, + }, + status=status.HTTP_200_OK, + ) + + except VibeExtractorProject.DoesNotExist: + return Response( + {"error": "Project not found"}, + status=status.HTTP_404_NOT_FOUND, + ) + except Exception as e: + return Response( + {"error": f"Failed to list files: {str(e)}"}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + @action(detail=False, methods=["post"]) + def guess_document_type(self, request: Request) -> Response: + """Guess document type from file content. 
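+
+        Example (illustrative payload; values are placeholders):
+
+            POST .../vibe-extractor/guess_document_type/
+            {"file_name": "statement.pdf", "tool_id": "<tool-uuid>"}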
+ + Args: + request: HTTP request with file_name and tool_id + + Returns: + Response with guessed document type + """ + serializer = VibeExtractorGuessDocumentTypeSerializer(data=request.data) + serializer.is_valid(raise_exception=True) + + try: + file_name = serializer.validated_data["file_name"] + tool_id = serializer.validated_data["tool_id"] + + # Call the helper to guess document type + result = VibeExtractorHelper.guess_document_type_from_file( + file_name=file_name, + tool_id=str(tool_id), + org_id=request.user.organization_id, + user_id=request.user.user_id, + ) + + if result.get("status") == "error": + return Response( + { + "error": result.get("error"), + "raw_response": result.get("raw_response"), + "attempted_json": result.get("attempted_json"), + "partial_response": result.get("partial_response"), + }, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + return Response( + { + "document_type": result.get("document_type"), + "confidence": result.get("confidence"), + "primary_indicators": result.get("primary_indicators", []), + "document_category": result.get("document_category"), + "alternative_types": result.get("alternative_types", []), + "reasoning": result.get("reasoning"), + }, + status=status.HTTP_200_OK, + ) + + except Exception as e: + return Response( + {"error": f"Failed to guess document type: {str(e)}"}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) diff --git a/prompt-service/pyproject.toml b/prompt-service/pyproject.toml index 09e41a8b2b..8d08387ff8 100644 --- a/prompt-service/pyproject.toml +++ b/prompt-service/pyproject.toml @@ -19,7 +19,11 @@ dependencies = [ "redis>=5.0.3,<5.3", "unstract-core", "unstract-flags", - "unstract-sdk1[aws,gcs,azure]" + "unstract-sdk1[aws,gcs,azure]", + # Autogen packages for Vibe Extractor LLM generation + "autogen-core>=0.4.0", + "autogen-ext>=0.4.0", + "autogen-agentchat>=0.4.0", ] [tool.uv.sources] diff --git a/prompt-service/src/unstract/prompt_service/services/vibe_extractor/README.md b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/README.md new file mode 100644 index 0000000000..0794d6a9a9 --- /dev/null +++ b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/README.md @@ -0,0 +1,350 @@ +# Vibe Extractor Service + +The Vibe Extractor Service is an agentic system that automatically generates document extraction metadata, fields, and prompts using LLM technology. It follows the architecture and patterns from the `new_document_type_generator.py` reference implementation. 
+
+## Overview
+
+This service generates all the necessary components for document extraction:
+- Document metadata (metadata.yaml)
+- Extraction fields (extraction.yaml)
+- Page extraction prompts (system and user)
+- Scalar extraction prompts (system and user)
+- Table extraction prompts (system and user)
+
+## Architecture
+
+### Components
+
+```
+vibe_extractor/
+├── __init__.py      # Package exports
+├── constants.py     # Bootstrap prompts and constants
+├── llm_helper.py    # LLM client initialization (using autogen-ext)
+├── generator.py     # Core generation logic
+├── service.py       # Service orchestration
+├── api_helper.py    # API integration helpers
+└── README.md        # This file
+```
+
+### LLM Adapter Pattern
+
+The service uses the autogen-ext library for LLM communication, making it easy to swap between providers:
+
+- **OpenAI**: Standard OpenAI models
+- **Azure OpenAI**: Azure-hosted OpenAI models
+- **Anthropic**: Claude models
+- **Bedrock**: AWS Bedrock with Claude models
+
+This architecture is designed to be easily replaceable with the new autogen client when it becomes available.
+
+## Usage
+
+### Basic Usage
+
+```python
+from unstract.prompt_service.services.vibe_extractor.api_helper import (
+    generate_document_extraction_components_async
+)
+
+# Configure LLM
+llm_config = {
+    "adapter_id": "anthropic",
+    "model": "claude-3-5-sonnet-20241022",
+    "api_key": "sk-ant-...",
+    "temperature": 0.7,
+    "max_tokens": 4096
+}
+
+# Generate all components (use the _sync variant from synchronous code)
+result = await generate_document_extraction_components_async(
+    doc_type="invoice",
+    output_dir="/path/to/output",
+    llm_config=llm_config
+)
+
+if result["status"] == "success":
+    print(f"Generated files at: {result['output_path']}")
+    print(f"Files: {result['files']}")
+else:
+    print(f"Error: {result['error']}")
+```
+
+### Backend Integration
+
+The backend integrates with this service through the `GeneratorService` class:
+
+```python
+from prompt_studio.prompt_studio_vibe_extractor_v2.services.generator_service import (
+    GeneratorService
+)
+
+# Generate all components for a project
+result = GeneratorService.generate_all(project)
+```
+
+## Configuration
+
+### Environment Variables
+
+For the backend to use this service, configure these environment variables:
+
+```bash
+# LLM Provider Configuration
+VIBE_EXTRACTOR_ADAPTER_ID=anthropic  # or openai, azureopenai, bedrock
+VIBE_EXTRACTOR_MODEL=claude-3-5-sonnet-20241022
+VIBE_EXTRACTOR_API_KEY=your-api-key-here
+VIBE_EXTRACTOR_TEMPERATURE=0.7
+VIBE_EXTRACTOR_MAX_TOKENS=4096
+
+# For Azure OpenAI
+VIBE_EXTRACTOR_API_BASE=https://your-resource.openai.azure.com/
+VIBE_EXTRACTOR_API_VERSION=2024-02-15-preview
+VIBE_EXTRACTOR_DEPLOYMENT=your-deployment-name
+
+# For AWS Bedrock
+VIBE_EXTRACTOR_AWS_ACCESS_KEY_ID=your-access-key
+VIBE_EXTRACTOR_AWS_SECRET_ACCESS_KEY=your-secret-key
+VIBE_EXTRACTOR_REGION_NAME=us-east-1
+```
+
+### Django Settings
+
+Alternatively, configure in Django settings.py:
+
+```python
+VIBE_EXTRACTOR_LLM_CONFIG = {
+    "adapter_id": "anthropic",
+    "model": "claude-3-5-sonnet-20241022",
+    "api_key": os.environ.get("ANTHROPIC_API_KEY"),
+    "temperature": 0.7,
+    "max_tokens": 4096,
+}
+```
+
+## API Endpoints
+
+### Backend API Endpoints
+
+#### Create Project
+```http
+POST /api/v1/vibe-extractor/
+Content-Type: application/json
+
+{
+    "document_type": "invoice",
+    "tool_id": "optional-tool-uuid"
+}
+```
+
+#### Generate Components
+```http
+POST /api/v1/vibe-extractor/{project_id}/generate/
+Content-Type: application/json
+
+{
+    "regenerate": false
+}
+```
+
+Response:
+```json +{ + "message": "Generation started", + "project_id": "uuid", + "status": "generating_metadata" +} +``` + +#### Read Generated File +```http +GET /api/v1/vibe-extractor/{project_id}/read_file/?file_type=metadata +``` + +Response: +```json +{ + "file_type": "metadata", + "content": "...", + "project_id": "uuid" +} +``` + +Supported file types: +- `metadata`: metadata.yaml +- `extraction`: extraction.yaml +- `page_extraction_system`: Page extraction system prompt +- `page_extraction_user`: Page extraction user prompt +- `scalars_extraction_system`: Scalar extraction system prompt +- `scalars_extraction_user`: Scalar extraction user prompt +- `tables_extraction_system`: Table extraction system prompt +- `tables_extraction_user`: Table extraction user prompt + +#### List Generated Files +```http +GET /api/v1/vibe-extractor/{project_id}/list_files/ +``` + +Response: +```json +{ + "project_id": "uuid", + "files": [ + {"file_type": "metadata", "exists": true}, + {"file_type": "extraction", "exists": true}, + ... + ] +} +``` + +## Generation Steps + +The service generates components in the following sequence: + +1. **Generate Metadata** (`generating_metadata`) + - Creates metadata.yaml with document type information + - Includes name, description, tags, version, etc. + +2. **Generate Extraction Fields** (`generating_fields`) + - Creates extraction.yaml with field definitions + - Includes scalar fields and list/table fields + +3. **Generate Page Extraction Prompts** (`generating_prompts`) + - System prompt for page relevance detection + - User prompt for page analysis + +4. **Generate Scalar Extraction Prompts** + - System prompt for scalar field extraction + - User prompt for scalar extraction + +5. **Generate Table Extraction Prompts** + - System prompt for table/list extraction + - User prompt for table extraction + +Each step updates the project status and progress tracking. + +## Progress Tracking + +The service provides progress callbacks to track generation: + +```python +def progress_callback(step: str, status: str, message: str = ""): + print(f"Step: {step}, Status: {status}, Message: {message}") + +result = await service.generate_all( + doc_type="invoice", + reference_template=template, + progress_callback=progress_callback +) +``` + +## Error Handling + +The service includes comprehensive error handling: + +- Invalid LLM configuration +- API failures +- File I/O errors +- Invalid document types +- Generation failures + +All errors are logged and returned with descriptive messages. + +## Testing + +### Manual Testing + +1. Create a project: +```bash +curl -X POST http://localhost:8000/api/v1/vibe-extractor/ \ + -H "Content-Type: application/json" \ + -d '{"document_type": "invoice"}' +``` + +2. Start generation: +```bash +curl -X POST http://localhost:8000/api/v1/vibe-extractor/{project_id}/generate/ \ + -H "Content-Type: application/json" \ + -d '{}' +``` + +3. Check status: +```bash +curl http://localhost:8000/api/v1/vibe-extractor/{project_id}/ +``` + +4. Read generated files: +```bash +curl http://localhost:8000/api/v1/vibe-extractor/{project_id}/read_file/?file_type=metadata +``` + +## Future Enhancements + +### Autogen Client Migration + +The current implementation uses autogen-ext for LLM communication. When the new autogen client is ready, migration will be straightforward: + +1. Update `llm_helper.py` to use the new autogen client +2. Update `generate_with_llm()` function +3. 
No changes needed in `generator.py` or `service.py`
+
+### Celery Integration
+
+For production deployments, replace the threading-based background processing with Celery:
+
+```python
+from celery import shared_task
+
+@shared_task
+def generate_components_task(project_id):
+    project = VibeExtractorProject.objects.get(project_id=project_id)
+    return GeneratorService.generate_all(project)
+```
+
+### Caching
+
+Add caching for reference templates and frequently used prompts to improve performance.
+
+## Troubleshooting
+
+### Import Errors
+
+If you see import errors, ensure the prompt-service is properly installed:
+```bash
+cd prompt-service
+pip install -e .
+```
+
+### LLM Configuration Errors
+
+Verify your LLM configuration:
+```python
+from unstract.prompt_service.services.vibe_extractor.api_helper import (
+    validate_llm_config
+)
+
+is_valid, error = validate_llm_config(llm_config)
+if not is_valid:
+    print(f"Configuration error: {error}")
+```
+
+### Generation Failures
+
+Check the logs for detailed error messages:
+```bash
+tail -f /path/to/logs/django.log
+```
+
+## Code Style
+
+The implementation follows Unstract coding standards:
+- Type hints for all function parameters and returns
+- Comprehensive docstrings
+- Error handling and logging
+- Consistent naming conventions
+- Clean separation of concerns
+
+## References
+
+- Reference Implementation: `/home/harini/Documents/Workspace/unstract-omniparse-studio/tools/new_document_type_generator.py`
+- Rentroll Service (Adapter Pattern): `/home/harini/Documents/Workspace/unstract-cloud/rentroll-service/`
+- Backend Models: `backend/prompt_studio/prompt_studio_vibe_extractor_v2/models.py`
diff --git a/prompt-service/src/unstract/prompt_service/services/vibe_extractor/__init__.py b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/__init__.py new file mode 100644 index 0000000000..459083d182 --- /dev/null +++ b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/__init__.py @@ -0,0 +1,12 @@
+"""Vibe Extractor service for generating document extraction prompts."""
+
+from .generator import VibeExtractorGenerator
+from .llm_helper import generate_with_llm, get_llm_client
+from .service import VibeExtractorService
+
+__all__ = [
+    "VibeExtractorGenerator",
+    "VibeExtractorService",
+    "get_llm_client",
+    "generate_with_llm",
+]
diff --git a/prompt-service/src/unstract/prompt_service/services/vibe_extractor/api_helper.py b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/api_helper.py new file mode 100644 index 0000000000..0640356fd3 --- /dev/null +++ b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/api_helper.py @@ -0,0 +1,553 @@
+"""API Helper for Vibe Extractor.
+
+This module provides helper functions for backend API integration.
+"""
+
+import asyncio
+import logging
+from typing import Any, Dict, Optional
+
+from .service import VibeExtractorService
+
+logger = logging.getLogger(__name__)
+
+
+def _run_async(coro):
+    """Helper to run async coroutines in sync context.
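+
+    Uses asyncio.run() when no event loop is running in the current thread;
+    otherwise the coroutine is run on a fresh loop in a worker thread so an
+    already-running loop is never re-entered.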
+
+    Args:
+        coro: Coroutine to run
+
+    Returns:
+        Result of the coroutine
+    """
+    try:
+        asyncio.get_running_loop()
+    except RuntimeError:
+        # No event loop running in this thread; asyncio.run creates one,
+        # runs the coroutine, and cleans up afterwards.
+        return asyncio.run(coro)
+
+    # A loop is already running in this thread. Calling run_until_complete()
+    # on a second loop here would raise, so run the coroutine on its own
+    # loop in a worker thread instead.
+    import concurrent.futures
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+        return executor.submit(asyncio.run, coro).result()
+
+
+def generate_document_extraction_components_sync(
+    doc_type: str,
+    output_dir: str,
+    llm_config: Dict[str, Any],
+    reference_template: Optional[str] = None,
+    progress_callback: Optional[callable] = None,
+) -> Dict[str, Any]:
+    """Generate all document extraction components (sync version).
+
+    This is the main entry point for backend API to trigger generation.
+
+    Args:
+        doc_type: Document type name (e.g., "invoice", "receipt")
+        output_dir: Base output directory for generated files
+        llm_config: LLM configuration dictionary
+        reference_template: Optional reference metadata.yaml template content
+        progress_callback: Optional callback function(step, status, message)
+
+    Returns:
+        Dictionary containing generation result
+    """
+    return _run_async(
+        generate_document_extraction_components_async(
+            doc_type, output_dir, llm_config, reference_template, progress_callback
+        )
+    )
+
+
+async def generate_document_extraction_components_async(
+    doc_type: str,
+    output_dir: str,
+    llm_config: Dict[str, Any],
+    reference_template: Optional[str] = None,
+    progress_callback: Optional[callable] = None,
+) -> Dict[str, Any]:
+    """Generate all document extraction components.
+
+    This is the main entry point for backend API to trigger generation.
+
+    Args:
+        doc_type: Document type name (e.g., "invoice", "receipt")
+        output_dir: Base output directory for generated files
+        llm_config: LLM configuration dictionary containing:
+            - adapter_id: Provider (openai, anthropic, bedrock, azureopenai)
+            - model: Model name
+            - api_key: API key
+            - temperature: Temperature (default: 0.7)
+            - max_tokens: Max tokens (default: 4096)
+        reference_template: Optional reference metadata.yaml template content.
+            If not provided, a default template will be used.
+        progress_callback: Optional callback function(step, status, message)
+            to report generation progress
+
+    Returns:
+        Dictionary containing:
+            - status: "success" or "error"
+            - output_path: Path to generated files
+            - files: Dictionary of generated file paths
+            - error: Error message if status is "error"
+
+    Example:
+        ```python
+        llm_config = {
+            "adapter_id": "anthropic",
+            "model": "claude-3-5-sonnet-20241022",
+            "api_key": "sk-ant-...",
+            "temperature": 0.7,
+            "max_tokens": 4096,
+        }
+
+        result = await generate_document_extraction_components_async(
+            doc_type="invoice", output_dir="/path/to/output", llm_config=llm_config
+        )
+
+        if result["status"] == "success":
+            print(f"Generated files at: {result['output_path']}")
+            print(f"Files: {result['files']}")
+        else:
+            print(f"Error: {result['error']}")
+        ```
+    """
+    try:
+        # Use default reference template if not provided
+        if reference_template is None:
+            reference_template = _get_default_reference_template()
+
+        # Initialize service
+        service = VibeExtractorService(llm_config, output_dir)
+
+        # Generate all components
+        result = await service.generate_all(
+            doc_type, reference_template, progress_callback
+        )
+
+        return result
+
+    except Exception as e:
+        error_msg = f"Error in generate_document_extraction_components_async: {str(e)}"
+        logger.error(error_msg, exc_info=True)
+        return {"status": "error", "error": error_msg}
+
+
+def generate_metadata_only_sync(
+    doc_type: str,
+    llm_config: Dict[str, Any],
+    reference_template: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Generate only metadata for a document type (sync version).
+
+    Args:
+        doc_type: Document type name
+        llm_config: LLM configuration dictionary
+        reference_template: Optional reference template
+
+    Returns:
+        Dictionary containing generated metadata or error
+    """
+    return _run_async(
+        generate_metadata_only_async(doc_type, llm_config, reference_template)
+    )
+
+
+async def generate_metadata_only_async(
+    doc_type: str,
+    llm_config: Dict[str, Any],
+    reference_template: Optional[str] = None,
+) -> Dict[str, Any]:
+    """Generate only metadata for a document type.
+
+    Args:
+        doc_type: Document type name
+        llm_config: LLM configuration dictionary
+        reference_template: Optional reference template
+
+    Returns:
+        Dictionary containing generated metadata or error
+    """
+    try:
+        if reference_template is None:
+            reference_template = _get_default_reference_template()
+
+        # Initialize service with temporary output dir
+        service = VibeExtractorService(llm_config, "/tmp/vibe_extractor")
+
+        result = await service.generate_metadata_only(doc_type, reference_template)
+        return result
+
+    except Exception as e:
+        error_msg = f"Error generating metadata: {str(e)}"
+        logger.error(error_msg)
+        return {"status": "error", "error": error_msg}
+
+
+def generate_extraction_fields_only_sync(
+    doc_type: str,
+    metadata: Dict[str, Any],
+    llm_config: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Generate only extraction fields for a document type (sync version).
+
+    Args:
+        doc_type: Document type name
+        metadata: Metadata dictionary
+        llm_config: LLM configuration dictionary
+
+    Returns:
+        Dictionary containing extraction YAML or error
+    """
+    return _run_async(
+        generate_extraction_fields_only_async(doc_type, metadata, llm_config)
+    )
+
+
+async def generate_extraction_fields_only_async(
+    doc_type: str,
+    metadata: Dict[str, Any],
+    llm_config: Dict[str, Any],
+) -> Dict[str, Any]:
+    """Generate only extraction fields for a document type.
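+
+    Example (sketch; assumes ``metadata`` came from
+    generate_metadata_only_async):
+
+        result = await generate_extraction_fields_only_async(
+            "invoice", metadata, llm_config
+        )
+        if result.get("status") != "error":
+            print(result)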
+ + Args: + doc_type: Document type name + metadata: Metadata dictionary + llm_config: LLM configuration dictionary + + Returns: + Dictionary containing extraction YAML or error + """ + try: + # Initialize service with temporary output dir + service = VibeExtractorService(llm_config, "/tmp/vibe_extractor") + + result = await service.generate_extraction_fields_only(doc_type, metadata) + return result + + except Exception as e: + error_msg = f"Error generating extraction fields: {str(e)}" + logger.error(error_msg) + return {"status": "error", "error": error_msg} + + +def _get_default_reference_template() -> str: + """Get default reference metadata.yaml template. + + Returns: + Default reference template as string + """ + return """--- +name_identifier: example # Unique identifier +name: Example Document # Human-readable name +description: | # Description of the document type + Example document description. + This should be 3-4 sentences explaining what this document type is. +description_seo: | # SEO optimized description + SEO optimized description for example document. +html_meta_description: | # HTML meta description + HTML meta description for example document. +tags: # List of tags + - example + - document + - sample +version: 1.0.0 # Version +status: beta # Current status +visibility: public # Visibility +author: Zipstack Inc # Author +release_date: 2025-07-01 # Release date +price_multiplier: 1.0 # Price multiplier +llm_model: claude-sonnet-1-7 # LLM model +extraction_features: # Extraction features + locate_pages: true + rolling_window: false + challenge: false +""" + + +def generate_page_extraction_prompts_sync( + doc_type: str, + metadata: Dict[str, Any], + llm_config: Dict[str, Any], +) -> Dict[str, Any]: + """Generate page extraction prompts (sync version). + + Args: + doc_type: Document type name + metadata: Metadata dictionary + llm_config: LLM configuration dictionary + + Returns: + Dictionary containing system and user prompts or error + """ + return _run_async( + generate_page_extraction_prompts_async(doc_type, metadata, llm_config) + ) + + +async def generate_page_extraction_prompts_async( + doc_type: str, + metadata: Dict[str, Any], + llm_config: Dict[str, Any], +) -> Dict[str, Any]: + """Generate page extraction prompts (system and user). + + Args: + doc_type: Document type name + metadata: Metadata dictionary + llm_config: LLM configuration dictionary + + Returns: + Dictionary containing system and user prompts or error + """ + try: + # Initialize service with temporary output dir + service = VibeExtractorService(llm_config, "/tmp/vibe_extractor") + + # Generate both prompts + page_system_prompt = ( + await service.generator.generate_page_extraction_system_prompt( + doc_type, metadata + ) + ) + page_user_prompt = await service.generator.generate_page_extraction_user_prompt( + doc_type, metadata + ) + + return { + "status": "success", + "system_prompt": page_system_prompt, + "user_prompt": page_user_prompt, + } + + except Exception as e: + error_msg = f"Error generating page extraction prompts: {str(e)}" + logger.error(error_msg) + return {"status": "error", "error": error_msg} + + +def generate_scalar_extraction_prompts_sync( + doc_type: str, + metadata: Dict[str, Any], + extraction_yaml: str, + llm_config: Dict[str, Any], +) -> Dict[str, Any]: + """Generate scalar extraction prompts (sync version). 
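+
+    Example (sketch; suitable for synchronous callers such as Django views):
+
+        result = generate_scalar_extraction_prompts_sync(
+            "invoice", metadata, extraction_yaml, llm_config
+        )
+        if result["status"] == "success":
+            system_prompt = result["system_prompt"]
+            user_prompt = result["user_prompt"]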
+ + Args: + doc_type: Document type name + metadata: Metadata dictionary + extraction_yaml: Extraction YAML content + llm_config: LLM configuration dictionary + + Returns: + Dictionary containing system and user prompts or error + """ + return _run_async( + generate_scalar_extraction_prompts_async( + doc_type, metadata, extraction_yaml, llm_config + ) + ) + + +async def generate_scalar_extraction_prompts_async( + doc_type: str, + metadata: Dict[str, Any], + extraction_yaml: str, + llm_config: Dict[str, Any], +) -> Dict[str, Any]: + """Generate scalar extraction prompts (system and user). + + Args: + doc_type: Document type name + metadata: Metadata dictionary + extraction_yaml: Extraction YAML content + llm_config: LLM configuration dictionary + + Returns: + Dictionary containing system and user prompts or error + """ + try: + # Initialize service with temporary output dir + service = VibeExtractorService(llm_config, "/tmp/vibe_extractor") + + # Generate both prompts + scalar_system_prompt = ( + await service.generator.generate_scalar_extraction_system_prompt( + doc_type, metadata, extraction_yaml + ) + ) + scalar_user_prompt = ( + await service.generator.generate_scalar_extraction_user_prompt( + doc_type, metadata + ) + ) + + return { + "status": "success", + "system_prompt": scalar_system_prompt, + "user_prompt": scalar_user_prompt, + } + + except Exception as e: + error_msg = f"Error generating scalar extraction prompts: {str(e)}" + logger.error(error_msg) + return {"status": "error", "error": error_msg} + + +def generate_table_extraction_prompts_sync( + doc_type: str, + metadata: Dict[str, Any], + extraction_yaml: str, + llm_config: Dict[str, Any], +) -> Dict[str, Any]: + """Generate table extraction prompts (sync version). + + Args: + doc_type: Document type name + metadata: Metadata dictionary + extraction_yaml: Extraction YAML content + llm_config: LLM configuration dictionary + + Returns: + Dictionary containing system and user prompts or error + """ + return _run_async( + generate_table_extraction_prompts_async( + doc_type, metadata, extraction_yaml, llm_config + ) + ) + + +async def generate_table_extraction_prompts_async( + doc_type: str, + metadata: Dict[str, Any], + extraction_yaml: str, + llm_config: Dict[str, Any], +) -> Dict[str, Any]: + """Generate table extraction prompts (system and user). + + Args: + doc_type: Document type name + metadata: Metadata dictionary + extraction_yaml: Extraction YAML content + llm_config: LLM configuration dictionary + + Returns: + Dictionary containing system and user prompts or error + """ + try: + # Initialize service with temporary output dir + service = VibeExtractorService(llm_config, "/tmp/vibe_extractor") + + # Generate both prompts + table_system_prompt = ( + await service.generator.generate_table_extraction_system_prompt( + doc_type, metadata, extraction_yaml + ) + ) + table_user_prompt = await service.generator.generate_table_extraction_user_prompt( + doc_type, metadata + ) + + return { + "status": "success", + "system_prompt": table_system_prompt, + "user_prompt": table_user_prompt, + } + + except Exception as e: + error_msg = f"Error generating table extraction prompts: {str(e)}" + logger.error(error_msg) + return {"status": "error", "error": error_msg} + + +def validate_llm_config(llm_config: Dict[str, Any]) -> tuple[bool, Optional[str]]: + """Validate LLM configuration. 
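+
+    Example (sketch; values are placeholders):
+
+        is_valid, error = validate_llm_config(
+            {"adapter_id": "openai", "model": "gpt-4o", "api_key": "..."}
+        )
+        if not is_valid:
+            raise ValueError(error)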
+ + Args: + llm_config: LLM configuration dictionary + + Returns: + Tuple of (is_valid, error_message) + """ + required_fields = ["adapter_id", "model", "api_key"] + + for field in required_fields: + if field not in llm_config: + return False, f"Missing required field: {field}" + + valid_adapters = ["openai", "azureopenai", "anthropic", "bedrock"] + if llm_config["adapter_id"] not in valid_adapters: + return ( + False, + f"Invalid adapter_id: {llm_config['adapter_id']}. " + f"Must be one of: {', '.join(valid_adapters)}", + ) + + return True, None + + +def guess_document_type_sync( + file_content: str, + llm_config: Dict[str, Any], +) -> Dict[str, Any]: + """Guess document type from file content (sync version). + + Args: + file_content: Extracted text content from the document + llm_config: LLM configuration dictionary + + Returns: + Dictionary containing guessed document type or error + """ + return _run_async(guess_document_type_async(file_content, llm_config)) + + +async def guess_document_type_async( + file_content: str, + llm_config: Dict[str, Any], +) -> Dict[str, Any]: + """Guess document type from file content using LLM. + + Args: + file_content: Extracted text content from the document + llm_config: LLM configuration dictionary + + Returns: + Dictionary containing: + - status: "success" or "error" + - document_type: Guessed document type (if success) + - confidence: Confidence description (if applicable) + - error: Error message (if error) + """ + try: + # Validate LLM config + is_valid, error_msg = validate_llm_config(llm_config) + if not is_valid: + return {"status": "error", "error": error_msg} + + # Import LLM helper + from .llm_helper import guess_document_type_with_llm + + # Call LLM helper to guess document type + result = await guess_document_type_with_llm( + file_content=file_content, + llm_config=llm_config, + ) + + return result + + except Exception as e: + error_msg = f"Error guessing document type: {str(e)}" + logger.error(error_msg, exc_info=True) + return {"status": "error", "error": error_msg} diff --git a/prompt-service/src/unstract/prompt_service/services/vibe_extractor/constants.py b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/constants.py new file mode 100644 index 0000000000..27aa8f3118 --- /dev/null +++ b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/constants.py @@ -0,0 +1,396 @@ +"""Constants for Vibe Extractor generation service.""" + + +class VibeExtractorBootstrapPrompts: + """Bootstrap prompts for generating document extraction components.""" + + DOCUMENT_METADATA = """Generate metadata for a document type called "{doc_type}". +Based on your knowledge of this document type, provide all the fields shown in the reference template below. +Focus on generating appropriate values for: +1. name_identifier (lowercase, hyphens instead of spaces) +2. name (human-readable name) +3. description (3-4 sentences explaining what this document type is) +4. description_seo (SEO-optimized version of description) +5. html_meta_description (HTML meta description) +6. tags (3-6 relevant tags) +7. status (typically "beta" for new document types) +8. visibility (typically "public") +IMPORTANT: For multiline text fields (description, description_seo, html_meta_description), use the YAML pipe syntax (|) to properly format multiline content. For example: +description: | + This is a multiline description + that spans multiple lines + and maintains proper formatting. 
+Use the reference template structure but adapt the content for "{doc_type}": +{reference_template} +Return your response as a YAML structure matching the exact format above, but with content appropriate for "{doc_type}". +Make sure to use the pipe syntax (|) for all description fields. +Only return the YAML structure, no additional text.""" + + DOCUMENT_EXTRACTION_FIELDS = """Generate an extraction.yaml structure for document type: "{doc_type}". +Document description: {metadata_description} +Create a YAML structure that defines the fields to extract from this document type. +Follow these IMPORTANT rules: +1. Include all relevant fields that would typically be found in a {doc_type} +2. Use descriptive field names with underscores (e.g., invoice_number, customer_name) +3. Add comments after each field using # to describe what it extracts +4. **CRITICAL**: List type fields should and can ONLY be one level deep +5. Use List ONLY for items that are actual lists in the document (e.g., line_items, taxes, discounts) +6. Do NOT generate nested items or objects in the extraction YAML file +7. The extraction YAML should ONLY contain: + - Scalar items: items with single values (e.g., invoice_number, date, total_amount, vendor_name) + - List type items: items that are lists/arrays (e.g., line_items, taxes, discounts) +8. List items should be one level deep with sub-fields, but no nested objects +9. Include both scalar fields and list fields where appropriate for {doc_type} documents +Example format: +# Scalar items (single values) +field_name: # Description of field +another_scalar: # Description of another scalar field +# List items (one level deep only) +list_field: # Description of list + - sub_field: # Description + another_field: # Description +IMPORTANT: Do NOT create nested objects or multi-level lists. Keep it simple: +- Scalar items for single values +- List items for arrays/lists (one level deep only) +Generate a comprehensive extraction structure for {doc_type} documents. +Return ONLY the YAML structure, no additional text.""" + + PAGE_EXTRACTION_SYSTEM = """Generate a system prompt for page extraction for document type: "{doc_type}". +Document description: {metadata_description} +Context: Some documents may have many pages of irrelevant data. The LLM needs to identify +pages that contain relevant data for this document type. +The LLM will be given a page of the document (including bottom half of previous page and +top half of next page for context). The LLM must decide whether the page contains relevant +data and respond with only "yes" or "no". +Generate a system prompt that: +1. Explains what this document type is +2. Describes what relevant data looks like for this document type +3. Lists what irrelevant data might be present +4. Provides clear instructions to respond only with "yes" or "no" +5. Gives examples of what to look for +Make the prompt comprehensive but concise. Focus on the specific characteristics of {doc_type} documents.""" + + PAGE_EXTRACTION_USER = """Generate a user prompt for page extraction for document type: "{doc_type}". +Document description: {metadata_description} +Context: This is the user prompt that will be sent along with the system prompt. The user +will provide a page of the document (including bottom half of previous page and top half +of next page for context). The LLM must decide whether the page contains relevant data +and respond with only "yes" or "no". +Generate a concise user prompt that: +1. Asks the LLM to analyze the provided page +2. 
Reminds the LLM to look for relevant {doc_type} data +3. Instructs to respond with only "yes" or "no" +Keep it short and direct - this will be used as a template for each page analysis.""" + + SCALARS_EXTRACTION_SYSTEM = """Generate a system prompt for scalar field extraction for document type: "{doc_type}". +Document description: {metadata_description} +Context: The LLM needs to extract scalar values from the document. Each line in the document +is numbered in hexadecimal format (0x0001, 0x0002, etc.). The LLM must extract values and +their line numbers. +The prompt must: +1. Have dedicated section with exact format: + ## Extraction Items + ```yaml + {{{{extraction_items}}}} + ``` +2. Use the handlebars variable only once in the prompt, refer to "## Extraction Items" section elsewhere +3. Have a section called "## Expected Variations of requested to available items" that lists possible variations of the scalar items based on the document type +4. Instruct to extract ONLY from the provided document (no prior knowledge) +5. Require ALL fields in output (use null if not found) +6. Include line numbers for each extracted value (format: _line_number_fieldname) +7. Output ONLY YAML format, no other text +8. Handle {doc_type}-specific extraction challenges +9. **CRITICAL**: Emphasize that the LLM must NOT perform any arithmetic operations, calculations, or other operations on values. Extract values exactly as they appear in the document. If a calculated field is required but not present in the document, it should be set to null. +Example output format (showing extracted values, not field names): +field_name: "extracted value from document" +_line_number_field_name: 0x0002 +missing_field: null +_line_number_missing_field: null +Example scalar fields: {scalar_fields} +Generate a comprehensive system prompt for scalar extraction with: +1. Dedicated section using exact format: + ## Extraction Items + ```yaml + {{{{extraction_items}}}} + ``` +2. Expected Variations section with {doc_type}-specific field variations""" + + SCALARS_EXTRACTION_USER = """Generate a concise user prompt for scalar field extraction for document type: "{doc_type}". +The user prompt should be very simple and direct. It should: +1. Ask the LLM to extract the specified fields from the document +2. Remind to follow the system instructions for format and line numbers +3. Be very brief - just 1-2 sentences +4. Not repeat detailed instructions (those are in the system prompt) +The prompt should be something like: +"Extract the specified fields from this {doc_type} document following the format requirements." +Generate a very concise user prompt.""" + + TABLES_EXTRACTION_SYSTEM = """Generate a system prompt for table/list extraction for document type: "{doc_type}". +Document description: {metadata_description} +Context: The LLM needs to extract table/list data in TSV format. Tables can span multiple pages, +have multi-line cells, and sometimes what appears to be a table is actually a simple list. +The prompt must: +1. Have dedicated section with exact format: + ## Extraction Items + ```yaml + {{{{extraction_items}}}} + ``` +2. Use the handlebars variable only once in the prompt, refer to "## Extraction Items" section elsewhere +3. Have a section called "## Expected Variations of requested to available items" that lists possible variations of the table items based on the document type +4. Handle rolling window documents (partial pages) +5. Handle tables spanning multiple pages with headers/footers +6. 
Handle multi-line cell content +7. Distinguish between tables and simple lists +8. Extract ONLY from provided document (no prior knowledge) +9. Include line numbers for each row (format: _line_no column) +10. Output TSV format with headers +11. Handle {doc_type}-specific table structures +12. **CRITICAL**: Emphasize that the LLM must NOT perform any arithmetic operations, calculations, or other operations on values. Extract values exactly as they appear in the document. If a calculated field is required but not present in the document, it should be set to null. +13. If the table is not present in the document, return an empty TSV file with header only +14. Output ONLY TSV format with no explanations, commentary, or other text +Include these specific examples in the prompt (use \\t to represent tabs in examples): +TYPE 1 - Normal tables example: +Document: +``` +0x0001: +0x0002: No Description Unit Discount +0x0004: Cost +0x0005: 1 Item 1 100.00 10.00 +0x0006: 2 Item 2 200.00 20.00 +0x0007: 3 Item 3 300.00 30.00 +``` +Note: "Unit Cost" spans two lines. +Output should be: +```tsv +_line_no\\tline_item_no\\tdescription\\tunit_cost\\tdiscount_percentage +0x0005\\t1\\tItem 1\\t100.00\\t10.00 +0x0006\\t2\\tItem 2\\t200.00\\t20.00 +0x0007\\t3\\tItem 3\\t300.00\\t30.00 +``` +TYPE 2 - Simple list example: +Document: +``` +0x0001: +0x0002: Special instructions: +0x0003: • Item 1 +0x0004: • Item 2 +0x0005: • Item 3 +``` +Output should be: +```tsv +_line_no\\titem +0x0003\\tItem 1 +0x0004\\tItem 2 +0x0005\\tItem 3 +``` +Generate a comprehensive system prompt for table/list extraction with: +1. Dedicated section using exact format: + ## Extraction Items + ```yaml + {{{{extraction_items}}}} + ``` +2. Expected Variations section with {doc_type}-specific field variations +3. Include these examples with \\t notation""" + + TABLES_EXTRACTION_USER = """Generate a concise user prompt for table extraction for document type: "{doc_type}". +The user prompt should be very simple and direct. It should: +1. Ask the LLM to extract the specified table/list from the document +2. Remind to follow the system instructions for TSV format +3. Be very brief - just 1-2 sentences +4. Not repeat detailed instructions (those are in the system prompt) +The prompt should be something like: +"Extract the table if it is present. If there is no matching table, reply No table found." +Generate a very concise user prompt.""" + + DOCUMENT_TYPE_IDENTIFICATION = """You are an expert document analyzer. Your task is to identify the type of document based on its content. + +Analyze the provided document content carefully and identify its type with high accuracy. + +## Document Analysis Guidelines + +When analyzing the document, look for these key indicators: + +### 1. Structural Elements +- Headers, footers, and watermarks +- Document layout and formatting +- Presence of logos or official seals +- Table structures and data organization +- Section headings and labels + +### 2. Content Markers +- Specific terminology and jargon +- Date formats and references +- Monetary values and calculations +- Legal or regulatory language +- Contact information and addresses + +### 3. Functional Purpose +- What is the primary purpose of this document? +- Who are the typical stakeholders (issuer, recipient)? +- What transaction or process does it document? +- What obligations or information does it convey? 
+ +## Common Document Types + +Consider these common business document categories: + +**Financial Documents:** +- Invoice: Itemized bill for goods/services with payment terms, invoice number, vendor details +- Receipt: Proof of payment showing transaction details, payment method, timestamp +- Purchase Order: Request to purchase goods/services with PO number, quantities, pricing +- Credit Note: Document issued for refunds or corrections to invoices +- Debit Note: Document for additional charges or corrections +- Bill of Lading: Shipping document detailing goods being transported +- Packing Slip: List of items included in a shipment +- Delivery Note: Confirmation of goods delivered +- Statement of Account: Summary of transactions over a period +- Payment Voucher: Authorization for payment + +**Banking Documents:** +- Bank Statement: Record of account transactions over a period +- Check/Cheque: Payment instrument drawn on a bank account +- Deposit Slip: Record of funds deposited into account +- Wire Transfer: Electronic fund transfer documentation +- Letter of Credit: Bank guarantee for international trade + +**Payroll & Employment:** +- Pay Stub/Payslip: Earnings statement showing salary breakdown +- W-2 Form: Annual wage and tax statement (US) +- Employment Contract: Agreement between employer and employee +- Offer Letter: Job offer with terms and conditions +- Timesheet: Record of hours worked + +**Tax & Compliance:** +- Tax Form (W-9, 1099, 1040, etc.): Various tax-related forms +- Tax Invoice: Invoice showing tax breakdown (VAT, GST, sales tax) +- Tax Return: Annual tax filing document +- Customs Declaration: Import/export declaration + +**Healthcare:** +- Medical Record: Patient medical history and treatment notes +- Prescription: Medication authorization from healthcare provider +- Lab Report: Medical test results and findings +- Insurance Claim: Request for insurance coverage/reimbursement +- EOB (Explanation of Benefits): Insurance payment explanation +- Medical Bill/Invoice: Healthcare services billing + +**Legal Documents:** +- Contract/Agreement: Legal binding agreement between parties +- NDA (Non-Disclosure Agreement): Confidentiality agreement +- Power of Attorney: Legal authorization document +- Certificate (Birth, Death, Marriage, etc.): Official certification +- License/Permit: Official authorization or permission +- Lease Agreement: Property rental contract +- Deed: Property ownership transfer document + +**Shipping & Logistics:** +- Shipping Label: Package destination and tracking information +- Air Waybill: Air cargo shipping document +- Commercial Invoice: International trade invoice +- Certificate of Origin: Document certifying product origin +- Customs Invoice: Invoice for customs clearance + +**HR & Administrative:** +- Application Form: Form for requesting service or admission +- Resume/CV: Career and qualifications summary +- Reference Letter: Professional or character reference +- Resignation Letter: Notice of employment termination +- Performance Review: Employee evaluation document + +**Correspondence:** +- Business Letter: Formal business correspondence +- Memo: Internal communication document +- Notice: Formal announcement or notification +- Minutes of Meeting: Record of meeting proceedings + +**Reports:** +- Business Report: Analysis or summary of business matters +- Financial Report: Financial performance analysis +- Audit Report: Financial or operational audit findings +- Technical Report: Technical analysis or specifications +- Research Report: Research 
findings and analysis + +**Other:** +- Warranty: Product or service guarantee +- Manual: Instruction or user guide +- Catalog: Product or service listings +- Brochure: Marketing or information material +- Quote/Estimate: Price proposal for goods/services +- RFP (Request for Proposal): Solicitation for vendor proposals +- Inventory List: Stock or asset listing + +## Response Format + +After analyzing the document, respond with **ONLY** a valid JSON object in this exact format: + +```json +{{ + "document_type": "identified-document-type", + "confidence": "high|medium|low", + "primary_indicators": [ + "specific indicator 1 that led to identification", + "specific indicator 2 that led to identification", + "specific indicator 3 that led to identification" + ], + "document_category": "category of the document", + "alternative_types": [ + "possible alternative document type if confidence is not high" + ], + "reasoning": "brief explanation of why this document type was identified" +}} +``` + +### Field Specifications: + +1. **document_type**: Use lowercase with hyphens (e.g., "invoice", "purchase-order", "medical-record", "bank-statement") + - Be specific: Use "tax-invoice" instead of just "invoice" if tax details are prominent + - Use compound names when necessary: "proof-of-delivery", "certificate-of-origin" + +2. **confidence**: + - "high": Multiple clear indicators, structure matches perfectly + - "medium": Good indicators but some ambiguity or missing elements + - "low": Limited indicators, could be multiple types + +3. **primary_indicators**: List 3-5 specific elements from the document that led to identification + - Example: "Invoice number INV-2024-001", "Payment terms: Net 30", "Itemized line items with tax" + +4. **document_category**: High-level category + - Examples: "financial", "legal", "healthcare", "shipping", "employment", "tax" + +5. **alternative_types**: If confidence is not high, list 1-2 possible alternatives + - Leave empty array if confidence is high + +6. **reasoning**: Brief 1-2 sentence explanation + - Focus on why this type fits best + - Mention key distinguishing features + +## Important Instructions + +1. **Extract, Don't Assume**: Base your analysis solely on the provided content +2. **Be Specific**: Choose the most specific document type (e.g., "proforma-invoice" vs "invoice") +3. **Consider Context**: Look at terminology, structure, and purpose together +4. **Regional Variations**: Consider that document types may have regional names (e.g., "invoice" vs "bill") +5. **JSON Only**: Return ONLY the JSON object, no additional text, explanations, or markdown formatting +6. **Handle Uncertainty**: If truly uncertain, use "medium" or "low" confidence and provide alternatives +7. **Standardize Names**: Use common, standardized document type names in lowercase-with-hyphens format + +## Example Response + +```json +{{ + "document_type": "purchase-order", + "confidence": "high", + "primary_indicators": [ + "PO Number: PO-2024-001234", + "Vendor details with 'SHIP TO' and 'BILL TO' sections", + "Line items with quantities and unit prices", + "Terms and conditions section", + "Signature block for approval" + ], + "document_category": "financial", + "alternative_types": [], + "reasoning": "Document contains all standard purchase order elements including PO number, vendor/buyer information, itemized products with quantities and prices, and approval signatures. 
The presence of delivery instructions and payment terms confirms this is a purchase order rather than an invoice or quote." +}} +``` + +Now analyze the document content provided and respond with your identification in the exact JSON format specified above.""" diff --git a/prompt-service/src/unstract/prompt_service/services/vibe_extractor/generator.py b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/generator.py new file mode 100644 index 0000000000..a62f6dc06b --- /dev/null +++ b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/generator.py @@ -0,0 +1,458 @@ +"""Vibe Extractor Generator. + +This module generates document extraction metadata, fields, and prompts +using LLM-based agents, similar to the new_document_type_generator.py reference. +""" + +import logging +from pathlib import Path +from typing import Any + +import yaml + +from .constants import VibeExtractorBootstrapPrompts +from .llm_helper import generate_with_llm, get_llm_client + +logger = logging.getLogger(__name__) + + +class VibeExtractorGenerator: + """Generator for document extraction components using LLM.""" + + def __init__(self, llm_config: dict[str, Any]): + """Initialize the generator with LLM configuration. + + Args: + llm_config: Configuration dictionary for LLM client + - adapter_id: Provider (openai, anthropic, bedrock, etc.) + - model: Model name + - api_key: API key + - temperature: Temperature (default: 0.7) + - max_tokens: Max tokens (default: 4096) + """ + self.llm_config = llm_config + self.llm_client = None + + def _ensure_llm_client(self): + """Ensure LLM client is initialized.""" + if self.llm_client is None: + self.llm_client = get_llm_client(self.llm_config) + + def _clean_llm_response(self, response_text: str) -> str: + """Remove code block markers from LLM response. + + Args: + response_text: Raw response from LLM + + Returns: + Cleaned response text + """ + response_text = response_text.strip() + + # Remove markdown code blocks + if response_text.startswith("```markdown"): + response_text = response_text[11:] + elif response_text.startswith("```yaml"): + response_text = response_text[7:] + elif response_text.startswith("```"): + response_text = response_text[3:] + + if response_text.endswith("```"): + response_text = response_text[:-3] + + return response_text.strip() + + async def generate_metadata( + self, doc_type: str, reference_template: str + ) -> dict[str, Any]: + """Generate metadata for a document type using LLM. 
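+
+        Example (sketch; the returned mapping mirrors the keys of the
+        reference template):
+
+            metadata = await generator.generate_metadata(
+                "invoice", reference_template
+            )
+            description = metadata.get("description")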
+ + Args: + doc_type: Document type name (e.g., "invoice", "receipt") + reference_template: Reference metadata.yaml template content + + Returns: + Dictionary containing generated metadata + + Raises: + Exception: If metadata generation fails + """ + self._ensure_llm_client() + logger.info(f"Generating metadata for '{doc_type}' using LLM...") + + prompt = VibeExtractorBootstrapPrompts.DOCUMENT_METADATA.format( + doc_type=doc_type, reference_template=reference_template + ) + + try: + response = await generate_with_llm(self.llm_client, prompt, max_tokens=1000) + + # Clean and parse YAML response + yaml_content = self._clean_llm_response(response) + metadata = yaml.safe_load(yaml_content) + + logger.info(f"Successfully generated metadata for '{doc_type}'") + return metadata + + except Exception as e: + error_msg = f"Error generating metadata: {str(e)}" + logger.error(error_msg) + raise Exception(error_msg) from e + + async def generate_extraction_fields( + self, doc_type: str, metadata: dict[str, Any] + ) -> str: + """Generate extraction.yaml structure using LLM. + + Args: + doc_type: Document type name + metadata: Generated metadata dictionary + + Returns: + YAML string defining extraction fields + + Raises: + Exception: If extraction fields generation fails + """ + self._ensure_llm_client() + logger.info(f"Generating extraction fields for '{doc_type}' using LLM...") + + metadata_description = metadata.get("description", "") + prompt = VibeExtractorBootstrapPrompts.DOCUMENT_EXTRACTION_FIELDS.format( + doc_type=doc_type, metadata_description=metadata_description + ) + + try: + response = await generate_with_llm(self.llm_client, prompt, max_tokens=2000) + + # Clean YAML response + yaml_content = self._clean_llm_response(response) + + logger.info(f"Successfully generated extraction fields for '{doc_type}'") + return yaml_content + + except Exception as e: + error_msg = f"Error generating extraction fields: {str(e)}" + logger.error(error_msg) + raise Exception(error_msg) from e + + async def generate_page_extraction_system_prompt( + self, doc_type: str, metadata: dict[str, Any] + ) -> str: + """Generate page extraction system prompt using LLM. + + Args: + doc_type: Document type name + metadata: Generated metadata dictionary + + Returns: + System prompt text for page extraction + + Raises: + Exception: If prompt generation fails + """ + self._ensure_llm_client() + logger.info( + f"Generating page extraction system prompt for '{doc_type}' using LLM..." + ) + + metadata_description = metadata.get("description", "") + prompt = VibeExtractorBootstrapPrompts.PAGE_EXTRACTION_SYSTEM.format( + doc_type=doc_type, metadata_description=metadata_description + ) + + try: + response = await generate_with_llm(self.llm_client, prompt, max_tokens=1500) + + cleaned_response = self._clean_llm_response(response) + logger.info( + f"Successfully generated page extraction system prompt for '{doc_type}'" + ) + return cleaned_response + + except Exception as e: + error_msg = f"Error generating page extraction system prompt: {str(e)}" + logger.error(error_msg) + raise Exception(error_msg) from e + + async def generate_page_extraction_user_prompt( + self, doc_type: str, metadata: dict[str, Any] + ) -> str: + """Generate page extraction user prompt using LLM. 
+ + Args: + doc_type: Document type name + metadata: Generated metadata dictionary + + Returns: + User prompt text for page extraction + + Raises: + Exception: If prompt generation fails + """ + self._ensure_llm_client() + logger.info( + f"Generating page extraction user prompt for '{doc_type}' using LLM..." + ) + + metadata_description = metadata.get("description", "") + prompt = VibeExtractorBootstrapPrompts.PAGE_EXTRACTION_USER.format( + doc_type=doc_type, metadata_description=metadata_description + ) + + try: + response = await generate_with_llm(self.llm_client, prompt, max_tokens=500) + + cleaned_response = self._clean_llm_response(response) + logger.info( + f"Successfully generated page extraction user prompt for '{doc_type}'" + ) + return cleaned_response + + except Exception as e: + error_msg = f"Error generating page extraction user prompt: {str(e)}" + logger.error(error_msg) + raise Exception(error_msg) from e + + async def generate_scalar_extraction_system_prompt( + self, doc_type: str, metadata: dict[str, Any], extraction_yaml: str + ) -> str: + """Generate scalar extraction system prompt using LLM. + + Args: + doc_type: Document type name + metadata: Generated metadata dictionary + extraction_yaml: Generated extraction YAML content + + Returns: + System prompt text for scalar extraction + + Raises: + Exception: If prompt generation fails + """ + self._ensure_llm_client() + logger.info( + f"Generating scalar extraction system prompt for '{doc_type}' using LLM..." + ) + + # Parse extraction YAML to get scalar fields + try: + extraction_data = yaml.safe_load(extraction_yaml) + scalar_fields = [] + for key, value in extraction_data.items(): + if not isinstance(value, list): + scalar_fields.append(key) + except Exception: + scalar_fields = [] + + metadata_description = metadata.get("description", "") + scalar_fields_str = ", ".join(scalar_fields[:5]) + + prompt = VibeExtractorBootstrapPrompts.SCALARS_EXTRACTION_SYSTEM.format( + doc_type=doc_type, + metadata_description=metadata_description, + scalar_fields=scalar_fields_str, + ) + + try: + response = await generate_with_llm(self.llm_client, prompt, max_tokens=1500) + + cleaned_response = self._clean_llm_response(response) + logger.info( + f"Successfully generated scalar extraction system prompt for '{doc_type}'" + ) + return cleaned_response + + except Exception as e: + error_msg = f"Error generating scalar extraction system prompt: {str(e)}" + logger.error(error_msg) + raise Exception(error_msg) from e + + async def generate_scalar_extraction_user_prompt( + self, doc_type: str, metadata: dict[str, Any] + ) -> str: + """Generate scalar extraction user prompt using LLM. + + Args: + doc_type: Document type name + metadata: Generated metadata dictionary + + Returns: + User prompt text for scalar extraction + + Raises: + Exception: If prompt generation fails + """ + self._ensure_llm_client() + logger.info( + f"Generating scalar extraction user prompt for '{doc_type}' using LLM..." 
+ ) + + prompt = VibeExtractorBootstrapPrompts.SCALARS_EXTRACTION_USER.format( + doc_type=doc_type + ) + + try: + response = await generate_with_llm(self.llm_client, prompt, max_tokens=500) + + cleaned_response = self._clean_llm_response(response) + logger.info( + f"Successfully generated scalar extraction user prompt for '{doc_type}'" + ) + return cleaned_response + + except Exception as e: + error_msg = f"Error generating scalar extraction user prompt: {str(e)}" + logger.error(error_msg) + raise Exception(error_msg) from e + + async def generate_table_extraction_system_prompt( + self, doc_type: str, metadata: dict[str, Any], extraction_yaml: str + ) -> str: + """Generate table extraction system prompt using LLM. + + Args: + doc_type: Document type name + metadata: Generated metadata dictionary + extraction_yaml: Generated extraction YAML content + + Returns: + System prompt text for table extraction + + Raises: + Exception: If prompt generation fails + """ + self._ensure_llm_client() + logger.info( + f"Generating table extraction system prompt for '{doc_type}' using LLM..." + ) + + metadata_description = metadata.get("description", "") + prompt = VibeExtractorBootstrapPrompts.TABLES_EXTRACTION_SYSTEM.format( + doc_type=doc_type, metadata_description=metadata_description + ) + + try: + response = await generate_with_llm(self.llm_client, prompt, max_tokens=2000) + + cleaned_response = self._clean_llm_response(response) + logger.info( + f"Successfully generated table extraction system prompt for '{doc_type}'" + ) + return cleaned_response + + except Exception as e: + error_msg = f"Error generating table extraction system prompt: {str(e)}" + logger.error(error_msg) + raise Exception(error_msg) from e + + async def generate_table_extraction_user_prompt( + self, doc_type: str, metadata: dict[str, Any] + ) -> str: + """Generate table extraction user prompt using LLM. + + Args: + doc_type: Document type name + metadata: Generated metadata dictionary + + Returns: + User prompt text for table extraction + + Raises: + Exception: If prompt generation fails + """ + self._ensure_llm_client() + logger.info( + f"Generating table extraction user prompt for '{doc_type}' using LLM..." + ) + + prompt = VibeExtractorBootstrapPrompts.TABLES_EXTRACTION_USER.format( + doc_type=doc_type + ) + + try: + response = await generate_with_llm(self.llm_client, prompt, max_tokens=500) + + cleaned_response = self._clean_llm_response(response) + logger.info( + f"Successfully generated table extraction user prompt for '{doc_type}'" + ) + return cleaned_response + + except Exception as e: + error_msg = f"Error generating table extraction user prompt: {str(e)}" + logger.error(error_msg) + raise Exception(error_msg) from e + + def save_metadata_yaml(self, output_path: Path, metadata: dict[str, Any]) -> Path: + """Save metadata as YAML file. 
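+
+        Missing keys (version, author, release_date, price_multiplier,
+        llm_model, extraction_features) are filled with defaults before
+        writing. Illustrative usage (the output path is an assumption):
+
+            path = generator.save_metadata_yaml(Path("generated/invoice"), metadata)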
+ + Args: + output_path: Output directory path + metadata: Metadata dictionary to save + + Returns: + Path to saved metadata.yaml file + """ + # Add default values if not present + if "version" not in metadata: + metadata["version"] = "1.0.0" + if "author" not in metadata: + metadata["author"] = "Zipstack Inc" + if "release_date" not in metadata: + metadata["release_date"] = "2025-07-01" + if "price_multiplier" not in metadata: + metadata["price_multiplier"] = 1.0 + if "llm_model" not in metadata: + metadata["llm_model"] = "claude-sonnet-1-7" + if "extraction_features" not in metadata: + metadata["extraction_features"] = { + "locate_pages": True, + "rolling_window": False, + "challenge": False, + } + + metadata_file = output_path / "metadata.yaml" + with open(metadata_file, "w") as f: + yaml.dump(metadata, f, default_flow_style=False, sort_keys=False) + + logger.info(f"Saved metadata to {metadata_file}") + return metadata_file + + def save_extraction_yaml(self, output_path: Path, extraction_content: str) -> Path: + """Save extraction fields as YAML file. + + Args: + output_path: Output directory path + extraction_content: Extraction YAML content string + + Returns: + Path to saved extraction.yaml file + """ + extraction_file = output_path / "extraction.yaml" + with open(extraction_file, "w") as f: + f.write("---\n") + f.write(extraction_content) + if not extraction_content.endswith("\n"): + f.write("\n") + + logger.info(f"Saved extraction fields to {extraction_file}") + return extraction_file + + def save_prompt_file(self, output_path: Path, filename: str, content: str) -> Path: + """Save prompt content to markdown file. + + Args: + output_path: Output directory path (should include prompts subdir) + filename: Name of the markdown file + content: Prompt content + + Returns: + Path to saved prompt file + """ + prompt_file = output_path / filename + with open(prompt_file, "w") as f: + f.write(content) + + logger.info(f"Saved prompt to {prompt_file}") + return prompt_file diff --git a/prompt-service/src/unstract/prompt_service/services/vibe_extractor/llm_helper.py b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/llm_helper.py new file mode 100644 index 0000000000..6602a3e020 --- /dev/null +++ b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/llm_helper.py @@ -0,0 +1,461 @@ +"""LLM Helper for Vibe Extractor. + +This module provides LLM client initialization and communication using autogen. +Uses autogen-ext clients where available, and creates compatible adapters for others. +""" + +import logging +from typing import Any, Dict, List, Optional, Sequence + +from autogen_core.models import ( + ChatCompletionClient, + LLMMessage, + SystemMessage, + UserMessage, +) +from autogen_ext.models.openai import ( + AzureOpenAIChatCompletionClient, + OpenAIChatCompletionClient, +) + +# Import SDKs (available through llama-index dependencies) +try: + import anthropic +except ImportError: + anthropic = None + +try: + import boto3 +except ImportError: + boto3 = None + +logger = logging.getLogger(__name__) + + +# ============================================================================ +# TEMPORARY TESTING METHOD - REMOVE AFTER TESTING +# ============================================================================ +def get_test_llm_config() -> Dict[str, Any]: + """Get hardcoded LLM config for testing purposes. + + TODO: REMOVE THIS AFTER TESTING - Use proper adapter configuration instead. 
+
+    This bypasses the platform settings and adapter infrastructure for quick testing.
+    To use, set the environment variable ANTHROPIC_API_KEY.
+
+    Returns:
+        Dict with hardcoded Anthropic configuration
+    """
+    import os
+
+    api_key = os.getenv("ANTHROPIC_API_KEY")
+    if not api_key:
+        raise ValueError(
+            "ANTHROPIC_API_KEY environment variable required for testing. "
+            "Set it in your .env file or environment."
+        )
+
+    return {
+        "adapter_id": "anthropic",
+        "model": "claude-3-5-sonnet-20241022",
+        "api_key": api_key,
+        "temperature": 0.1,
+        "max_tokens": 4096,
+    }
+# ============================================================================
+# END TEMPORARY TESTING METHOD
+# ============================================================================
+
+
+class AnthropicAdapter(ChatCompletionClient):
+    """Adapter to make the Anthropic SDK compatible with autogen's ChatCompletionClient interface."""
+
+    def __init__(self, api_key: str, model: str, temperature: float = 0.1,
+                 max_tokens: int = 4096, **kwargs):
+        if anthropic is None:
+            raise ImportError("anthropic package is required")
+
+        self._client = anthropic.Anthropic(api_key=api_key)
+        self._model = model
+        self._temperature = temperature
+        self._max_tokens = max_tokens
+
+    async def create(
+        self,
+        messages: Sequence[LLMMessage],
+        *,
+        max_tokens: Optional[int] = None,
+        temperature: Optional[float] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Create a chat completion using the Anthropic API."""
+        # Convert autogen messages to Anthropic format. Anthropic takes the
+        # system prompt as a separate top-level parameter rather than as a
+        # message in the list.
+        anthropic_messages = []
+        system_prompt = None
+        for msg in messages:
+            if isinstance(msg, UserMessage):
+                anthropic_messages.append({"role": "user", "content": msg.content})
+            elif isinstance(msg, SystemMessage):
+                system_prompt = msg.content
+
+        try:
+            request_kwargs: Dict[str, Any] = {}
+            if system_prompt:
+                request_kwargs["system"] = system_prompt
+            response = self._client.messages.create(
+                model=self._model,
+                max_tokens=max_tokens if max_tokens is not None else self._max_tokens,
+                temperature=(
+                    temperature if temperature is not None else self._temperature
+                ),
+                messages=anthropic_messages,
+                **request_kwargs,
+            )
+
+            # Return in a format compatible with autogen
+            class CompletionResult:
+                def __init__(self, text):
+                    self.content = text
+                    self.choices = [type('obj', (object,), {'message': type('obj', (object,), {'content': text})()})]
+
+            return CompletionResult(response.content[0].text)
+
+        except Exception as e:
+            logger.error(f"Anthropic API error: {str(e)}")
+            raise
+
+
+class BedrockAdapter(ChatCompletionClient):
+    """Adapter to make AWS Bedrock compatible with autogen's ChatCompletionClient interface."""
+
+    def __init__(self, aws_access_key_id: str, aws_secret_access_key: str,
+                 region_name: str, model: str, temperature: float = 0.1,
+                 max_tokens: int = 4096, **kwargs):
+        if boto3 is None:
+            raise ImportError("boto3 is required for Bedrock")
+
+        session = boto3.Session(
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            region_name=region_name,
+        )
+
+        # Validate credentials
+        try:
+            session.get_credentials().get_frozen_credentials()
+        except Exception as e:
+            raise RuntimeError("Invalid AWS credentials") from e
+
+        self._client = session.client('bedrock-runtime', region_name=region_name)
+        self._model = model
+        self._temperature = temperature
+        self._max_tokens = max_tokens
+
+    async def create(
+        self,
+        messages: Sequence[LLMMessage],
+        *,
+        max_tokens: Optional[int] = None,
+        temperature: Optional[float] = None,
+        **kwargs: Any,
+    ) -> Any:
+        """Create a chat completion using the Bedrock API."""
+        import json
+
+        # Convert autogen messages to Bedrock (Anthropic messages) format
+        bedrock_messages = []
+        system_prompt = None
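+        # The request body built below follows the Anthropic messages schema
+        # that Bedrock expects for Claude models, e.g. (illustrative values):
+        #   {
+        #       "anthropic_version": "bedrock-2023-05-31",
+        #       "max_tokens": 4096,
+        #       "system": "You are ...",  # optional, only when a system prompt is set
+        #       "messages": [{"role": "user", "content": "..."}],
+        #   }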
+        for msg in messages:
+            if isinstance(msg, UserMessage):
+                bedrock_messages.append({"role": "user", "content": msg.content})
+            elif isinstance(msg, SystemMessage):
+                system_prompt = msg.content
+
+        try:
+            request_body: Dict[str, Any] = {
+                "anthropic_version": "bedrock-2023-05-31",
+                "max_tokens": max_tokens if max_tokens is not None else self._max_tokens,
+                "temperature": (
+                    temperature if temperature is not None else self._temperature
+                ),
+                "messages": bedrock_messages,
+            }
+            if system_prompt:
+                request_body["system"] = system_prompt
+            body = json.dumps(request_body)
+
+            response = self._client.invoke_model(
+                modelId=self._model,
+                body=body
+            )
+
+            response_body = json.loads(response['body'].read())
+            text = response_body['content'][0]['text']
+
+            # Return in a format compatible with autogen
+            class CompletionResult:
+                def __init__(self, text):
+                    self.content = text
+                    self.choices = [type('obj', (object,), {'message': type('obj', (object,), {'content': text})()})]
+
+            return CompletionResult(text)
+
+        except Exception as e:
+            logger.error(f"Bedrock API error: {str(e)}")
+            raise
+
+
+def get_llm_client(llm_config: Dict[str, Any]) -> ChatCompletionClient:
+    """Initialize and return an LLM client based on configuration.
+
+    Args:
+        llm_config: Configuration dictionary containing:
+            - adapter_id: Provider identifier (openai, azureopenai, anthropic, bedrock)
+            - model: Model name
+            - api_key: API key for the provider
+            - temperature: Temperature for generation (default: 0.1)
+            - max_tokens: Maximum tokens to generate (default: 4096)
+            - Other provider-specific parameters
+
+    Returns:
+        ChatCompletionClient instance
+
+    Raises:
+        Exception: If client initialization fails
+    """
+    try:
+        adapter_id = llm_config.get("adapter_id")
+
+        if adapter_id == "azureopenai":
+            return AzureOpenAIChatCompletionClient(
+                model=llm_config.get("model"),
+                azure_endpoint=llm_config.get("api_base"),
+                temperature=llm_config.get("temperature", 0.1),
+                max_tokens=llm_config.get("max_tokens", 4096),
+                api_version=llm_config.get("api_version"),
+                api_key=llm_config.get("api_key"),
+                azure_deployment=llm_config.get("deployment"),
+                timeout=llm_config.get("timeout", 900),
+            )
+
+        elif adapter_id == "openai":
+            return OpenAIChatCompletionClient(
+                model=llm_config.get("model"),
+                api_key=llm_config.get("api_key"),
+                temperature=llm_config.get("temperature", 0.1),
+                max_tokens=llm_config.get("max_tokens", 4096),
+                request_timeout=llm_config.get("request_timeout", 60),
+                base_url=llm_config.get("api_base"),
+                max_retries=llm_config.get("max_retries", 3),
+                timeout=llm_config.get("timeout", 900),
+            )
+
+        elif adapter_id == "anthropic":
+            return AnthropicAdapter(
+                api_key=llm_config.get("api_key"),
+                model=llm_config.get("model"),
+                temperature=llm_config.get("temperature", 0.1),
+                max_tokens=llm_config.get("max_tokens", 4096),
+            )
+
+        elif adapter_id == "bedrock":
+            return BedrockAdapter(
+                aws_access_key_id=llm_config.get("aws_access_key_id"),
+                aws_secret_access_key=llm_config.get("aws_secret_access_key"),
+                region_name=llm_config.get("region_name"),
+                model=llm_config.get("model"),
+                temperature=llm_config.get("temperature", 0.1),
+                max_tokens=llm_config.get("max_tokens", 4096),
+            )
+
+        else:
+            raise ValueError(
+                f"Unknown adapter_id: {adapter_id}. "
+                f"Supported: openai, azureopenai, anthropic, bedrock"
+            )
+
+    except Exception as e:
+        error_msg = f"Failed to initialize LLM client: {str(e)}"
+        logger.error(error_msg)
+        raise Exception(error_msg) from e
+
+
+async def generate_with_llm(
+    llm_client: ChatCompletionClient, prompt: str, max_tokens: int = 2000
+) -> str:
+    """Generate a response using autogen's completion interface.
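+
+    Illustrative usage (a sketch; assumes a valid ``llm_config`` dict):
+
+        client = get_llm_client(llm_config)
+        text = await generate_with_llm(client, "Describe an invoice.", max_tokens=500)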
+ + Args: + llm_client: ChatCompletionClient instance (from get_llm_client) + prompt: The prompt to send to the LLM + max_tokens: Maximum tokens to generate + + Returns: + Generated text response + + Raises: + Exception: If generation fails + """ + try: + # Create messages in autogen format + messages = [ + SystemMessage(content="You are a helpful assistant that generates document extraction metadata and prompts."), + UserMessage(content=prompt, source="user"), + ] + + # Use autogen's completion API + response = await llm_client.create( + messages=messages, + max_tokens=max_tokens, + ) + + # Extract text from response + if hasattr(response, 'content'): + return response.content.strip() + elif hasattr(response, 'choices') and len(response.choices) > 0: + return response.choices[0].message.content.strip() + else: + raise ValueError("Unexpected response format from LLM") + + except Exception as e: + error_msg = f"Failed to generate with LLM: {str(e)}" + logger.error(error_msg) + raise Exception(error_msg) from e + + +async def guess_document_type_with_llm( + file_content: str, + llm_config: Dict[str, Any], +) -> Dict[str, Any]: + """Guess document type from file content using LLM. + + Args: + file_content: Extracted text content from the document + llm_config: LLM configuration dictionary + + Returns: + Dictionary containing: + - status: "success" or "error" + - document_type: Guessed document type (if success) + - confidence: Confidence description (if applicable) + - primary_indicators: List of indicators found + - document_category: Document category + - alternative_types: List of alternative types + - reasoning: Reasoning for the identification + - error: Error message (if error) + """ + try: + from .constants import VibeExtractorBootstrapPrompts + import json + import re + from json_repair import repair_json + + # Truncate content if too long (keep first 4000 characters) + content_sample = ( + file_content[:4000] if len(file_content) > 4000 else file_content + ) + + # Create the full prompt using the constant + full_prompt = f"""{VibeExtractorBootstrapPrompts.DOCUMENT_TYPE_IDENTIFICATION} + +## Document Content to Analyze + +``` +{content_sample} +``` + +Analyze the document content above and respond with your identification in the exact JSON format specified.""" + + # Get LLM client + llm_client = get_llm_client(llm_config) + + # Generate response with higher token limit for detailed analysis + response_text = await generate_with_llm( + llm_client=llm_client, + prompt=full_prompt, + max_tokens=1000, + ) + + # Try to extract JSON from response (in case LLM added markdown) + json_match = re.search( + r'```json\s*(\{.*?\})\s*```', response_text, re.DOTALL + ) + if json_match: + json_str = json_match.group(1) + else: + # Try to find JSON object directly + json_match = re.search(r'\{.*\}', response_text, re.DOTALL) + if json_match: + json_str = json_match.group(0) + else: + # No JSON found in response + logger.error( + f"No JSON object found in LLM response: {response_text}" + ) + return { + "status": "error", + "error": "LLM did not return a valid JSON response. " + "Please try again or check the LLM configuration.", + "raw_response": response_text[:500], + } + + # Try to parse JSON + response_json = None + try: + response_json = json.loads(json_str) + except json.JSONDecodeError as json_error: + # Try to repair the JSON + logger.warning( + f"Initial JSON parsing failed: {json_error}. " + f"Attempting to repair JSON..." 
+            )
+            try:
+                repaired_json_str = repair_json(json_str)
+                response_json = json.loads(repaired_json_str)
+                logger.info("Successfully repaired and parsed JSON response")
+            except Exception as repair_error:
+                # JSON repair also failed
+                logger.error(
+                    f"Failed to repair JSON. "
+                    f"Original error: {json_error}. "
+                    f"Repair error: {repair_error}. "
+                    f"Attempted to parse: {json_str[:200]}"
+                )
+                return {
+                    "status": "error",
+                    "error": (
+                        f"Failed to parse LLM response as JSON. "
+                        f"Original error: {str(json_error)}. "
+                        f"JSON repair also failed: {str(repair_error)}"
+                    ),
+                    "raw_response": response_text[:500],
+                    "attempted_json": json_str[:200],
+                }
+
+        # Validate required fields
+        required_fields = ["document_type", "confidence", "reasoning"]
+        missing_fields = [
+            field for field in required_fields if field not in response_json
+        ]
+
+        if missing_fields:
+            logger.warning(
+                f"LLM response missing required fields: {missing_fields}. "
+                f"Response: {response_json}"
+            )
+            return {
+                "status": "error",
+                "error": (
+                    f"LLM response missing required fields: "
+                    f"{', '.join(missing_fields)}"
+                ),
+                "partial_response": response_json,
+            }
+
+        # Successfully parsed and validated
+        return {
+            "status": "success",
+            "document_type": response_json.get("document_type", "unknown"),
+            "confidence": response_json.get("confidence", "unknown"),
+            "primary_indicators": response_json.get("primary_indicators", []),
+            "document_category": response_json.get(
+                "document_category", "unknown"
+            ),
+            "alternative_types": response_json.get("alternative_types", []),
+            "reasoning": response_json.get("reasoning", ""),
+        }
+
+    except Exception as e:
+        error_msg = f"Failed to guess document type with LLM: {str(e)}"
+        logger.error(error_msg, exc_info=True)
+        return {
+            "status": "error",
+            "error": error_msg,
+        }
diff --git a/prompt-service/src/unstract/prompt_service/services/vibe_extractor/service.py b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/service.py
new file mode 100644
index 0000000000..9a823c74c1
--- /dev/null
+++ b/prompt-service/src/unstract/prompt_service/services/vibe_extractor/service.py
@@ -0,0 +1,238 @@
+"""Vibe Extractor Service.
+
+This module provides the main service interface for generating
+document extraction components. It orchestrates the complete
+generation flow.
+"""
+
+import logging
+from pathlib import Path
+from typing import Any, Callable
+
+from .generator import VibeExtractorGenerator
+
+logger = logging.getLogger(__name__)
+
+
+class VibeExtractorService:
+    """Service for generating document extraction components."""
+
+    def __init__(self, llm_config: dict[str, Any], output_dir: str):
+        """Initialize the service.
+
+        Args:
+            llm_config: LLM configuration dictionary
+            output_dir: Base output directory for generated files
+        """
+        self.generator = VibeExtractorGenerator(llm_config)
+        self.output_dir = Path(output_dir)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+    async def generate_all(
+        self,
+        doc_type: str,
+        reference_template: str,
+        progress_callback: Callable[..., Any] | None = None,
+    ) -> dict[str, Any]:
+        """Generate all components for a document type.
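+
+        Illustrative usage (a sketch; the output directory and config are
+        assumptions):
+
+            service = VibeExtractorService(llm_config, "generated")
+            result = await service.generate_all("invoice", template_str)
+            if result["status"] == "success":
+                print(result["files"]["metadata"])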
+
+        Args:
+            doc_type: Document type name (e.g., "invoice", "receipt")
+            reference_template: Reference metadata.yaml template content
+            progress_callback: Optional callback to report progress
+
+        Returns:
+            Dictionary containing:
+                - status: "success" or "error"
+                - output_path: Path to generated files
+                - files: Dictionary of generated file paths
+                - error: Error message if status is "error"
+        """
+        try:
+            logger.info(f"Starting generation for document type: {doc_type}")
+
+            # Create output directory for this document type
+            doc_output_dir = self.output_dir / doc_type
+            doc_output_dir.mkdir(parents=True, exist_ok=True)
+
+            # Create prompts subdirectory
+            prompts_dir = doc_output_dir / "prompts"
+            prompts_dir.mkdir(parents=True, exist_ok=True)
+
+            result = {
+                "status": "success",
+                "output_path": str(doc_output_dir),
+                "files": {},
+            }
+
+            # Step 1: Generate metadata
+            if progress_callback:
+                progress_callback("generating_metadata", "in_progress")
+
+            logger.info("Step 1/5: Generating metadata...")
+            metadata = await self.generator.generate_metadata(
+                doc_type, reference_template
+            )
+            metadata_file = self.generator.save_metadata_yaml(doc_output_dir, metadata)
+            result["files"]["metadata"] = str(metadata_file)
+
+            if progress_callback:
+                progress_callback("generating_metadata", "completed")
+
+            # Step 2: Generate extraction fields
+            if progress_callback:
+                progress_callback("generating_extraction_fields", "in_progress")
+
+            logger.info("Step 2/5: Generating extraction fields...")
+            extraction_yaml = await self.generator.generate_extraction_fields(
+                doc_type, metadata
+            )
+            extraction_file = self.generator.save_extraction_yaml(
+                doc_output_dir, extraction_yaml
+            )
+            result["files"]["extraction"] = str(extraction_file)
+
+            if progress_callback:
+                progress_callback("generating_extraction_fields", "completed")
+
+            # Step 3: Generate page extraction prompts
+            if progress_callback:
+                progress_callback("generating_page_prompts", "in_progress")
+
+            logger.info("Step 3/5: Generating page extraction prompts...")
+            page_system_prompt = (
+                await self.generator.generate_page_extraction_system_prompt(
+                    doc_type, metadata
+                )
+            )
+            page_system_file = self.generator.save_prompt_file(
+                prompts_dir, "page-extraction-system.md", page_system_prompt
+            )
+            result["files"]["page_extraction_system"] = str(page_system_file)
+
+            page_user_prompt = await self.generator.generate_page_extraction_user_prompt(
+                doc_type, metadata
+            )
+            page_user_file = self.generator.save_prompt_file(
+                prompts_dir, "page-extraction-user.md", page_user_prompt
+            )
+            result["files"]["page_extraction_user"] = str(page_user_file)
+
+            if progress_callback:
+                progress_callback("generating_page_prompts", "completed")
+
+            # Step 4: Generate scalar extraction prompts
+            if progress_callback:
+                progress_callback("generating_scalar_prompts", "in_progress")
+
+            logger.info("Step 4/5: Generating scalar extraction prompts...")
+            scalar_system_prompt = (
+                await self.generator.generate_scalar_extraction_system_prompt(
+                    doc_type, metadata, extraction_yaml
+                )
+            )
+            scalar_system_file = self.generator.save_prompt_file(
+                prompts_dir, "extraction-scalars-system.md", scalar_system_prompt
+            )
+            result["files"]["scalars_extraction_system"] = str(scalar_system_file)
+
+            scalar_user_prompt = (
+                await self.generator.generate_scalar_extraction_user_prompt(
+                    doc_type, metadata
+                )
+            )
+            scalar_user_file = self.generator.save_prompt_file(
+                prompts_dir, "extraction-scalars-user.md", scalar_user_prompt
+            )
+            result["files"]["scalars_extraction_user"] = str(scalar_user_file)
+
+            if progress_callback:
+                progress_callback("generating_scalar_prompts", "completed")
+
+            # Step 5: Generate table extraction prompts
+            if progress_callback:
+                progress_callback("generating_table_prompts", "in_progress")
+
+            logger.info("Step 5/5: Generating table extraction prompts...")
+            table_system_prompt = (
+                await self.generator.generate_table_extraction_system_prompt(
+                    doc_type, metadata, extraction_yaml
+                )
+            )
+            table_system_file = self.generator.save_prompt_file(
+                prompts_dir, "extraction-table-system.md", table_system_prompt
+            )
+            result["files"]["tables_extraction_system"] = str(table_system_file)
+
+            table_user_prompt = (
+                await self.generator.generate_table_extraction_user_prompt(
+                    doc_type, metadata
+                )
+            )
+            table_user_file = self.generator.save_prompt_file(
+                prompts_dir, "extraction-table-user.md", table_user_prompt
+            )
+            result["files"]["tables_extraction_user"] = str(table_user_file)
+
+            if progress_callback:
+                progress_callback("generating_table_prompts", "completed")
+
+            logger.info(
+                f"Successfully generated all components for '{doc_type}' at {doc_output_dir}"
+            )
+            return result
+
+        except Exception as e:
+            error_msg = f"Error during generation: {str(e)}"
+            logger.error(error_msg, exc_info=True)
+
+            if progress_callback:
+                progress_callback("error", "failed", error_msg)
+
+            return {"status": "error", "error": error_msg}
+
+    async def generate_metadata_only(
+        self, doc_type: str, reference_template: str
+    ) -> dict[str, Any]:
+        """Generate only metadata for a document type.
+
+        Args:
+            doc_type: Document type name
+            reference_template: Reference metadata.yaml template
+
+        Returns:
+            Dictionary containing generated metadata or error
+        """
+        try:
+            logger.info(f"Generating metadata for: {doc_type}")
+            metadata = await self.generator.generate_metadata(
+                doc_type, reference_template
+            )
+            return {"status": "success", "metadata": metadata}
+        except Exception as e:
+            error_msg = f"Error generating metadata: {str(e)}"
+            logger.error(error_msg)
+            return {"status": "error", "error": error_msg}
+
+    async def generate_extraction_fields_only(
+        self, doc_type: str, metadata: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Generate only extraction fields for a document type.
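+
+        Illustrative usage (a sketch; ``metadata`` as returned by
+        ``generate_metadata_only``):
+
+            fields = await service.generate_extraction_fields_only(
+                "invoice", metadata
+            )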
+ + Args: + doc_type: Document type name + metadata: Metadata dictionary + + Returns: + Dictionary containing extraction YAML or error + """ + try: + logger.info(f"Generating extraction fields for: {doc_type}") + extraction_yaml = await self.generator.generate_extraction_fields( + doc_type, metadata + ) + return {"status": "success", "extraction_yaml": extraction_yaml} + except Exception as e: + error_msg = f"Error generating extraction fields: {str(e)}" + logger.error(error_msg) + return {"status": "error", "error": error_msg} diff --git a/prompt-service/uv.lock b/prompt-service/uv.lock index 542e3db393..ad109c594b 100644 --- a/prompt-service/uv.lock +++ b/prompt-service/uv.lock @@ -238,6 +238,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/aa/91355b5f539caf1b94f0e66ff1e4ee39373b757fce08204981f7829ede51/authlib-1.6.4-py2.py3-none-any.whl", hash = "sha256:39313d2a2caac3ecf6d8f95fbebdfd30ae6ea6ae6a6db794d976405fdd9aa796", size = 243076 }, ] +[[package]] +name = "autogen-agentchat" +version = "0.4.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "autogen-core" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/50/065e357b08e4594ec949343e8e5b74ecca557e480c3072d4555569f3b517/autogen_agentchat-0.4.4.tar.gz", hash = "sha256:bb4a636707a5fd91950685b68f28019bdc9f64a101cd87109715dd212d295106", size = 58173 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/fd/7fade5f943a8c5094130f9aefd72a18818d1862d85078ec0d9ac62b0b51a/autogen_agentchat-0.4.4-py3-none-any.whl", hash = "sha256:c10e6e5a867403b8cc37c9f733e6f8a9b4f32e399808bdb1720bc00274b5e516", size = 63016 }, +] + +[[package]] +name = "autogen-core" +version = "0.4.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jsonref" }, + { name = "opentelemetry-api" }, + { name = "pillow" }, + { name = "protobuf" }, + { name = "pydantic" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1a/f5/de81486709bf04d89e4824a4b755e66c53d6d572e802514731f23314f319/autogen_core-0.4.4.tar.gz", hash = "sha256:053a17bbf7fb345bbe1249f0f7181f6a6b15f2dfa47b8ce69910b8001ff96156", size = 2310980 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/2f/f8dcd48ccb99f92252987d20c86e51e58eaed6bcfb1cf727b9bdfa51b1bc/autogen_core-0.4.4-py3-none-any.whl", hash = "sha256:2e891b20817b90e847c0f580d9ccd1cb1f682bdb2830d3b7d9425bd6966f6d28", size = 78245 }, +] + +[[package]] +name = "autogen-ext" +version = "0.4.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "autogen-core" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/10/4a/9ff4b65c773cc086f2b81cbff44161ff7f976fa235a2ec63470869cd1526/autogen_ext-0.4.4.tar.gz", hash = "sha256:116ece3a75af48f194da3a482b7f546e252d398dfaa133c62145d24a88fb8ac0", size = 140011 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/b4/cda27ed34a8be22d9c082f1b1b41ca5f4695b6b6462419ffffe50b23f086/autogen_ext-0.4.4-py3-none-any.whl", hash = "sha256:f994006ee34473524cd3ff9017c558d55a6610013d1e5d3a8f7ac90e15fb12df", size = 143012 }, +] + [[package]] name = "azure-core" version = "1.35.1" @@ -1328,6 +1369,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/c7/dd23f764de95771300a8a7ae17293c47ba0e48f826154229d18ecfe147cd/json_repair-0.42.0-py3-none-any.whl", hash = "sha256:7b6805162053dfe65722e961bc51b5eecec0582ec8a8e0fd218d33e8de757daf", size = 21612 }, ] +[[package]] +name = "jsonref" +version = "1.1.0" +source 
= { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/aa/0d/c1f3277e90ccdb50d33ed5ba1ec5b3f0a242ed8c1b1a85d3afeb68464dca/jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552", size = 8814 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/ec/e1db9922bceb168197a558a2b8c03a7963f1afe93517ddd3cf99f202f996/jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9", size = 9425 }, +] + [[package]] name = "jsonschema" version = "4.25.1" @@ -3152,6 +3202,9 @@ name = "unstract-prompt-service" version = "0.0.1" source = { editable = "." } dependencies = [ + { name = "autogen-agentchat" }, + { name = "autogen-core" }, + { name = "autogen-ext" }, { name = "flask" }, { name = "json-repair" }, { name = "llama-index" }, @@ -3186,6 +3239,9 @@ test = [ [package.metadata] requires-dist = [ + { name = "autogen-agentchat", specifier = ">=0.4.0" }, + { name = "autogen-core", specifier = ">=0.4.0" }, + { name = "autogen-ext", specifier = ">=0.4.0" }, { name = "flask", specifier = "~=3.0" }, { name = "json-repair", specifier = "~=0.42.0" }, { name = "llama-index", specifier = "==0.13.2" }, diff --git a/unstract/sdk1/src/unstract/sdk1/prompt.py b/unstract/sdk1/src/unstract/sdk1/prompt.py index 85e87a527e..67a2e192f1 100644 --- a/unstract/sdk1/src/unstract/sdk1/prompt.py +++ b/unstract/sdk1/src/unstract/sdk1/prompt.py @@ -171,6 +171,158 @@ def summarize( headers=headers, ) + @log_elapsed(operation="VIBE_EXTRACTOR_GUESS_DOCUMENT_TYPE") + @handle_service_exceptions("guessing document type") + def guess_document_type( + self, + payload: dict[str, Any], + params: dict[str, str] | None = None, + headers: dict[str, str] | None = None, + ) -> dict[str, Any]: + """Guess document type from file content using LLM. + + Args: + payload: Dictionary with file_content and llm_config + params: Optional query parameters + headers: Optional request headers + + Returns: + dict: Response with document_type, confidence, and metadata + """ + return self._call_service( + url_path="vibe-extractor/guess-document-type", + payload=payload, + params=params, + headers=headers, + ) + + @log_elapsed(operation="VIBE_EXTRACTOR_GENERATE_METADATA") + @handle_service_exceptions("generating metadata") + def generate_metadata( + self, + payload: dict[str, Any], + params: dict[str, str] | None = None, + headers: dict[str, str] | None = None, + ) -> dict[str, Any]: + """Generate metadata for a document type. + + Args: + payload: Dictionary with doc_type, llm_config, reference_template + params: Optional query parameters + headers: Optional request headers + + Returns: + dict: Response with generated metadata + """ + return self._call_service( + url_path="vibe-extractor/generate-metadata", + payload=payload, + params=params, + headers=headers, + ) + + @log_elapsed(operation="VIBE_EXTRACTOR_GENERATE_EXTRACTION_FIELDS") + @handle_service_exceptions("generating extraction fields") + def generate_extraction_fields( + self, + payload: dict[str, Any], + params: dict[str, str] | None = None, + headers: dict[str, str] | None = None, + ) -> dict[str, Any]: + """Generate extraction fields YAML for a document type. 
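+
+        Illustrative payload (a sketch; keys follow the Args below, values
+        are assumptions):
+
+            payload = {
+                "doc_type": "invoice",
+                "metadata_description": "Commercial invoice issued to ...",
+                "llm_config": {"adapter_id": "openai", "model": "gpt-4o"},
+            }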
+ + Args: + payload: Dictionary with doc_type, metadata_description, llm_config + params: Optional query parameters + headers: Optional request headers + + Returns: + dict: Response with extraction_yaml string + """ + return self._call_service( + url_path="vibe-extractor/generate-extraction-fields", + payload=payload, + params=params, + headers=headers, + ) + + @log_elapsed(operation="VIBE_EXTRACTOR_GENERATE_PAGE_PROMPTS") + @handle_service_exceptions("generating page prompts") + def generate_page_prompts( + self, + payload: dict[str, Any], + params: dict[str, str] | None = None, + headers: dict[str, str] | None = None, + ) -> dict[str, Any]: + """Generate page extraction prompts for a document type. + + Args: + payload: Dictionary with doc_type, metadata_description, llm_config + params: Optional query parameters + headers: Optional request headers + + Returns: + dict: Response with system_prompt and user_prompt + """ + return self._call_service( + url_path="vibe-extractor/generate-page-prompts", + payload=payload, + params=params, + headers=headers, + ) + + @log_elapsed(operation="VIBE_EXTRACTOR_GENERATE_SCALAR_PROMPTS") + @handle_service_exceptions("generating scalar prompts") + def generate_scalar_prompts( + self, + payload: dict[str, Any], + params: dict[str, str] | None = None, + headers: dict[str, str] | None = None, + ) -> dict[str, Any]: + """Generate scalar extraction prompts for a document type. + + Args: + payload: Dictionary with doc_type, metadata_description, + extraction_yaml, scalar_fields, llm_config + params: Optional query parameters + headers: Optional request headers + + Returns: + dict: Response with system_prompt and user_prompt + """ + return self._call_service( + url_path="vibe-extractor/generate-scalar-prompts", + payload=payload, + params=params, + headers=headers, + ) + + @log_elapsed(operation="VIBE_EXTRACTOR_GENERATE_TABLE_PROMPTS") + @handle_service_exceptions("generating table prompts") + def generate_table_prompts( + self, + payload: dict[str, Any], + params: dict[str, str] | None = None, + headers: dict[str, str] | None = None, + ) -> dict[str, Any]: + """Generate table extraction prompts for a document type. + + Args: + payload: Dictionary with doc_type, metadata_description, + extraction_yaml, list_fields, llm_config + params: Optional query parameters + headers: Optional request headers + + Returns: + dict: Response with system_prompt and user_prompt + """ + return self._call_service( + url_path="vibe-extractor/generate-table-prompts", + payload=payload, + params=params, + headers=headers, + ) + def _get_headers(self, headers: dict[str, str] | None = None) -> dict[str, str]: """Get default headers for requests.