2 changes: 1 addition & 1 deletion examples/amazon_s3_embedding/main.py
@@ -1,5 +1,5 @@
from dotenv import load_dotenv
-from psycopg_pool import ConnectionPool
+from psycopg_pool import ConnectionPool  # type: ignore[import-not-found]
Member: Why do we need to add this?

It's already included in amazon_s3_embedding/pyproject.toml. As long as we run pip install . under the specific example directory, we shouldn't get "import not found", right?

Author: Right, I was under the impression that we would be running mypy with the venv of the main project. I'll install dependencies per example and retest.
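A rough sketch of what that per-example check could look like (the venv layout, paths, and mypy invocation below are assumptions for illustration, not the project's actual tooling):

```python
# Hypothetical helper: create a venv for each example, install that example's
# dependencies into it, then run mypy from the same venv so imports such as
# psycopg_pool resolve. Paths and the bin/ layout are illustrative (POSIX),
# and the script assumes it is run from the repository root.
import subprocess
import sys
from pathlib import Path

for example in sorted(Path("examples").iterdir()):
    # Only consider examples that declare their own dependencies.
    if not (example / "pyproject.toml").is_file():
        continue
    venv = (example / ".venv").resolve()
    subprocess.run([sys.executable, "-m", "venv", str(venv)], check=True)
    pip = venv / "bin" / "pip"
    subprocess.run([str(pip), "install", str(example), "mypy"], check=True)
    subprocess.run([str(venv / "bin" / "mypy"), str(example / "main.py")], check=True)
```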

import cocoindex
import os
from typing import Any
2 changes: 1 addition & 1 deletion examples/azure_blob_embedding/main.py
@@ -1,5 +1,5 @@
from dotenv import load_dotenv
-from psycopg_pool import ConnectionPool
+from psycopg_pool import ConnectionPool  # type: ignore[import-not-found]
import cocoindex
import os
from typing import Any
4 changes: 2 additions & 2 deletions examples/code_embedding/main.py
@@ -1,6 +1,6 @@
from dotenv import load_dotenv
-from psycopg_pool import ConnectionPool
-from pgvector.psycopg import register_vector
+from psycopg_pool import ConnectionPool  # type: ignore[import-not-found]
+from pgvector.psycopg import register_vector  # type: ignore[import-not-found]
from typing import Any
import functools
import cocoindex
2 changes: 1 addition & 1 deletion examples/custom_output_files/main.py
@@ -96,7 +96,7 @@ def mutate(

@cocoindex.op.function()
def markdown_to_html(text: str) -> str:
-return _markdown_it.render(text)
+return str(_markdown_it.render(text))


@cocoindex.flow_def(name="CustomOutputFiles")
22 changes: 11 additions & 11 deletions examples/face_recognition/main.py
@@ -3,8 +3,9 @@
import datetime
import io
import os
+from typing import cast

-import face_recognition
+import face_recognition  # type: ignore[import-not-found]
import numpy as np
from PIL import Image

@@ -52,8 +53,9 @@ def extract_faces(content: bytes) -> list[FaceBase]:
ratio = 1.0
img = orig_img

-# Extract face locations.
-locs = face_recognition.face_locations(np.array(img), model="cnn")
+locs: list[tuple[int, int, int, int]] = face_recognition.face_locations(  # type: ignore[attr-defined]
+np.array(img), model="cnn"
+)

faces: list[FaceBase] = []
for min_y, max_x, max_y, min_x in locs:
@@ -63,8 +65,6 @@ def extract_faces(content: bytes) -> list[FaceBase]:
max_x=int(max_x * ratio),
max_y=int(max_y * ratio),
)
-
-# Crop the face and save it as a PNG.
buf = io.BytesIO()
orig_img.crop((rect.min_x, rect.min_y, rect.max_x, rect.max_y)).save(
buf, format="PNG"
@@ -76,16 +76,16 @@ def extract_faces(content: bytes) -> list[FaceBase]:


@cocoindex.op.function(cache=True, behavior_version=1, gpu=True)
-def extract_face_embedding(
-face: bytes,
-) -> cocoindex.Vector[cocoindex.Float32]:
+def extract_face_embedding(face: bytes) -> cocoindex.Vector[cocoindex.Float32]:
"""Extract the embedding of a face."""
img = Image.open(io.BytesIO(face)).convert("RGB")
-embedding = face_recognition.face_encodings(
+encoding: np.ndarray = face_recognition.face_encodings(  # type: ignore[attr-defined]
np.array(img),
known_face_locations=[(0, img.width - 1, img.height - 1, 0)],
-)[0]
-return embedding
+)[
+0
+]
+return cast(cocoindex.Vector[cocoindex.Float32], encoding.astype(np.float32))


@cocoindex.flow_def(name="FaceRecognition")
19 changes: 10 additions & 9 deletions examples/fastapi_server_docker/main.py
@@ -1,11 +1,12 @@
import cocoindex
-import uvicorn
+import uvicorn  # type: ignore[import-not-found]
from dotenv import load_dotenv
-from fastapi import FastAPI, Query
-from fastapi import Request
-from psycopg_pool import ConnectionPool
+from fastapi import FastAPI, Query  # type: ignore[import-not-found]
+from fastapi import Request  # type: ignore[import-not-found]
+from psycopg_pool import ConnectionPool  # type: ignore[import-not-found]
from contextlib import asynccontextmanager
import os
+from typing import Any, AsyncIterator


@cocoindex.transform_flow()
@@ -26,7 +27,7 @@ def text_to_embedding(
@cocoindex.flow_def(name="MarkdownEmbeddingFastApiExample")
def markdown_embedding_flow(
flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
-):
+) -> None:
"""
Define an example flow that embeds markdown files into a vector database.
"""
@@ -65,7 +66,7 @@ def markdown_embedding_flow(
)


-def search(pool: ConnectionPool, query: str, top_k: int = 5):
+def search(pool: ConnectionPool, query: str, top_k: int = 5) -> list[dict[str, Any]]:
# Get the table name, for the export target in the text_embedding_flow above.
table_name = cocoindex.utils.get_target_default_name(
markdown_embedding_flow, "doc_embeddings"
@@ -89,7 +90,7 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5):


@asynccontextmanager
-def lifespan(app: FastAPI):
+async def lifespan(app: FastAPI) -> AsyncIterator[None]:
load_dotenv()
cocoindex.init()
pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
@@ -103,12 +104,12 @@ def lifespan(app: FastAPI):
fastapi_app = FastAPI(lifespan=lifespan)


-@fastapi_app.get("/search")
+@fastapi_app.get("/search")  # type: ignore[misc]
def search_endpoint(
request: Request,
q: str = Query(..., description="Search query"),
limit: int = Query(5, description="Number of results"),
-):
+) -> dict[str, Any]:
pool = request.app.state.pool
results = search(pool, q, limit)
return {"results": results}
9 changes: 5 additions & 4 deletions examples/gdrive_text_embedding/main.py
@@ -1,8 +1,9 @@
from dotenv import load_dotenv
-from psycopg_pool import ConnectionPool
+from psycopg_pool import ConnectionPool  # type: ignore[import-not-found]
import cocoindex
import datetime
import os
+from typing import Any


@cocoindex.transform_flow()
@@ -23,7 +24,7 @@ def text_to_embedding(
@cocoindex.flow_def(name="GoogleDriveTextEmbedding")
def gdrive_text_embedding_flow(
flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
-):
+) -> None:
"""
Define an example flow that embeds text into a vector database.
"""
@@ -71,7 +72,7 @@ def gdrive_text_embedding_flow(
)


-def search(pool: ConnectionPool, query: str, top_k: int = 5):
+def search(pool: ConnectionPool, query: str, top_k: int = 5) -> list[dict[str, Any]]:
# Get the table name, for the export target in the gdrive_text_embedding_flow above.
table_name = cocoindex.utils.get_target_default_name(
gdrive_text_embedding_flow, "doc_embeddings"
@@ -94,7 +95,7 @@ def search(pool: ConnectionPool, query: str, top_k: int = 5):
]


-def _main():
+def _main() -> None:
# Initialize the database connection pool.
pool = ConnectionPool(os.getenv("COCOINDEX_DATABASE_URL"))
# Run queries in a loop to demonstrate the query capabilities.
57 changes: 40 additions & 17 deletions examples/image_search/colpali_main.py
@@ -1,14 +1,15 @@
import datetime
import os
from contextlib import asynccontextmanager
-from typing import Any
+from typing import AsyncIterator, List, Optional
+from dataclasses import dataclass

import cocoindex
from dotenv import load_dotenv
-from fastapi import FastAPI, Query
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.staticfiles import StaticFiles
-from qdrant_client import QdrantClient
+from fastapi import FastAPI, Query  # type: ignore[import-not-found]
+from fastapi.middleware.cors import CORSMiddleware  # type: ignore[import-not-found]
+from fastapi.staticfiles import StaticFiles  # type: ignore[import-not-found]
+from qdrant_client import QdrantClient  # type: ignore[import-not-found]


# --- Config ---
@@ -17,7 +18,7 @@
QDRANT_URL = os.getenv("QDRANT_URL", "localhost:6334")
PREFER_GRPC = os.getenv("QDRANT_PREFER_GRPC", "true").lower() == "true"

-# Use HTTP
+# Use HTTP (uncomment if needed)
# QDRANT_URL = os.getenv("QDRANT_URL", "localhost:6333")
# PREFER_GRPC = os.getenv("QDRANT_PREFER_GRPC", "false").lower() == "true"

@@ -27,6 +28,9 @@
print(f"📐 Using ColPali model {COLPALI_MODEL_NAME}")


+# --- Embedding helpers ---
+
+
@cocoindex.transform_flow()
def text_to_colpali_embedding(
text: cocoindex.DataSlice[str],
@@ -70,8 +74,11 @@ def image_object_embedding_flow(
)


+# --- Lifespan context ---
+
+
@asynccontextmanager
-async def lifespan(app: FastAPI) -> None:
+async def lifespan(app: FastAPI) -> AsyncIterator[None]:
load_dotenv()
cocoindex.init()
image_object_embedding_flow.setup(report_to_stdout=True)
@@ -85,6 +92,21 @@ async def lifespan(app: FastAPI) -> None:
yield


+# --- Response Dataclasses ---
+
+
+@dataclass
+class SearchResult:
+filename: str
+score: float
+caption: Optional[str] = None
+
+
+@dataclass
+class SearchResponse:
+results: List[SearchResult]
+
+
# --- FastAPI app for web API ---
app = FastAPI(lifespan=lifespan)

@@ -95,16 +117,17 @@ async def lifespan(app: FastAPI) -> None:
allow_methods=["*"],
allow_headers=["*"],
)

# Serve images from the 'img' directory at /img
app.mount("/img", StaticFiles(directory="img"), name="img")


# --- Search API ---
-@app.get("/search")
+@app.get("/search", response_model=SearchResponse)  # type: ignore[misc]
def search(
q: str = Query(..., description="Search query"),
limit: int = Query(5, description="Number of results"),
-) -> Any:
+) -> SearchResponse:
# Get the multi-vector embedding for the query
query_embedding = text_to_colpali_embedding.eval(q)
print(
@@ -122,13 +145,13 @@

print(f"📈 Found {len(search_results.points)} results with MaxSim scoring")

-return {
-"results": [
-{
-"filename": result.payload["filename"],
-"score": result.score,
-"caption": result.payload.get("caption"),
-}
+return SearchResponse(
+results=[
+SearchResult(
+filename=result.payload["filename"],
+score=result.score,
+caption=result.payload.get("caption"),
+)
for result in search_results.points
]
-}
+)