This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit ebd5b80

Author: Luke Hinds (committed)
Merge remote-tracking branch 'origin/main' into cline
2 parents 647f012 + 8b95d7f commit ebd5b80

File tree: 18 files changed (+248, -177 lines)

.github/ISSUE_TEMPLATE/bug_report.yml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 name: "Bug Report"
 description: "Report a bug to help us improve the proxy system."
-title: "[Bug]: Provide a general summary of the issue"
+title: "-- Provide a general summary of the issue --"
 labels: [bug]
 assignees: "-"
 body:

.github/workflows/image-publish.yml

Lines changed: 4 additions & 6 deletions
@@ -1,8 +1,8 @@
 name: Publish Docker Image
 on:
-  schedule:
-    # Once weekly on Fridays at noon
-    - cron: "00 12 * * 5"
+  release:
+    types:
+      - published
   workflow_dispatch:

 jobs:
@@ -59,16 +59,14 @@ jobs:
           github_token: ${{ github.token }}
           workflow: ".github/workflows/import_packages.yml"
           workflow_conclusion: success
-          name: backup_weaviate
+          name: sqlite_data
           name_is_regexp: true
           skip_unpack: false
           if_no_artifact_found: ignore
       - name: Fetch latest FE commit SHA
         id: fetch_commit_fe_sha
         run: |
           echo "LATEST_RELEASE=$(curl -s "https://api.github.com/repos/stacklok/codegate-ui/releases/latest" -H "Authorization: Bearer ${{ secrets.GH_CI_TOKEN }}" | grep '"zipball_url":' | cut -d '"' -f 4)" >> $GITHUB_ENV
-      - name: Rename to accommodate to image
-        run: mv ./backup_weaviate ./weaviate_backup
       - name: Download git lfs dependencies
         run: |
           git lfs install

.github/workflows/import_packages.yml

Lines changed: 17 additions & 19 deletions
@@ -51,33 +51,31 @@ jobs:
           github_token: ${{ github.token }}
           workflow: ".github/workflows/import_packages.yml"
           workflow_conclusion: success
-          name: backup_weaviate
+          name: sqlite_data
           path: /tmp/
           name_is_regexp: true
           skip_unpack: false
-          if_no_artifact_found: ignore
+          if_no_artifact_found: ignore

-      - name: Create folder if artifact download is not enabled
-        if: ${{ github.event.inputs.enable_artifact_download == 'false' }}
+      - name: Install Poetry
         run: |
-          mkdir -p /tmp/backup_weaviate
-          echo "Folder ./backup_weaviate created because artifact download is disabled."
+          curl -sSL https://install.python-poetry.org | python3 -
+
+      - name: Add Poetry to PATH
+        run: |
+          echo "PATH=$HOME/.poetry/bin:$PATH" >> $GITHUB_ENV
+
+      - name: Install dependencies with Poetry
+        run: |
+          poetry install

-      - name: Run sync
+      - name: 'Run import_packages.py with poetry'
         run: |
-          export PYTHONPATH=$PYTHONPATH:./
-          export BACKUP_FILESYSTEM_PATH=/tmp/backup_weaviate/
-          export BACKUP_FOLDER=backup
-          # Conditionally export the variables only if artifact download is enabled
-          if [ "${{ github.event.inputs.enable_artifact_download }}" == "true" ]; then
-            python scripts/import_packages.py --jsonl-dir /tmp/jsonl-files/
-          else
-            python scripts/import_packages.py --restore-backup False --jsonl-dir /tmp/jsonl-files/
-          fi
+          poetry run python scripts/import_packages.py --jsonl-dir /tmp/jsonl-files --vec-db-path /tmp/sqlite_data/vectordb.db

-      - name: 'Upload Backup Files'
+      - name: 'Upload SQLite Vector DB File'
         uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4
         with:
-          name: backup_weaviate
-          path: /tmp/backup_weaviate/backup*
+          name: sqlite_data
+          path: /tmp/sqlite_data/vectordb.db
           retention-days: 90

Dockerfile

Lines changed: 0 additions & 5 deletions
@@ -64,11 +64,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 # Create a non-root user
 RUN useradd -m -u 1000 -r codegate

-# Copy backup if needed
-RUN mkdir -p /tmp/weaviate_backup
-# will not fail if the file does not exist
-COPY weaviate_backu[p] /tmp/weaviate_backup
-RUN chown -R codegate /tmp/weaviate_backup

 # Set permissions for user codegate to run nginx
 RUN chown -R codegate /var/lib/nginx && \

poetry.lock

Lines changed: 123 additions & 103 deletions
Some generated files are not rendered by default.

pyproject.toml

Lines changed: 5 additions & 4 deletions
@@ -12,22 +12,23 @@ PyYAML = ">=6.0.1"
 fastapi = ">=0.115.5"
 uvicorn = ">=0.32.1"
 structlog = ">=24.4.0"
-litellm = "^1.55.11"
+litellm = "^1.56.8"
 llama_cpp_python = ">=0.3.2"
 cryptography = "^44.0.0"
 sqlalchemy = "^2.0.28"
 greenlet = "^3.0.3"
 aiosqlite = "^0.20.0"
 ollama = ">=0.4.4"
-pydantic-settings = "^2.7.0"
-sqlite-vec = ">=0.1.0"
+pydantic-settings = "^2.7.1"
 numpy = ">=1.24.0"
 tree-sitter = ">=0.23.2"
 tree-sitter-go = ">=0.23.4"
 tree-sitter-java = ">=0.23.5"
 tree-sitter-javascript = ">=0.23.1"
 tree-sitter-python = ">=0.23.6"
+tree-sitter-rust = ">=0.23.2"

+sqlite-vec-sl-tmp = "^0.0.4"
 [tool.poetry.group.dev.dependencies]
 pytest = ">=7.4.0"
 pytest-cov = ">=4.1.0"
@@ -37,7 +38,7 @@ bandit = ">=1.7.10"
 build = ">=1.0.0"
 wheel = ">=0.40.0"
 litellm = ">=1.52.11"
-pytest-asyncio = "0.25.0"
+pytest-asyncio = "0.25.1"
 llama_cpp_python = ">=0.3.2"
 scikit-learn = ">=1.6.0"
 python-dotenv = ">=1.0.1"

scripts/entrypoint.sh

Lines changed: 2 additions & 2 deletions
@@ -1,10 +1,10 @@
 #!/bin/bash

 # those are hardcoded on the image, will not change
-BACKUP_PATH="/tmp/weaviate_backup"
 BACKUP_NAME="backup"
 MODEL_BASE_PATH="/app/codegate_volume/models"
 CODEGATE_DB_FILE="/app/codegate_volume/db/codegate.db"
+CODEGATE_VEC_DB_FILE="/app/sqlite_data/vectordb.db"
 CODEGATE_CERTS="/app/codegate_volume/certs"

 # Function to restore backup if paths are provided
@@ -37,7 +37,7 @@ start_application() {
     # first restore the models
     mkdir -p /app/codegate_volume/models
     cp /app/default_models/* /app/codegate_volume/models
-    CMD_ARGS="--port 8989 --host 0.0.0.0 --model-base-path $MODEL_BASE_PATH --db-path $CODEGATE_DB_FILE"
+    CMD_ARGS="--port 8989 --host 0.0.0.0 --model-base-path $MODEL_BASE_PATH --db-path $CODEGATE_DB_FILE --vec-db-path $CODEGATE_VEC_DB_FILE"

     # Check and append additional URLs if they are set
     [ -n "$CODEGATE_OPENAI_URL" ] && CMD_ARGS+=" --openai-url $CODEGATE_OPENAI_URL"

scripts/import_packages.py

Lines changed: 8 additions & 8 deletions
@@ -5,16 +5,16 @@
 import sqlite3

 import numpy as np
-import sqlite_vec
+import sqlite_vec_sl_tmp

 from codegate.inference.inference_engine import LlamaCppInferenceEngine
 from codegate.utils.utils import generate_vector_string


 class PackageImporter:
-    def __init__(self, jsonl_dir="data", db_path="./sqlite_data/vectordb.db"):
-        os.makedirs(os.path.dirname(db_path), exist_ok=True)
-        self.db_path = db_path
+    def __init__(self, jsonl_dir="data", vec_db_path="./sqlite_data/vectordb.db"):
+        os.makedirs(os.path.dirname(vec_db_path), exist_ok=True)
+        self.vec_db_path = vec_db_path
         self.json_files = [
             os.path.join(jsonl_dir, "archived.jsonl"),
             os.path.join(jsonl_dir, "deprecated.jsonl"),
@@ -25,9 +25,9 @@ def __init__(self, jsonl_dir="data", db_path="./sqlite_data/vectordb.db"):
         self.model_path = "./codegate_volume/models/all-minilm-L6-v2-q5_k_m.gguf"

     def _get_connection(self):
-        conn = sqlite3.connect(self.db_path)
+        conn = sqlite3.connect(self.vec_db_path)
         conn.enable_load_extension(True)
-        sqlite_vec.load(conn)
+        sqlite_vec_sl_tmp.load(conn)
         conn.enable_load_extension(False)
         return conn

@@ -129,12 +129,12 @@ def __del__(self):
         help="Directory containing JSONL files. Default is 'data'.",
     )
     parser.add_argument(
-        "--db-path",
+        "--vec-db-path",
         type=str,
         default="./sqlite_data/vectordb.db",
         help="Path to SQLite database file. Default is './sqlite_data/vectordb.db'.",
     )
     args = parser.parse_args()

-    importer = PackageImporter(jsonl_dir=args.jsonl_dir, db_path=args.db_path)
+    importer = PackageImporter(jsonl_dir=args.jsonl_dir, vec_db_path=args.vec_db_path)
     asyncio.run(importer.run_import())
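For orientation, a minimal standalone sketch of the connection pattern the importer now relies on: the renamed sqlite-vec-sl-tmp package is loaded as a SQLite extension before any vector operations, mirroring _get_connection() above (the default path is illustrative).

import sqlite3

import sqlite_vec_sl_tmp


def open_vector_db(vec_db_path="./sqlite_data/vectordb.db"):
    # Open the vector DB and load the sqlite-vec-sl-tmp extension.
    conn = sqlite3.connect(vec_db_path)
    conn.enable_load_extension(True)   # temporarily allow loading extensions
    sqlite_vec_sl_tmp.load(conn)       # register the vector search extension
    conn.enable_load_extension(False)  # lock extension loading down again
    return conn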

src/codegate/ca/codegate_ca.py

Lines changed: 1 addition & 1 deletion
@@ -173,7 +173,7 @@ def _load_existing_certificates(self) -> None:
                 self._cert_cache[common_name] = CachedCertificate(
                     cert_path=cert_path,
                     key_path=key_path,
-                    creation_time=datetime.utcnow(),
+                    creation_time=datetime.now(datetime.UTC),
                 )
             else:
                 logger.debug(f"Skipping expired certificate for {common_name}")

src/codegate/codegate_logging.py

Lines changed: 1 addition & 0 deletions
@@ -148,6 +148,7 @@ def setup_logging(
     # Set explicitly the log level for other modules
     logging.getLogger("sqlalchemy").disabled = True
     logging.getLogger("uvicorn.error").disabled = True
+    logging.getLogger("aiosqlite").disabled = True

     # Create a logger for our package
     logger = structlog.get_logger("codegate")

src/codegate/dashboard/post_processing.py

Lines changed: 33 additions & 9 deletions
@@ -1,5 +1,6 @@
 import asyncio
 import json
+import re
 from typing import List, Optional, Tuple, Union

 import structlog
@@ -180,6 +181,20 @@ async def parse_get_prompt_with_output(
     )


+def parse_question_answer(input_text: str) -> str:
+    # given a string, detect if we have a pattern of "Context: xxx \n\nQuery: xxx" and strip it
+    pattern = r'^Context:.*?\n\n\s*Query:\s*(.*)$'
+
+    # Search using the regex pattern
+    match = re.search(pattern, input_text, re.DOTALL)
+
+    # If a match is found, return the captured group after "Query:"
+    if match:
+        return match.group(1)
+    else:
+        return input_text
+
+
 async def match_conversations(
     partial_conversations: List[Optional[PartialConversation]],
 ) -> List[Conversation]:
@@ -205,17 +220,26 @@ async def match_conversations(
     conversations = []
     for chat_id, sorted_convers in sorted_convers.items():
         questions_answers = []
+        first_partial_conversation = None
         for partial_conversation in sorted_convers:
-            questions_answers.append(partial_conversation.question_answer)
-        conversations.append(
-            Conversation(
-                question_answers=questions_answers,
-                provider=partial_conversation.provider,
-                type=partial_conversation.type,
-                chat_id=chat_id,
-                conversation_timestamp=sorted_convers[0].request_timestamp,
+            # check if we have an answer, otherwise do not add it
+            if partial_conversation.question_answer.answer is not None:
+                first_partial_conversation = partial_conversation
+                partial_conversation.question_answer.question.message = parse_question_answer(
+                    partial_conversation.question_answer.question.message)
+                questions_answers.append(partial_conversation.question_answer)
+
+        # only add conversation if we have some answers
+        if len(questions_answers) > 0 and first_partial_conversation is not None:
+            conversations.append(
+                Conversation(
+                    question_answers=questions_answers,
+                    provider=first_partial_conversation.provider,
+                    type=first_partial_conversation.type,
+                    chat_id=chat_id,
+                    conversation_timestamp=sorted_convers[0].request_timestamp,
+                )
             )
-        )

     return conversations
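A short usage sketch of the parse_question_answer helper added above, showing how a RAG-style "Context: ... Query: ..." prompt is reduced to just the query text; the example inputs are made up.

import re


def parse_question_answer(input_text: str) -> str:
    # Strip a leading "Context: ...\n\nQuery: ..." wrapper, keeping only the query.
    pattern = r"^Context:.*?\n\n\s*Query:\s*(.*)$"
    match = re.search(pattern, input_text, re.DOTALL)
    return match.group(1) if match else input_text


print(parse_question_answer("Context: retrieved package docs\n\nQuery: is this package deprecated?"))
# -> "is this package deprecated?"
print(parse_question_answer("a plain question with no context prefix"))
# -> returned unchanged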

src/codegate/db/connection.py

Lines changed: 4 additions & 3 deletions
@@ -144,7 +144,8 @@ async def record_outputs(self, outputs: List[Output]) -> Optional[Output]:
             """
         )
         recorded_output = await self._insert_pydantic_model(output_db, sql)
-        logger.debug(f"Recorded output: {recorded_output}")
+        # Uncomment to debug
+        # logger.debug(f"Recorded output: {recorded_output}")
         return recorded_output

     async def record_alerts(self, alerts: List[Alert]) -> List[Alert]:
@@ -177,8 +178,8 @@ async def record_alerts(self, alerts: List[Alert]) -> List[Alert]:
                 recorded_alerts.append(alert_result)
                 if alert_result and alert_result.trigger_category == "critical":
                     await alert_queue.put(f"New alert detected: {alert.timestamp}")
-
-        logger.debug(f"Recorded alerts: {recorded_alerts}")
+        # Uncomment to debug the recorded alerts
+        # logger.debug(f"Recorded alerts: {recorded_alerts}")
         return recorded_alerts

     def _should_record_context(self, context: Optional[PipelineContext]) -> bool:

src/codegate/pipeline/base.py

Lines changed: 4 additions & 2 deletions
@@ -117,7 +117,8 @@ def add_alert(
                 timestamp=datetime.datetime.now(datetime.timezone.utc),
             )
         )
-        logger.debug(f"Added alert to context: {self.alerts_raised[-1]}")
+        # Uncomment the below to debug
+        # logger.debug(f"Added alert to context: {self.alerts_raised[-1]}")

     def add_input_request(
         self, normalized_request: ChatCompletionRequest, is_fim_request: bool, provider: str
@@ -159,7 +160,8 @@ def add_output(self, model_response: ModelResponse) -> None:
                     output=output_str,
                 )
             )
-            logger.debug(f"Added output to context: {self.output_responses[-1]}")
+            # Uncomment the below to debug the responses
+            # logger.debug(f"Added output to context: {self.output_responses[-1]}")
         except Exception as e:
             logger.error(f"Failed to serialize output: {model_response}", error=str(e))
             return

src/codegate/storage/storage_engine.py

Lines changed: 11 additions & 10 deletions
@@ -4,7 +4,7 @@
 from typing import List

 import numpy as np
-import sqlite_vec
+import sqlite_vec_sl_tmp
 import structlog

 from codegate.config import Config
@@ -62,7 +62,7 @@ def _get_connection(self):
         try:
             conn = sqlite3.connect(self.db_path)
             conn.enable_load_extension(True)
-            sqlite_vec.load(conn)
+            sqlite_vec_sl_tmp.load(conn)
             conn.enable_load_extension(False)
             return conn
         except Exception as e:
@@ -200,14 +200,15 @@ async def search(

         # Log the raw SQL results
         rows = cursor.fetchall()
-        logger.debug(
-            "Raw SQL results",
-            row_count=len(rows),
-            rows=[
-                {"name": row[0], "type": row[1], "status": row[2], "description": row[3]}
-                for row in rows
-            ],
-        )
+        # Uncomment the following lines to log
+        # logger.debug(
+        #     "Raw SQL results",
+        #     row_count=len(rows),
+        #     rows=[
+        #         {"name": row[0], "type": row[1], "status": row[2], "description": row[3]}
+        #         for row in rows
+        #     ],
+        # )

         results = []
         query_words = None
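For readers unfamiliar with the extension swapped in above: sqlite-vec-sl-tmp is used like sqlite-vec, i.e. loaded into a connection and queried through a vec0 virtual table. The sketch below assumes the fork keeps the upstream vec0 API; the table name and vector size are hypothetical.

import sqlite3

import numpy as np
import sqlite_vec_sl_tmp

conn = sqlite3.connect(":memory:")
conn.enable_load_extension(True)
sqlite_vec_sl_tmp.load(conn)
conn.enable_load_extension(False)

# Hypothetical 4-dimensional embeddings, stored as raw float32 blobs.
conn.execute("CREATE VIRTUAL TABLE demo_vec USING vec0(embedding float[4])")
item = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32)
conn.execute("INSERT INTO demo_vec(rowid, embedding) VALUES (?, ?)", (1, item.tobytes()))

# KNN search: nearest rows to the query vector, ordered by distance.
query = np.array([0.1, 0.2, 0.25, 0.4], dtype=np.float32)
rows = conn.execute(
    "SELECT rowid, distance FROM demo_vec WHERE embedding MATCH ? ORDER BY distance LIMIT 3",
    (query.tobytes(),),
).fetchall()
print(rows)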
