Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 45 additions & 14 deletions backend/src/memory/hsg.ts
Original file line number Diff line number Diff line change
Expand Up @@ -336,8 +336,18 @@ export function extract_essence(
.map((s) => s.trim())
.filter((s) => s.length > 10);
if (sents.length === 0) return raw.slice(0, max_len);
const score_sent = (s: string): number => {
const score_sent = (s: string, idx: number): number => {
let sc = 0;
// First sentence bonus - titles/headers are essential for retrieval
if (idx === 0) sc += 10;
// Second sentence often contains key context
if (idx === 1) sc += 5;
// Header/section markers (markdown or label-style)
if (/^#+\s/.test(s) || /^[A-Z][A-Z\s]+:/.test(s)) sc += 8;
// Colon-prefixed labels like "PROBLEM:", "SOLUTION:", "CONTEXT:"
if (/^[A-Z][a-z]+:/i.test(s)) sc += 6;
// Date patterns (ISO format)
if (/\d{4}-\d{2}-\d{2}/.test(s)) sc += 7;
if (
/\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+\d+/i.test(
s,
Expand All @@ -347,7 +357,7 @@ export function extract_essence(
if (/\$\d+|\d+\s*(miles|dollars|years|months|km)/.test(s)) sc += 4;
if (/\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)+/.test(s)) sc += 3;
if (
/\b(bought|purchased|serviced|visited|went|got|received|paid|earned|learned|discovered|found|saw|met|completed|finished)\b/i.test(
/\b(bought|purchased|serviced|visited|went|got|received|paid|earned|learned|discovered|found|saw|met|completed|finished|fixed|implemented|created|updated|added|removed|resolved)\b/i.test(
s,
)
)
Expand All @@ -357,21 +367,42 @@ export function extract_essence(
if (/\b(I|my|me)\b/.test(s)) sc += 1;
return sc;
};
const scored = sents.map((s) => ({ text: s, score: score_sent(s) }));
const scored = sents.map((s, idx) => ({ text: s, score: score_sent(s, idx), idx }));
scored.sort((a, b) => b.score - a.score);
// Build result, ensuring first sentence is always included if space permits
let comp = "";
for (const item of scored) {
const cand = comp ? `${comp}. ${item.text}` : item.text;
if (cand.length <= max_len) {
comp = cand;
} else if (comp.length < max_len * 0.7) {
const rem = max_len - comp.length - 2;
if (rem > 20) {
comp += ". " + item.text.slice(0, rem);
const firstSent = sents[0];
if (firstSent && firstSent.length <= max_len * 0.5) {
comp = firstSent;
const remaining = scored.filter((item) => item.idx !== 0);
for (const item of remaining) {
const cand = comp ? `${comp}. ${item.text}` : item.text;
if (cand.length <= max_len) {
comp = cand;
} else if (comp.length < max_len * 0.7) {
const rem = max_len - comp.length - 2;
if (rem > 20) {
comp += ". " + item.text.slice(0, rem);
}
break;
} else {
break;
}
}
} else {
for (const item of scored) {
const cand = comp ? `${comp}. ${item.text}` : item.text;
if (cand.length <= max_len) {
comp = cand;
} else if (comp.length < max_len * 0.7) {
const rem = max_len - comp.length - 2;
if (rem > 20) {
comp += ". " + item.text.slice(0, rem);
}
break;
} else {
break;
}
break;
} else {
break;
}
}
return comp || raw.slice(0, max_len);
Expand Down
Loading