Skip to content

Commit 261c607

Browse files
committed
Merge remote-tracking branch 'origin/main' into aamirj/precommitmypy
2 parents 3b180a6 + 9078911 commit 261c607

File tree

3 files changed

+249
-12
lines changed

3 files changed

+249
-12
lines changed

packages/memory_module/core/memory_core.py

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ async def process_semantic_messages(
134134
if extraction.action == "add" and extraction.facts:
135135
for fact in extraction.facts:
136136
decision = await self._get_add_memory_processing_decision(fact.text, author_id)
137-
if decision == "ignore":
137+
if decision.decision == "ignore":
138138
logger.info(f"Decision to ignore fact {fact.text}")
139139
continue
140140
metadata = await self._extract_metadata_from_fact(fact.text)
@@ -194,10 +194,12 @@ async def remove_messages(self, message_ids: List[str]) -> None:
194194
await self.storage.remove_messages(message_ids)
195195
logger.info("messages {} are removed".format(",".join(message_ids)))
196196

197-
async def _get_add_memory_processing_decision(self, new_memory_fact: str, user_id: Optional[str]) -> str:
197+
async def _get_add_memory_processing_decision(
198+
self, new_memory_fact: str, user_id: Optional[str]
199+
) -> ProcessSemanticMemoryDecision:
198200
similar_memories = await self.retrieve_memories(new_memory_fact, user_id, None)
199201
decision = await self._extract_memory_processing_decision(new_memory_fact, similar_memories, user_id)
200-
return decision.decision
202+
return decision
201203

202204
async def _extract_memory_processing_decision(
203205
self, new_memory: str, old_memories: List[Memory], user_id: Optional[str]
@@ -208,15 +210,35 @@ async def _extract_memory_processing_decision(
208210
old_memory_content = "\n".join(
209211
[f"<MEMORY created_at={str(memory.created_at)}>{memory.content}</MEMORY>" for memory in old_memories]
210212
)
211-
system_message = f"""You are a semantic memory management agent. Your goal is to determine whether this new memory is duplicated with existing old memories.
213+
system_message = f"""You are a semantic memory management agent. Your task is to decide whether the new memory should be added to the memory system or ignored as a duplicate.
214+
212215
Considerations:
213-
- Time-based order: Each old memory has a creation time. Please take creation time into consideration.
214-
- Repeated behavior: If the new memory indicates a repeated idea over a period of time, it should be added to reflect the pattern.
215-
Return value:
216-
- Add: add new memory while keep old memories
217-
- Ignore: indicates that this memory is similar to an older memory and should be ignored
216+
1. Context Overlap:
217+
If the new memory conveys information that is substantially covered by an existing memory, it should be ignored.
218+
If the new memory adds unique or specific information not present in any old memory, it should be added.
219+
2. Granularity of Detail:
220+
Broader or more general memories should not replace specific ones. However, a specific detail can replace a general statement if it conveys the same underlying idea.
221+
For example:
222+
Old memory: “The user enjoys hiking in national parks.”
223+
New memory: “The user enjoys hiking in Yellowstone National Park.”
224+
Result: Ignore (The older memory already encompasses the specific case).
225+
3. Repeated Patterns:
226+
If the new memory reinforces a pattern of behavior over time (e.g., multiple mentions of a recurring habit, preference, or routine), it should be added to reflect this trend.
227+
4. Temporal Relevance:
228+
If the new memory reflects a significant change or update to the old memory, it should be added.
229+
For example:
230+
Old memory: “The user is planning a trip to Japan.”
231+
New memory: “The user has canceled their trip to Japan.”
232+
Result: Add (The new memory reflects a change).
233+
234+
Process:
235+
1. Compare the specificity, unique details, and time relevance of the new memory against old memories.
236+
2. Decide whether to add or ignore based on the considerations above.
237+
3. Provide a clear and concise justification for your decision.
238+
218239
Here are the old memories:
219240
{old_memory_content}
241+
220242
Here is the new memory:
221243
{new_memory} created at {str(datetime.datetime.now())}
222244
""" # noqa: E501

scripts/evaluate_memory_decisions.py

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
import asyncio
2+
import sys
3+
from datetime import datetime, timedelta
4+
from pathlib import Path
5+
from typing import cast
6+
from uuid import uuid4
7+
8+
from tqdm import tqdm
9+
10+
root_dir = Path(__file__).parent.parent
11+
sys.path.extend([str(root_dir), str(root_dir / "packages")])
12+
13+
from memory_module import MemoryModule, MemoryModuleConfig, UserMessageInput # noqa: E402
14+
from memory_module.services.scheduled_events_service import ScheduledEventsService # noqa: E402
15+
16+
from tests.memory_module.utils import build_llm_config # noqa: E402
17+
18+
# Evaluation scenarios for the add/ignore memory-processing decision.
# Each case seeds ``old_messages``, then submits ``incoming_message`` and
# compares the decision against ``expected_decision``; ``reason`` explains
# why that decision is the expected one.
TEST_CASES = [
    dict(
        title="General vs. Specific Detail",
        old_messages=["I love outdoor activities.", "I often visit national parks."],
        incoming_message="I enjoy hiking in Yellowstone National Park.",
        expected_decision="ignore",
        reason="The old messages already cover the new message’s context.",
    ),
    dict(
        title="Specific Detail vs. General",
        old_messages=["I really enjoy hiking in Yellowstone National Park.", "I like exploring scenic trails."],
        incoming_message="I enjoy hiking in national parks.",
        expected_decision="ignore",
        reason="The new message is broader and redundant to the old messages.",
    ),
    dict(
        title="Repeated Behavior Over Time",
        old_messages=["I had coffee at 8 AM yesterday.", "I had coffee at 8 AM this morning."],
        incoming_message="I had coffee at 8 AM again today.",
        expected_decision="add",
        reason="This reinforces a recurring pattern of behavior over time.",
    ),
    dict(
        title="Updated Temporal Context",
        old_messages=["I’m planning a trip to Japan.", "I’ve been looking at flights to Japan."],
        incoming_message="I just canceled my trip to Japan.",
        expected_decision="add",
        reason="The new message reflects a significant update to the old messages.",
    ),
    dict(
        title="Irrelevant or Unnecessary Update",
        old_messages=["I prefer tea over coffee.", "I usually drink tea every day."],
        incoming_message="I like tea.",
        expected_decision="ignore",
        reason="The new message does not add any unique or relevant information.",
    ),
    dict(
        title="Redundant Memory with Different Wording",
        old_messages=["I have an iPhone 12.", "I bought an iPhone 12 back in 2022."],
        incoming_message="I own an iPhone 12.",
        expected_decision="ignore",
        reason="The new message is a rephrased duplicate of the old messages.",
    ),
    dict(
        title="Additional Specific Information",
        old_messages=["I like playing video games.", "I often play games on my console."],
        incoming_message="I love playing RPG video games like Final Fantasy.",
        expected_decision="add",
        reason="The new message adds specific details about the type of games.",
    ),
    dict(
        title="Contradictory Information",
        old_messages=["I like cats.", "I have a cat named Whiskers."],
        incoming_message="Actually, I don’t like cats.",
        expected_decision="add",
        reason="The new message reflects a contradiction or change in preference.",
    ),
    dict(
        title="New Memory Completely Unrelated",
        old_messages=["I love reading mystery novels.", "I’m a big fan of Agatha Christie’s books."],
        incoming_message="I really enjoy playing soccer.",
        expected_decision="add",
        reason="The new message introduces entirely new information.",
    ),
    dict(
        title="Multiple Old Messages with Partial Overlap",
        old_messages=["I have a car.", "My car is a Toyota Camry."],
        incoming_message="I own a blue Toyota Camry.",
        expected_decision="add",
        reason="The new message adds a specific detail (color) not covered by the old messages.",
    ),
]
91+
92+
93+
async def evaluate_decision(memory_module, test_case):
    """Evaluate a single decision test case.

    Seeds the memory module with the case's ``old_messages``, extracts
    semantic facts from its ``incoming_message`` and asks the memory core for
    an add/ignore decision on every extracted fact.

    Args:
        memory_module: The memory module instance under evaluation.
        test_case: A dict providing ``old_messages``, ``incoming_message``
            and ``expected_decision``.

    Returns:
        A dict with ``success``, ``expected``, ``got``, ``reason`` and
        ``test_case`` keys. When no fact can be extracted from the incoming
        message an ``error`` key is included as well and ``got`` is
        ``"failed_extraction"``.
    """
    conversation_id = str(uuid4())
    # A fresh author per case keeps memories seeded by earlier cases (which
    # share the same database) out of this case's "old memories" lookup.
    author_id = f"user-{uuid4()}"
    expected = test_case["expected_decision"]

    # Add old messages
    for message_content in test_case["old_messages"]:
        message = UserMessageInput(
            id=str(uuid4()),
            content=message_content,
            author_id=author_id,
            conversation_ref=conversation_id,
            created_at=datetime.now() - timedelta(days=1),
        )
        await memory_module.add_message(message)

    # Flush the buffer scheduler before requesting a decision (presumably this
    # forces the buffered old messages to be processed — confirm).
    await memory_module.message_queue.message_buffer.scheduler.flush()

    # Create incoming message
    new_message = [
        UserMessageInput(
            id=str(uuid4()),
            content=test_case["incoming_message"],
            author_id=author_id,
            conversation_ref=conversation_id,
            created_at=datetime.now(),
        )
    ]

    # Get the decision
    extraction = await memory_module.memory_core._extract_semantic_fact_from_messages(new_message)
    if not (extraction.action == "add" and extraction.facts):
        return {
            "success": False,
            "error": "Failed to extract semantic facts",
            "test_case": test_case,
            "expected": expected,
            "got": "failed_extraction",
            "reason": "Failed to extract semantic facts",
        }

    # Check every extracted fact rather than only one of them: the case passes
    # only if all facts get the expected decision. The first mismatch (or, if
    # there is none, the last decision) is the one reported.
    decision = None
    for fact in extraction.facts:
        decision = await memory_module.memory_core._get_add_memory_processing_decision(fact.text, author_id)
        if decision.decision != expected:
            break

    return {
        "success": decision.decision == expected,
        "expected": expected,
        "got": decision.decision,
        "reason": decision.reason_for_decision,
        "test_case": test_case,
    }
142+
143+
144+
async def main():
    """Run every case in ``TEST_CASES`` and print an evaluation summary.

    Builds the LLM config, recreates the evaluation database under
    ``data/evaluation`` next to this script, evaluates each test case and
    prints the success/failure counts followed by details of failed cases.
    Exits with status 1 when no API key is configured. The scheduler is
    cleaned up even if an evaluation raises.
    """
    # Initialize config and memory module
    llm_config = build_llm_config()
    if not llm_config.api_key:
        print("Error: OpenAI API key not provided")
        sys.exit(1)

    db_path = Path(__file__).parent / "data" / "evaluation" / "memory_module.db"
    # Create db directory if it doesn't exist
    db_path.parent.mkdir(parents=True, exist_ok=True)
    config = MemoryModuleConfig(
        db_path=db_path,
        buffer_size=5,
        timeout_seconds=60,
        llm=llm_config,
    )

    # Delete existing db if it exists so each run starts from empty storage
    if db_path.exists():
        db_path.unlink()

    memory_module = MemoryModule(config=config)

    try:
        results = []

        # Run evaluations with progress bar
        print("\nEvaluating memory processing decisions...")
        for test_case in tqdm(TEST_CASES, desc="Processing test cases"):
            results.append(await evaluate_decision(memory_module, test_case))

        # Calculate statistics
        total = len(TEST_CASES)
        successes = sum(1 for result in results if result["success"])
        failures = len(results) - successes
        # Guard the division so an empty TEST_CASES list does not crash the report
        success_rate = (successes / total) * 100 if total else 0.0
        failure_rate = (failures / total) * 100 if total else 0.0

        # Print summary
        print("\n=== Evaluation Summary ===")
        print(f"Total test cases: {total}")
        print(f"Successes: {successes} ({success_rate:.1f}%)")
        print(f"Failures: {failures} ({failure_rate:.1f}%)")

        # Print detailed failures if any
        if failures > 0:
            print("\n=== Failed Cases ===")
            for result in results:
                if result["success"]:
                    continue
                test_case = result["test_case"]
                print(f"\nTest Case: {test_case['title']}")
                print(f"Reason: {test_case['reason']}")
                print(f"Actual result: {result['reason']}")
                print(f"Expected: {result['expected']}")
                print(f"Got: {result['got']}")
                print("Old messages:")
                for msg in test_case["old_messages"]:
                    print(f"  - {msg}")
                print(f"New message: {test_case['incoming_message']}")
                print("-" * 50)
    finally:
        # Cleanup runs even when an evaluation fails, so scheduled events are
        # not left pending.
        await cast(ScheduledEventsService, memory_module.message_queue.message_buffer.scheduler).cleanup()


if __name__ == "__main__":
    asyncio.run(main())

tests/memory_module/test_memory_module.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,10 @@ async def _mock_embedding(**kwargs):
116116
@pytest_asyncio.fixture(autouse=True)
117117
async def cleanup_scheduled_events(memory_module):
118118
"""Fixture to cleanup scheduled events after each test."""
119-
yield
120-
await memory_module.message_queue.message_buffer.scheduler.cleanup()
119+
try:
120+
yield
121+
finally:
122+
await memory_module.message_queue.message_buffer.scheduler.cleanup()
121123

122124

123125
@pytest.mark.asyncio
@@ -342,7 +344,7 @@ async def _validate_decision(memory_module, message: List[UserMessageInput], exp
342344
assert extraction.action == "add" and extraction.facts
343345
for fact in extraction.facts:
344346
decision = await memory_module.memory_core._get_add_memory_processing_decision(fact.text, "user-123")
345-
assert decision == expected_decision
347+
assert decision.decision == expected_decision
346348

347349

348350
@pytest.mark.asyncio

0 commit comments

Comments
 (0)