Merged
68 commits
74c5a5e  add preference text memory (Sep 11, 2025)
b8a739d  finish milvus support (Sep 11, 2025)
89b8d4b  add new builder (Sep 12, 2025)
41a487a  finish preference textual memory base level (Sep 16, 2025)
da601d8  modify code structure (Sep 17, 2025)
6457f59  modify pref module (Sep 18, 2025)
e1f3ecd  implement remaining preference function (Sep 18, 2025)
d412c78  modify preference.py (Sep 18, 2025)
0e502f4  fix bug in milvus (Sep 18, 2025)
83a9a3b  finish debug (Sep 18, 2025)
9f4de02  modify user pref user_id code (Sep 18, 2025)
9ffd095  fix bug in milvus (Sep 19, 2025)
52dbbac  finish debug in core (Sep 19, 2025)
0d1a859  repair bug in milvus get_all (Sep 21, 2025)
d706bb2  add pref mem search time in core (Sep 22, 2025)
2fab119  modify search for pref mem in product.py (Sep 22, 2025)
a906230  add simple pref memos example (Sep 22, 2025)
a317457  fix bug in examples/mem_os/simple_prefs_memos_product.py (Sep 23, 2025)
2493b8a  repair bug in user-id-related part (Sep 23, 2025)
3b84f8e  modify search (Sep 23, 2025)
8945b10  repair bug in slow update (Sep 24, 2025)
ee63e18  fix definition error in extractor -> extract_implicit_preferences (Sep 24, 2025)
67ed7a8  repair definition error in extractor and modify split func in splitter (Sep 24, 2025)
2ab893e  modify code (Sep 26, 2025)
8ce0ca3  modify adder (Sep 26, 2025)
b89e28a  optimize the code (Sep 28, 2025)
d77a0d1  repair bug in adder and extractor (Sep 28, 2025)
21ede58  finish make test and make pre-commit (Sep 29, 2025)
54a6b23  repair bug in preference (Sep 29, 2025)
ed2806a  add memory field for MilvusVecDBItem and modify related module (Sep 30, 2025)
4bd9787  pref code cleanup (Oct 5, 2025)
4e3bd96  modify prompt of extractor (Oct 10, 2025)
e1b505f  modify extractor (Oct 10, 2025)
ab7f543  merge new dev funcs into branch (Oct 10, 2025)
eb1cdb0  add reranker to pref mem (Oct 10, 2025)
f8e26ad  remove assembler in pref mem (Oct 10, 2025)
666a6fc  modify code (Oct 10, 2025)
3fc4371  add op-trace-based update method in add (Oct 13, 2025)
90e1252  modify slow update in adder (Oct 14, 2025)
2e4714d  modify implicit-part code in extractor and add deduplication in utils (Oct 14, 2025)
8cbd710  modify deduplication threshold (Oct 14, 2025)
0d205c5  modify API config (Oct 15, 2025)
d6827ea  repair search-related bug in adder (Oct 15, 2025)
0ee048c  repair duplicate-search bug in core (Oct 15, 2025)
71d636d  merge func to test (Oct 20, 2025)
2e2ad34  add pref to new naive cube and server API (Oct 21, 2025)
52f04ef  Merge branch 'feat/test' into feat/inst_cplt_with_api (Oct 21, 2025)
0f5547e  add async pref add via mem_scheduler (Oct 21, 2025)
72083da  modify (Oct 21, 2025)
6b83801  replace print with logger (Oct 22, 2025)
87e189b  repair bug from make pre-commit (Oct 22, 2025)
f9094e0  inst cplt (Oct 23, 2025)
4f764fc  align to liji cloud server (Oct 23, 2025)
e02722a  repair pkg problem (Oct 23, 2025)
f196642  resolve conflict (Oct 23, 2025)
abb9311  modify example of pref (Oct 24, 2025)
d3ad365  pre_commit (Oct 24, 2025)
c78913e  fix API bug (Oct 24, 2025)
06b41ca  merge inst_cplt to dev (Oct 24, 2025)
090f0f6  fix pre-commit (Oct 24, 2025)
403ab7f  fix pre-commit (Oct 24, 2025)
3241a77  fetch dev and follow new code, add instruct complete and ablation i… (Oct 24, 2025)
0c8905d  fix pre-commit error (Oct 24, 2025)
6a28893  modify code following review (Oct 25, 2025)
d7ec764  fix bug in make pre-commit (Oct 25, 2025)
af53171  merge new dev (Oct 25, 2025)
aac6a39  repair bug in server router (Oct 25, 2025)
82a8b3f  fix pre-commit bug (Oct 25, 2025)
2 changes: 1 addition & 1 deletion docker/requirements.txt
@@ -157,4 +157,4 @@ volcengine-python-sdk==4.0.6
watchfiles==1.1.0
websockets==15.0.1
xlrd==2.0.2
xlsxwriter==3.2.5
xlsxwriter==3.2.5
8 changes: 7 additions & 1 deletion docs/openapi.json
@@ -884,7 +884,7 @@
"type": "string",
"title": "Session Id",
"description": "Session ID for the MOS. This is used to distinguish between different dialogue",
"default": "0ce84b9c-0615-4b9d-83dd-fba50537d5d3"
"default": "41bb5e18-252d-4948-918c-07d82aa47086"
},
"chat_model": {
"$ref": "#/components/schemas/LLMConfigFactory",
@@ -939,6 +939,12 @@
"description": "Enable parametric memory for the MemChat",
"default": false
},
"enable_preference_memory": {
"type": "boolean",
"title": "Enable Preference Memory",
"description": "Enable preference memory for the MemChat",
"default": false
},
"enable_mem_scheduler": {
"type": "boolean",
"title": "Enable Mem Scheduler",
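For context, a minimal sketch of a config payload that exercises the new flag. Only `enable_preference_memory` and the neighboring fields visible in the schema above come from this diff; the endpoint URL and the concrete values are assumptions for illustration.

```python
# Minimal sketch of a MemChat config payload using the new flag.
# Only "enable_preference_memory" (and the adjacent schema fields
# shown above) come from this diff; the endpoint is hypothetical.
import json
import urllib.request

payload = {
    "session_id": "demo-session",       # any session identifier
    "enable_parametric_memory": False,  # schema default
    "enable_preference_memory": True,   # new field, defaults to false
    "enable_mem_scheduler": True,
}

req = urllib.request.Request(
    "http://localhost:8000/configure",  # hypothetical endpoint
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
# urllib.request.urlopen(req)  # run against a live server to apply it
```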
9 changes: 5 additions & 4 deletions evaluation/.env-example
@@ -22,9 +22,10 @@ SUPERMEMORY_API_KEY="sm_xxx"
MEMOBASE_API_KEY="xxx"
MEMOBASE_PROJECT_URL="http://***.***.***.***:8019"

# eval settings
PRE_SPLIT_CHUNK=false

# pref
PRE_SPLIT_CHUNK=false # pre-split chunks on the client side, for PersonaMem and PrefEval
INSTRUCT_COMPLETE=true # whether to use the instruct-complete format
ABLATION_PREF=false # ablation: remove pref mem, keep only text mem

# Configuration Only For Scheduler
# RabbitMQ Configuration
@@ -45,4 +46,4 @@ MEMSCHEDULER_GRAPHDBAUTH_URI=bolt://localhost:7687
MEMSCHEDULER_GRAPHDBAUTH_USER=neo4j
MEMSCHEDULER_GRAPHDBAUTH_PASSWORD=***
MEMSCHEDULER_GRAPHDBAUTH_DB_NAME=neo4j
MEMSCHEDULER_GRAPHDBAUTH_AUTO_CREATE=true
MEMSCHEDULER_GRAPHDBAUTH_AUTO_CREATE=true
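The three pref settings added above are plain booleans in the environment. A minimal sketch of how an eval script might read them; the `_env_flag` helper is hypothetical, only the variable names come from the file:

```python
# Sketch: parsing the new eval flags. _env_flag is an assumed helper;
# only the variable names come from .env-example above.
import os

from dotenv import load_dotenv

load_dotenv()


def _env_flag(name: str, default: str = "false") -> bool:
    """Interpret "true"/"1"/"yes" (any case) as True."""
    return os.getenv(name, default).strip().lower() in {"true", "1", "yes"}


PRE_SPLIT_CHUNK = _env_flag("PRE_SPLIT_CHUNK")      # client-side pre-split
INSTRUCT_COMPLETE = _env_flag("INSTRUCT_COMPLETE")  # instruct-complete format
ABLATION_PREF = _env_flag("ABLATION_PREF")          # text mem only, no pref mem
```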
44 changes: 24 additions & 20 deletions evaluation/scripts/PrefEval/pref_eval.py
@@ -1,14 +1,18 @@
import argparse
import asyncio
import json
import os
import re
import argparse
from typing import Dict, Any

from collections import Counter
from tqdm.asyncio import tqdm
import os
from typing import Any

import pandas as pd

from dotenv import load_dotenv
from openai import OpenAI
from tqdm.asyncio import tqdm


load_dotenv()

@@ -30,7 +34,7 @@ async def call_gpt4o_mini_async(client: OpenAI, prompt: str) -> str:
)
return response.choices[0].message.content
except Exception as e:
print(f"API call failed: {str(e)}")
print(f"API call failed: {e!s}")
return ""


@@ -41,7 +45,7 @@ def parse_xml_response(response: str, tag: str) -> str:

async def evaluate_violate_preference_async(
client: OpenAI, preference: str, question: str, response: str
) -> Dict[str, str]:
) -> dict[str, str]:
prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant's response violates the user's stated preference.
Evaluate the response based on these criteria:

@@ -73,16 +77,16 @@

async def evaluate_acknowledge_preference_async(
client: OpenAI, question: str, response: str
) -> Dict[str, str]:
prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant acknowledges any user preference in answering the user's query.
) -> dict[str, str]:
prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant acknowledges any user preference in answering the user's query.

Evaluate the response based on these stringent criteria:

1. Check if the response explicitly or implicitly mentions or references a user preference. 2. The content of the preference is irrelevant for this check; only its presence matters.

Answer "Yes" if:

1. The response explicitly mentions or refers to a user preference in answering the user's question. Examples include: "Based on your previous preference for xxxx"; "Based on your previous preference, I would suggest you xxxx"; "Since you mentioned you prefer/dislike xxxx"; "Since you are a xxxx"; "I will recommend the following given your preference for xxx", etc. You should extract the "xxxx" in your answer. If it only says "Based on our previous conversation, I recommend..." and does not explicitly mention any preference, you should answer 'No'.
1. The response explicitly mentions or refers to a user preference in answering the user's question. Examples include: "Based on your previous preference for xxxx"; "Based on your previous preference, I would suggest you xxxx"; "Since you mentioned you prefer/dislike xxxx"; "Since you are a xxxx"; "I will recommend the following given your preference for xxx", etc. You should extract the "xxxx" in your answer. If it only says "Based on our previous conversation, I recommend..." and does not explicitly mention any preference, you should answer 'No'.
2. The response assumes the user preference in answering the user's question implicitly. For example, when the user asks 'Can you recommend me cars to drive?', if the response is 'Based on your preference, I will recommend non-EV cars, ...', then this indicates the assistant assumes that the user's preference is a dislike of EV cars, and you should answer 'Yes'.

Answer "No" if the response does not mention or refer to any user preference explicitly or implicitly. If you cannot extract the sentence stating what the preference is, answer 'No'.
@@ -103,7 +107,7 @@

async def evaluate_hallucinate_preference_async(
client: OpenAI, preference: str, restatement: str
) -> Dict[str, str]:
) -> dict[str, str]:
if not restatement.strip():
return {"explanation": "No restatement provided by assistant", "answer": "No"}
prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant's restatement of the user's stated preference is the same preference. Evaluate the response based on these stringent criteria to answer if the assistant has hallucinated the preference or not:
@@ -120,10 +124,10 @@
2. The assistant's restatement is a minor paraphrase that fully preserves the meaning and intent of the original preference.
3. The restatement is just empty, no hallucination.

Here is the information:
Original user preference: "{preference}"
Assistant's restatement: "{restatement}"
Examine the original preference and the assistant's restatement meticulously and answer. Please answer in this exact XML format without any other additional text:
Here is the information:
Original user preference: "{preference}"
Assistant's restatement: "{restatement}"
Examine the original preference and the assistant's restatement meticulously and answer. Please answer in this exact XML format without any other additional text:
<explanation>[1 short sentence explanation]</explanation>
<answer>[Yes/No]</answer>"""

@@ -136,7 +140,7 @@

async def evaluate_helpful_response_async(
client: OpenAI, question: str, response: str
) -> Dict[str, str]:
) -> dict[str, str]:
prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant provides any substantive response to the user's query.
Evaluate the response based on these stringent criteria:

@@ -174,7 +178,7 @@ async def evaluate_helpful_response_async(
}


def classify_error_type(evaluation_results: Dict[str, Any]) -> str:
def classify_error_type(evaluation_results: dict[str, Any]) -> str:
violate = evaluation_results["violate_preference"]["answer"]
acknowledge = evaluation_results["acknowledge_preference"]["answer"]
hallucinate = evaluation_results["hallucinate_preference"]["answer"]
@@ -192,7 +196,7 @@ def classify_error_type(evaluation_results: Dict[str, Any]) -> str:
return "Personalized Response"


async def process_line(line: str, client: OpenAI, semaphore: asyncio.Semaphore) -> Dict[str, Any]:
async def process_line(line: str, client: OpenAI, semaphore: asyncio.Semaphore) -> dict[str, Any]:
async with semaphore:
data = json.loads(line.strip())
preference = data["preference"]
@@ -223,7 +227,7 @@ async def process_line(line: str, client: OpenAI, semaphore: asyncio.Semaphore)
return result


def log_summary(error_counter: Counter, total_samples: int) -> Dict[str, Dict[str, float]]:
def log_summary(error_counter: Counter, total_samples: int) -> dict[str, dict[str, float]]:
summary_data = {}
print("\n--- Error Type Summary ---")

@@ -247,7 +251,7 @@ def log_summary(error_counter: Counter, total_samples: int) -> Dict[str, Dict[st


def generate_excel_summary(
summary_results: Dict[str, Dict[str, float]],
summary_results: dict[str, dict[str, float]],
avg_search_time: float,
avg_context_tokens: float,
avg_add_time: float,
@@ -317,7 +321,7 @@ async def main(concurrency_limit: int, input_file: str, output_file: str, output
client = OpenAI(api_key=API_KEY, base_url=API_URL)

try:
with open(input_file, "r", encoding="utf-8") as f:
with open(input_file, encoding="utf-8") as f:
lines = f.readlines()
except FileNotFoundError:
print(f"Error: Input file not found at '{input_file}'")
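All four evaluators above ask the judge model to answer in `<explanation>`/`<answer>` tags and pull the fields out with `parse_xml_response`, whose body falls outside the hunks shown. A plausible regex-based sketch given the prompt format, not necessarily the repository's exact code:

```python
# Plausible sketch of parse_xml_response; the real body is not shown
# in this diff, but the prompts guarantee the shape
# "<explanation>...</explanation>\n<answer>[Yes/No]</answer>".
import re


def parse_xml_response(response: str, tag: str) -> str:
    """Return the text inside <tag>...</tag>, or "" if the tag is absent."""
    match = re.search(rf"<{tag}>(.*?)</{tag}>", response, re.DOTALL)
    return match.group(1).strip() if match else ""


reply = "<explanation>The answer ignores the stated preference.</explanation>\n<answer>Yes</answer>"
assert parse_xml_response(reply, "answer") == "Yes"
assert parse_xml_response(reply, "verdict") == ""  # missing tag -> empty string
```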
6 changes: 4 additions & 2 deletions evaluation/scripts/PrefEval/pref_mem0.py
@@ -4,12 +4,14 @@
import os
import sys
import time

import tiktoken

from dotenv import load_dotenv
from irrelevant_conv import irre_10, irre_300
from openai import OpenAI
from tqdm import tqdm

from irrelevant_conv import irre_10, irre_300

ROOT_DIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -199,7 +201,7 @@ def main():
args = parser.parse_args()

try:
with open(args.input, "r", encoding="utf-8") as infile:
with open(args.input, encoding="utf-8") as infile:
lines = infile.readlines()
except FileNotFoundError:
print(f"Error: Input file '{args.input}' not found")
10 changes: 6 additions & 4 deletions evaluation/scripts/PrefEval/pref_memobase.py
@@ -4,12 +4,14 @@
import os
import sys
import time

import tiktoken

from dotenv import load_dotenv
from irrelevant_conv import irre_10, irre_300
from openai import OpenAI
from tqdm import tqdm
import time
from irrelevant_conv import irre_10, irre_300


ROOT_DIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -49,7 +51,7 @@ def add_memory_for_line(
if conversation:
messages = []

for chunk_start in range(0, len(conversation)):
for chunk_start in range(len(conversation)):
chunk = conversation[chunk_start : chunk_start + 1]
timestamp_add = str(int(time.time() * 100))
time.sleep(0.001) # Ensure unique timestamp
@@ -210,7 +212,7 @@ def main():
args = parser.parse_args()

try:
with open(args.input, "r", encoding="utf-8") as infile:
with open(args.input, encoding="utf-8") as infile:
lines = infile.readlines()
except FileNotFoundError:
print(f"Error: Input file '{args.input}' not found")
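The `add_memory_for_line` hunk above inserts the conversation one message at a time and nudges the clock between adds so each chunk carries a distinct timestamp. A condensed sketch of that pattern; the `client.add` signature is an assumption, while the loop shape, the `*100` timestamp, and the 1 ms sleep mirror the hunk:

```python
# Condensed sketch of the per-message add loop from add_memory_for_line.
import time


def add_messages_one_by_one(client, conversation: list[dict], user_id: str) -> None:
    for chunk_start in range(len(conversation)):
        chunk = conversation[chunk_start : chunk_start + 1]  # one message per chunk
        timestamp_add = str(int(time.time() * 100))  # centisecond resolution
        time.sleep(0.001)  # nudge the clock so consecutive stamps differ
        client.add(messages=chunk, user_id=user_id, timestamp=timestamp_add)
```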
25 changes: 17 additions & 8 deletions evaluation/scripts/PrefEval/pref_memos.py
@@ -4,12 +4,20 @@
import os
import sys
import time

import tiktoken

from dotenv import load_dotenv
from irrelevant_conv import irre_10, irre_300
from openai import OpenAI
from tqdm import tqdm
from utils.pref_mem_utils import (
add_pref_instruction,
create_mem_string,
remove_pref_mem_from_mem_string,
)
from utils.prompts import PREFEVAL_ANSWER_PROMPT

from irrelevant_conv import irre_10, irre_300

ROOT_DIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -88,9 +96,7 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> di
start_time_search = time.monotonic()
relevant_memories = mem_client.search(query=question, user_id=user_id, top_k=top_k_value)
search_memories_duration = time.monotonic() - start_time_search
memories_str = "\n".join(
f"- {entry.get('memory', '')}" for entry in relevant_memories["text_mem"][0]["memories"]
)
memories_str = create_mem_string(relevant_memories)

memory_tokens_used = len(tokenizer.encode(memories_str))

@@ -111,7 +117,7 @@
return None


def generate_response_for_line(line_data: tuple, openai_client: OpenAI) -> dict:
def generate_response_for_line(line_data: tuple, openai_client: OpenAI, lib: str) -> dict:
"""
Generates a response for a single line of data using pre-fetched memories.
"""
@@ -139,7 +145,10 @@ def generate_response_for_line(line_data: tuple, openai_client: OpenAI) -> dict:
)
return original_data

system_prompt = f"You are a helpful AI. Answer the question based on the query and the following memories:\nUser Memories:\n{memories_str}"
memories_str = remove_pref_mem_from_mem_string(memories_str, frame=lib)

template = add_pref_instruction(PREFEVAL_ANSWER_PROMPT, frame=lib)
system_prompt = template.format(context=memories_str)
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": question},
@@ -201,7 +210,7 @@ def main():
args = parser.parse_args()

try:
with open(args.input, "r", encoding="utf-8") as infile:
with open(args.input, encoding="utf-8") as infile:
lines = infile.readlines()
except FileNotFoundError:
print(f"Error: Input file '{args.input}' not found")
@@ -277,7 +286,7 @@ def main():
concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor,
):
futures = [
executor.submit(generate_response_for_line, (i, line), openai_client)
executor.submit(generate_response_for_line, (i, line), openai_client, args.lib)
for i, line in enumerate(lines)
]

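`pref_memos.py` now routes retrieved memories through three helpers imported from `utils.pref_mem_utils`, whose bodies are not part of this diff. A hedged sketch of what they plausibly do; the `preference:` marker, the ABLATION_PREF gating, and the prompt template text are assumptions for illustration:

```python
# Hedged sketch of the utils.pref_mem_utils helpers imported above.
# Their real implementations are not shown in this diff.
import os

PREFEVAL_ANSWER_PROMPT = (  # stand-in for utils.prompts.PREFEVAL_ANSWER_PROMPT
    "You are a helpful AI. Answer the question based on the query and the "
    "following memories:\nUser Memories:\n{context}"
)


def create_mem_string(search_result: dict) -> str:
    """Flatten a MemOS search result into a bullet list (as the old inline code did)."""
    memories = search_result["text_mem"][0]["memories"]
    return "\n".join(f"- {entry.get('memory', '')}" for entry in memories)


def remove_pref_mem_from_mem_string(mem_string: str, frame: str) -> str:
    """Under ABLATION_PREF, drop lines tagged as preference memories (assumed marker)."""
    if os.getenv("ABLATION_PREF", "false").lower() != "true" or frame != "memos":
        return mem_string
    return "\n".join(
        line
        for line in mem_string.splitlines()
        if not line.lstrip("- ").lower().startswith("preference:")
    )


def add_pref_instruction(template: str, frame: str) -> str:
    """Append an instruction telling the model to honor stored preferences."""
    if frame == "memos":
        template += "\nFollow any user preferences found in the memories."
    return template


# Assembly flow mirroring generate_response_for_line above:
result = {"text_mem": [{"memories": [{"memory": "preference: the user avoids EV cars"}]}]}
memories_str = remove_pref_mem_from_mem_string(create_mem_string(result), frame="memos")
system_prompt = add_pref_instruction(PREFEVAL_ANSWER_PROMPT, frame="memos").format(
    context=memories_str
)
```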
10 changes: 7 additions & 3 deletions evaluation/scripts/PrefEval/pref_memu.py
@@ -4,12 +4,16 @@
import os
import sys
import time

from datetime import datetime

import tiktoken

from dotenv import load_dotenv
from irrelevant_conv import irre_10, irre_300
from openai import OpenAI
from tqdm import tqdm
from datetime import datetime
from irrelevant_conv import irre_10, irre_300


ROOT_DIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -205,7 +209,7 @@ def main():
args = parser.parse_args()

try:
with open(args.input, "r", encoding="utf-8") as infile:
with open(args.input, encoding="utf-8") as infile:
lines = infile.readlines()
except FileNotFoundError:
print(f"Error: Input file '{args.input}' not found")
8 changes: 5 additions & 3 deletions evaluation/scripts/PrefEval/pref_supermemory.py
@@ -4,12 +4,14 @@
import os
import sys
import time

import tiktoken

from dotenv import load_dotenv
from irrelevant_conv import irre_10, irre_300
from openai import OpenAI
from tqdm import tqdm
from datetime import datetime
from irrelevant_conv import irre_10, irre_300


ROOT_DIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -201,7 +203,7 @@ def main():
args = parser.parse_args()

try:
with open(args.input, "r", encoding="utf-8") as infile:
with open(args.input, encoding="utf-8") as infile:
lines = infile.readlines()
except FileNotFoundError:
print(f"Error: Input file '{args.input}' not found")