Merged
68 commits
74c5a5e  add preference text memory (Sep 11, 2025)
b8a739d  finish milvus support (Sep 11, 2025)
89b8d4b  add new builder (Sep 12, 2025)
41a487a  finish preference textual memory base level (Sep 16, 2025)
da601d8  modify code structure (Sep 17, 2025)
6457f59  modify pref module (Sep 18, 2025)
e1f3ecd  implement remaining preference function (Sep 18, 2025)
d412c78  modify preference.py (Sep 18, 2025)
0e502f4  fix bug in milvus (Sep 18, 2025)
83a9a3b  finish debug (Sep 18, 2025)
9f4de02  modify user pref user_id code (Sep 18, 2025)
9ffd095  fix bug in milvus (Sep 19, 2025)
52dbbac  finish debug in core (Sep 19, 2025)
0d1a859  repair bug in milvus get_all (Sep 21, 2025)
d706bb2  add pref mem search time in core (Sep 22, 2025)
2fab119  modify search for pref mem in product.py (Sep 22, 2025)
a906230  add simple pref memos example (Sep 22, 2025)
a317457  fix bug in examples/mem_os/simple_prefs_memos_product.py (Sep 23, 2025)
2493b8a  repair bug in user-id-related part (Sep 23, 2025)
3b84f8e  modify search (Sep 23, 2025)
8945b10  repair bug in slow update (Sep 24, 2025)
ee63e18  fix definition error in extractor -> extract_implicit_preferences (Sep 24, 2025)
67ed7a8  repair definition error in extractor and modify split func in splitter (Sep 24, 2025)
2ab893e  modify code (Sep 26, 2025)
8ce0ca3  modify adder (Sep 26, 2025)
b89e28a  optimize the code (Sep 28, 2025)
d77a0d1  repair bug in adder and extractor (Sep 28, 2025)
21ede58  finish make test and make pre-commit (Sep 29, 2025)
54a6b23  repair bug in preference (Sep 29, 2025)
ed2806a  add memory field for MilvusVecDBItem and modify related module (Sep 30, 2025)
4bd9787  pref code cleanup (Oct 5, 2025)
4e3bd96  modify prompt of extractor (Oct 10, 2025)
e1b505f  modify extractor (Oct 10, 2025)
ab7f543  merge new dev funcs into branch (Oct 10, 2025)
eb1cdb0  add reranker to pref mem (Oct 10, 2025)
f8e26ad  remove assembler in pref mem (Oct 10, 2025)
666a6fc  modify code (Oct 10, 2025)
3fc4371  add op-trace-based update method in add (Oct 13, 2025)
90e1252  modify slow update in adder (Oct 14, 2025)
2e4714d  modify implicit-part code in extractor and add deduplication in utils (Oct 14, 2025)
8cbd710  modify deduplication threshold (Oct 14, 2025)
0d205c5  modify API config (Oct 15, 2025)
d6827ea  repair search-related bug in adder (Oct 15, 2025)
0ee048c  repair duplicate-search bug in core (Oct 15, 2025)
71d636d  merge func to test (Oct 20, 2025)
2e2ad34  add pref to new naive cube and server API (Oct 21, 2025)
52f04ef  Merge branch 'feat/test' into feat/inst_cplt_with_api (Oct 21, 2025)
0f5547e  add async pref add via mem_scheduler (Oct 21, 2025)
72083da  modify (Oct 21, 2025)
6b83801  replace print with logger (Oct 22, 2025)
87e189b  repair bug from make pre-commit (Oct 22, 2025)
f9094e0  inst cplt (Oct 23, 2025)
4f764fc  align to liji cloud server (Oct 23, 2025)
e02722a  repair pkg problem (Oct 23, 2025)
f196642  resolve conflict (Oct 23, 2025)
abb9311  modify example of pref (Oct 24, 2025)
d3ad365  pre_commit (Oct 24, 2025)
c78913e  fix API bug (Oct 24, 2025)
06b41ca  merge inst_cplt to dev (Oct 24, 2025)
090f0f6  fix pre-commit (Oct 24, 2025)
403ab7f  fix pre-commit (Oct 24, 2025)
3241a77  fetch dev and follow new code, add instruct complete and ablation i… (Oct 24, 2025)
0c8905d  fix pre-commit error (Oct 24, 2025)
6a28893  modify code following review (Oct 25, 2025)
d7ec764  fix bug in make pre-commit (Oct 25, 2025)
af53171  merge new dev (Oct 25, 2025)
aac6a39  repair bug in server router (Oct 25, 2025)
82a8b3f  fix pre-commit bug (Oct 25, 2025)
2 changes: 1 addition & 1 deletion docker/requirements.txt
@@ -157,4 +157,4 @@ volcengine-python-sdk==4.0.6
watchfiles==1.1.0
websockets==15.0.1
xlrd==2.0.2
xlsxwriter==3.2.5
xlsxwriter==3.2.5
8 changes: 7 additions & 1 deletion docs/openapi.json
@@ -884,7 +884,7 @@
"type": "string",
"title": "Session Id",
"description": "Session ID for the MOS. This is used to distinguish between different dialogue",
"default": "0ce84b9c-0615-4b9d-83dd-fba50537d5d3"
"default": "41bb5e18-252d-4948-918c-07d82aa47086"
},
"chat_model": {
"$ref": "#/components/schemas/LLMConfigFactory",
@@ -939,6 +939,12 @@
"description": "Enable parametric memory for the MemChat",
"default": false
},
"enable_preference_memory": {
"type": "boolean",
"title": "Enable Preference Memory",
"description": "Enable preference memory for the MemChat",
"default": false
},
"enable_mem_scheduler": {
"type": "boolean",
"title": "Enable Mem Scheduler",
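For context, a minimal sketch of a config payload that exercises the new flag. Only `enable_preference_memory` and the neighboring fields visible in the schema above come from this diff; the endpoint URL and the concrete values are assumptions for illustration.

```python
# Minimal sketch of a MemChat config payload using the new flag.
# Only "enable_preference_memory" (and the adjacent schema fields
# shown above) come from this diff; the endpoint is hypothetical.
import json
import urllib.request

payload = {
    "session_id": "demo-session",       # any session identifier
    "enable_parametric_memory": False,  # schema default
    "enable_preference_memory": True,   # new field, defaults to false
    "enable_mem_scheduler": True,
}

req = urllib.request.Request(
    "http://localhost:8000/configure",  # hypothetical endpoint
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
# urllib.request.urlopen(req)  # run against a live server to apply it
```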
9 changes: 5 additions & 4 deletions evaluation/.env-example
@@ -22,9 +22,10 @@ SUPERMEMORY_API_KEY="sm_xxx"
MEMOBASE_API_KEY="xxx"
MEMOBASE_PROJECT_URL="http://***.***.***.***:8019"

# eval settings
PRE_SPLIT_CHUNK=false

# pref
PRE_SPLIT_CHUNK=false # pre-split chunks on the client side, for PersonaMem and PrefEval
INSTRUCT_COMPLETE=true # whether to use the instruct-complete format
ABLATION_PREF=false # ablation: remove pref mem, keep only text mem

# Configuration Only For Scheduler
# RabbitMQ Configuration
@@ -45,4 +46,4 @@ MEMSCHEDULER_GRAPHDBAUTH_URI=bolt://localhost:7687
MEMSCHEDULER_GRAPHDBAUTH_USER=neo4j
MEMSCHEDULER_GRAPHDBAUTH_PASSWORD=***
MEMSCHEDULER_GRAPHDBAUTH_DB_NAME=neo4j
MEMSCHEDULER_GRAPHDBAUTH_AUTO_CREATE=true
MEMSCHEDULER_GRAPHDBAUTH_AUTO_CREATE=true
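The three pref settings added above are plain booleans in the environment. A minimal sketch of how an eval script might read them; the `_env_flag` helper is hypothetical, only the variable names come from the file:

```python
# Sketch: parsing the new eval flags. _env_flag is an assumed helper;
# only the variable names come from .env-example above.
import os

from dotenv import load_dotenv

load_dotenv()


def _env_flag(name: str, default: str = "false") -> bool:
    """Interpret "true"/"1"/"yes" (any case) as True."""
    return os.getenv(name, default).strip().lower() in {"true", "1", "yes"}


PRE_SPLIT_CHUNK = _env_flag("PRE_SPLIT_CHUNK")      # client-side pre-split
INSTRUCT_COMPLETE = _env_flag("INSTRUCT_COMPLETE")  # instruct-complete format
ABLATION_PREF = _env_flag("ABLATION_PREF")          # text mem only, no pref mem
```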
44 changes: 24 additions & 20 deletions evaluation/scripts/PrefEval/pref_eval.py
@@ -1,14 +1,18 @@
import argparse
import asyncio
import json
import os
import re
import argparse
from typing import Dict, Any

from collections import Counter
from tqdm.asyncio import tqdm
import os
from typing import Any

import pandas as pd

from dotenv import load_dotenv
from openai import OpenAI
from tqdm.asyncio import tqdm


load_dotenv()

@@ -30,7 +34,7 @@ async def call_gpt4o_mini_async(client: OpenAI, prompt: str) -> str:
)
return response.choices[0].message.content
except Exception as e:
print(f"API call failed: {str(e)}")
print(f"API call failed: {e!s}")
return ""


@@ -41,7 +45,7 @@ def parse_xml_response(response: str, tag: str) -> str:

async def evaluate_violate_preference_async(
client: OpenAI, preference: str, question: str, response: str
) -> Dict[str, str]:
) -> dict[str, str]:
prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant's response violates the user's stated preference.
Evaluate the response based on these criteria:

@@ -73,16 +77,16 @@

async def evaluate_acknowledge_preference_async(
client: OpenAI, question: str, response: str
) -> Dict[str, str]:
prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant acknowledges any user preference in answering the user's query.
) -> dict[str, str]:
prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant acknowledges any user preference in answering the user's query.

Evaluate the response based on these stringent criteria:

1. Check if the response explicitly or implicitly mentions or references a user preference. 2. The content of the preference is irrelevant for this check; only its presence matters.

Answer "Yes" if:

1. The response explicitly mentions or refers to a user preference in answering the user's question. Examples include: "Based on your previous preference for xxxx"; "Based on your previous preference, I would suggest you xxxx"; "Since you mentioned you prefer/dislike xxxx"; "Since you are a xxxx"; "I will recommend the following given your preference for xxx", etc. You should extract the "xxxx" in your answer. If it only says "Based on our previous conversation, I recommend..." and does not explicitly mention any preference, you should answer 'No'.
1. The response explicitly mentions or refers to a user preference in answering the user's question. Examples include: "Based on your previous preference for xxxx"; "Based on your previous preference, I would suggest you xxxx"; "Since you mentioned you prefer/dislike xxxx"; "Since you are a xxxx"; "I will recommend the following given your preference for xxx", etc. You should extract the "xxxx" in your answer. If it only says "Based on our previous conversation, I recommend..." and does not explicitly mention any preference, you should answer 'No'.
2. The response assumes the user preference in answering the user's question implicitly. For example, when the user asks 'Can you recommend me cars to drive?', if the response is 'Based on your preference, I will recommend non-EV cars, ...', then this indicates the assistant assumes that the user's preference is a dislike of EV cars, and you should answer 'Yes'.

Answer "No" if the response does not mention or refer to any user preference explicitly or implicitly. If you cannot extract the sentence stating what the preference is, answer 'No'.
@@ -103,7 +107,7 @@

async def evaluate_hallucinate_preference_async(
client: OpenAI, preference: str, restatement: str
) -> Dict[str, str]:
) -> dict[str, str]:
if not restatement.strip():
return {"explanation": "No restatement provided by assistant", "answer": "No"}
prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant's restatement of the user's stated preference is the same preference. Evaluate the response based on these stringent criteria to answer if the assistant has hallucinated the preference or not:
@@ -120,10 +124,10 @@
2. The assistant's restatement is a minor paraphrase that fully preserves the meaning and intent of the original preference.
3. The restatement is just empty, no hallucination.

Here is the information:
Original user preference: "{preference}"
Assistant's restatement: "{restatement}"
Examine the original preference and the assistant's restatement meticulously and answer. Please answer in this exact XML format without any other additional text:
Here is the information:
Original user preference: "{preference}"
Assistant's restatement: "{restatement}"
Examine the original preference and the assistant's restatement meticulously and answer. Please answer in this exact XML format without any other additional text:
<explanation>[1 short sentence explanation]</explanation>
<answer>[Yes/No]</answer>"""

@@ -136,7 +140,7 @@

async def evaluate_helpful_response_async(
client: OpenAI, question: str, response: str
) -> Dict[str, str]:
) -> dict[str, str]:
prompt = f"""You will analyze a conversation between a user and an assistant, focusing on whether the assistant provides any substantive response to the user's query.
Evaluate the response based on these stringent criteria:

@@ -174,7 +178,7 @@ async def evaluate_helpful_response_async(
}


def classify_error_type(evaluation_results: Dict[str, Any]) -> str:
def classify_error_type(evaluation_results: dict[str, Any]) -> str:
violate = evaluation_results["violate_preference"]["answer"]
acknowledge = evaluation_results["acknowledge_preference"]["answer"]
hallucinate = evaluation_results["hallucinate_preference"]["answer"]
@@ -192,7 +196,7 @@ def classify_error_type(evaluation_results: Dict[str, Any]) -> str:
return "Personalized Response"


async def process_line(line: str, client: OpenAI, semaphore: asyncio.Semaphore) -> Dict[str, Any]:
async def process_line(line: str, client: OpenAI, semaphore: asyncio.Semaphore) -> dict[str, Any]:
async with semaphore:
data = json.loads(line.strip())
preference = data["preference"]
@@ -223,7 +227,7 @@ async def process_line(line: str, client: OpenAI, semaphore: asyncio.Semaphore)
return result


def log_summary(error_counter: Counter, total_samples: int) -> Dict[str, Dict[str, float]]:
def log_summary(error_counter: Counter, total_samples: int) -> dict[str, dict[str, float]]:
summary_data = {}
print("\n--- Error Type Summary ---")

@@ -247,7 +251,7 @@ def log_summary(error_counter: Counter, total_samples: int) -> Dict[str, Dict[st


def generate_excel_summary(
summary_results: Dict[str, Dict[str, float]],
summary_results: dict[str, dict[str, float]],
avg_search_time: float,
avg_context_tokens: float,
avg_add_time: float,
@@ -317,7 +321,7 @@ async def main(concurrency_limit: int, input_file: str, output_file: str, output
client = OpenAI(api_key=API_KEY, base_url=API_URL)

try:
with open(input_file, "r", encoding="utf-8") as f:
with open(input_file, encoding="utf-8") as f:
lines = f.readlines()
except FileNotFoundError:
print(f"Error: Input file not found at '{input_file}'")
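All four evaluators above ask the judge model to answer in `<explanation>`/`<answer>` tags and pull the fields out with `parse_xml_response`, whose body falls outside the hunks shown. A plausible regex-based sketch given the prompt format, not necessarily the repository's exact code:

```python
# Plausible sketch of parse_xml_response; the real body is not shown
# in this diff, but the prompts guarantee the shape
# "<explanation>...</explanation>\n<answer>[Yes/No]</answer>".
import re


def parse_xml_response(response: str, tag: str) -> str:
    """Return the text inside <tag>...</tag>, or "" if the tag is absent."""
    match = re.search(rf"<{tag}>(.*?)</{tag}>", response, re.DOTALL)
    return match.group(1).strip() if match else ""


reply = "<explanation>The answer ignores the stated preference.</explanation>\n<answer>Yes</answer>"
assert parse_xml_response(reply, "answer") == "Yes"
assert parse_xml_response(reply, "verdict") == ""  # missing tag -> empty string
```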
6 changes: 4 additions & 2 deletions evaluation/scripts/PrefEval/pref_mem0.py
@@ -4,12 +4,14 @@
import os
import sys
import time

import tiktoken

from dotenv import load_dotenv
from irrelevant_conv import irre_10, irre_300
from openai import OpenAI
from tqdm import tqdm

from irrelevant_conv import irre_10, irre_300

ROOT_DIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -199,7 +201,7 @@ def main():
args = parser.parse_args()

try:
with open(args.input, "r", encoding="utf-8") as infile:
with open(args.input, encoding="utf-8") as infile:
lines = infile.readlines()
except FileNotFoundError:
print(f"Error: Input file '{args.input}' not found")
10 changes: 6 additions & 4 deletions evaluation/scripts/PrefEval/pref_memobase.py
@@ -4,12 +4,14 @@
import os
import sys
import time

import tiktoken

from dotenv import load_dotenv
from irrelevant_conv import irre_10, irre_300
from openai import OpenAI
from tqdm import tqdm
import time
from irrelevant_conv import irre_10, irre_300


ROOT_DIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -49,7 +51,7 @@ def add_memory_for_line(
if conversation:
messages = []

for chunk_start in range(0, len(conversation)):
for chunk_start in range(len(conversation)):
chunk = conversation[chunk_start : chunk_start + 1]
timestamp_add = str(int(time.time() * 100))
time.sleep(0.001) # Ensure unique timestamp
@@ -210,7 +212,7 @@ def main():
args = parser.parse_args()

try:
with open(args.input, "r", encoding="utf-8") as infile:
with open(args.input, encoding="utf-8") as infile:
lines = infile.readlines()
except FileNotFoundError:
print(f"Error: Input file '{args.input}' not found")
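The `add_memory_for_line` hunk above inserts the conversation one message at a time and nudges the clock between adds so each chunk carries a distinct timestamp. A condensed sketch of that pattern; the `client.add` signature is an assumption, while the loop shape, the `*100` timestamp, and the 1 ms sleep mirror the hunk:

```python
# Condensed sketch of the per-message add loop from add_memory_for_line.
import time


def add_messages_one_by_one(client, conversation: list[dict], user_id: str) -> None:
    for chunk_start in range(len(conversation)):
        chunk = conversation[chunk_start : chunk_start + 1]  # one message per chunk
        timestamp_add = str(int(time.time() * 100))  # centisecond resolution
        time.sleep(0.001)  # nudge the clock so consecutive stamps differ
        client.add(messages=chunk, user_id=user_id, timestamp=timestamp_add)
```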
25 changes: 17 additions & 8 deletions evaluation/scripts/PrefEval/pref_memos.py
@@ -4,12 +4,20 @@
import os
import sys
import time

import tiktoken

from dotenv import load_dotenv
from irrelevant_conv import irre_10, irre_300
from openai import OpenAI
from tqdm import tqdm
from utils.pref_mem_utils import (
add_pref_instruction,
create_mem_string,
remove_pref_mem_from_mem_string,
)
from utils.prompts import PREFEVAL_ANSWER_PROMPT

from irrelevant_conv import irre_10, irre_300

ROOT_DIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -88,9 +96,7 @@ def search_memory_for_line(line_data: tuple, mem_client, top_k_value: int) -> di
start_time_search = time.monotonic()
relevant_memories = mem_client.search(query=question, user_id=user_id, top_k=top_k_value)
search_memories_duration = time.monotonic() - start_time_search
memories_str = "\n".join(
f"- {entry.get('memory', '')}" for entry in relevant_memories["text_mem"][0]["memories"]
)
memories_str = create_mem_string(relevant_memories)

memory_tokens_used = len(tokenizer.encode(memories_str))

@@ -111,7 +117,7 @@
return None


def generate_response_for_line(line_data: tuple, openai_client: OpenAI) -> dict:
def generate_response_for_line(line_data: tuple, openai_client: OpenAI, lib: str) -> dict:
"""
Generates a response for a single line of data using pre-fetched memories.
"""
@@ -139,7 +145,10 @@ def generate_response_for_line(line_data: tuple, openai_client: OpenAI) -> dict:
)
return original_data

system_prompt = f"You are a helpful AI. Answer the question based on the query and the following memories:\nUser Memories:\n{memories_str}"
memories_str = remove_pref_mem_from_mem_string(memories_str, frame=lib)

template = add_pref_instruction(PREFEVAL_ANSWER_PROMPT, frame=lib)
system_prompt = template.format(context=memories_str)
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": question},
@@ -201,7 +210,7 @@ def main():
args = parser.parse_args()

try:
with open(args.input, "r", encoding="utf-8") as infile:
with open(args.input, encoding="utf-8") as infile:
lines = infile.readlines()
except FileNotFoundError:
print(f"Error: Input file '{args.input}' not found")
@@ -277,7 +286,7 @@ def main():
concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor,
):
futures = [
executor.submit(generate_response_for_line, (i, line), openai_client)
executor.submit(generate_response_for_line, (i, line), openai_client, args.lib)
for i, line in enumerate(lines)
]

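`pref_memos.py` now routes retrieved memories through three helpers imported from `utils.pref_mem_utils`, whose bodies are not part of this diff. A hedged sketch of what they plausibly do; the `preference:` marker, the ABLATION_PREF gating, and the prompt template text are assumptions for illustration:

```python
# Hedged sketch of the utils.pref_mem_utils helpers imported above.
# Their real implementations are not shown in this diff.
import os

PREFEVAL_ANSWER_PROMPT = (  # stand-in for utils.prompts.PREFEVAL_ANSWER_PROMPT
    "You are a helpful AI. Answer the question based on the query and the "
    "following memories:\nUser Memories:\n{context}"
)


def create_mem_string(search_result: dict) -> str:
    """Flatten a MemOS search result into a bullet list (as the old inline code did)."""
    memories = search_result["text_mem"][0]["memories"]
    return "\n".join(f"- {entry.get('memory', '')}" for entry in memories)


def remove_pref_mem_from_mem_string(mem_string: str, frame: str) -> str:
    """Under ABLATION_PREF, drop lines tagged as preference memories (assumed marker)."""
    if os.getenv("ABLATION_PREF", "false").lower() != "true" or frame != "memos":
        return mem_string
    return "\n".join(
        line
        for line in mem_string.splitlines()
        if not line.lstrip("- ").lower().startswith("preference:")
    )


def add_pref_instruction(template: str, frame: str) -> str:
    """Append an instruction telling the model to honor stored preferences."""
    if frame == "memos":
        template += "\nFollow any user preferences found in the memories."
    return template


# Assembly flow mirroring generate_response_for_line above:
result = {"text_mem": [{"memories": [{"memory": "preference: the user avoids EV cars"}]}]}
memories_str = remove_pref_mem_from_mem_string(create_mem_string(result), frame="memos")
system_prompt = add_pref_instruction(PREFEVAL_ANSWER_PROMPT, frame="memos").format(
    context=memories_str
)
```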
10 changes: 7 additions & 3 deletions evaluation/scripts/PrefEval/pref_memu.py
@@ -4,12 +4,16 @@
import os
import sys
import time

from datetime import datetime

import tiktoken

from dotenv import load_dotenv
from irrelevant_conv import irre_10, irre_300
from openai import OpenAI
from tqdm import tqdm
from datetime import datetime
from irrelevant_conv import irre_10, irre_300


ROOT_DIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -205,7 +209,7 @@ def main():
args = parser.parse_args()

try:
with open(args.input, "r", encoding="utf-8") as infile:
with open(args.input, encoding="utf-8") as infile:
lines = infile.readlines()
except FileNotFoundError:
print(f"Error: Input file '{args.input}' not found")
8 changes: 5 additions & 3 deletions evaluation/scripts/PrefEval/pref_supermemory.py
@@ -4,12 +4,14 @@
import os
import sys
import time

import tiktoken

from dotenv import load_dotenv
from irrelevant_conv import irre_10, irre_300
from openai import OpenAI
from tqdm import tqdm
from datetime import datetime
from irrelevant_conv import irre_10, irre_300


ROOT_DIR = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -201,7 +203,7 @@ def main():
args = parser.parse_args()

try:
with open(args.input, "r", encoding="utf-8") as infile:
with open(args.input, encoding="utf-8") as infile:
lines = infile.readlines()
except FileNotFoundError:
print(f"Error: Input file '{args.input}' not found")