22
33from typing import Any
44
5+ from memos .memories .textual .item import SourceMessage
6+
57
68_TAG1 = re .compile (r"^\s*\[[^\]]*\]\s*" )
79
810
def get_encoded_tokens(content: str) -> int:
    """Return the size of ``content`` used for token budgeting.

    The value is the character length of the string (``len(content)``); no
    tokenizer is applied, so it is only an approximation of a token count.

    Args:
        content: Text to measure.

    Returns:
        int: Number of characters in ``content``.
    """
    return len(content)
20+
21+
22+ def truncate_data (data : list [str | dict [str , Any ] | Any ], max_tokens : int ) -> list [str ]:
23+ """
24+ Truncate data to max tokens.
25+ Args:
26+ data: List of strings or dictionaries.
27+ max_tokens: Maximum number of tokens.
28+ Returns:
29+ str: Truncated string.
30+ """
31+ truncated_string = ""
32+ for item in data :
33+ if isinstance (item , SourceMessage ):
34+ content = getattr (item , "content" , "" )
35+ chat_time = getattr (item , "chat_time" , "" )
36+ if not content :
37+ continue
38+ truncated_string += f"[{ chat_time } ]: { content } \n "
39+ if get_encoded_tokens (truncated_string ) > max_tokens :
40+ break
41+ return truncated_string
42+
43+
944def process_source (
10- items : list [tuple [Any , str | dict [str , Any ] | list [Any ]]] | None = None , recent_num : int = 3
45+ items : list [tuple [Any , str | dict [str , Any ] | list [Any ]]] | None = None ,
46+ recent_num : int = 10 ,
47+ max_tokens : int = 2048 ,
1148) -> str :
1249 """
1350 Args:
@@ -23,19 +60,16 @@ def process_source(
2360 memory = None
2461 for item in items :
2562 memory , source = item
26- for content in source :
27- if isinstance (content , str ):
28- if "assistant:" in content :
29- continue
30- concat_data .append (content )
63+ concat_data .extend (source [- recent_num :])
64+ truncated_string = truncate_data (concat_data , max_tokens )
3165 if memory is not None :
32- concat_data = [ memory , * concat_data ]
33- return " \n " . join ( concat_data )
66+ truncated_string = f" { memory } \n { truncated_string } "
67+ return truncated_string
3468
3569
3670def concat_original_source (
3771 graph_results : list ,
38- merge_field : list [ str ] | None = None ,
72+ rerank_source : str | None = None ,
3973) -> list [str ]:
4074 """
4175 Merge memory items with original dialogue.
@@ -45,14 +79,16 @@ def concat_original_source(
4579 Returns:
4680 list[str]: List of memories concatenated with their original dialogue.
4781 """
48- if merge_field is None :
49- merge_field = ["sources" ]
82+ merge_field = []
83+ merge_field = ["sources" ] if rerank_source is None else rerank_source . split ( "," )
5084 documents = []
5185 for item in graph_results :
5286 memory = _TAG1 .sub ("" , m ) if isinstance ((m := getattr (item , "memory" , None )), str ) else m
5387 sources = []
5488 for field in merge_field :
55- source = getattr (item .metadata , field , "" )
89+ source = getattr (item .metadata , field , None )
90+ if source is None :
91+ continue
5692 sources .append ((memory , source ))
5793 concat_string = process_source (sources )
5894 documents .append (concat_string )
0 commit comments