Fix: improve history compression with retry logic and multi-byte character support (#1167)

* first commit

* Reduce retry wait time to 100ms

* * Add incremental delay and modify the context truncation logic
This commit is contained in:
lxowalle
2026-03-09 13:41:41 +08:00
committed by GitHub
parent a521a49162
commit 2c3952b8c0
+119 -27
View File
@@ -1492,10 +1492,21 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) {
return
}
const (
maxSummarizationMessages = 10
llmMaxRetries = 3
llmMaxTokens = 1024
llmTemperature = 0.3
fallbackMaxContentLength = 200
)
// Multi-Part Summarization
var finalSummary string
if len(validMessages) > 10 {
if len(validMessages) > maxSummarizationMessages {
mid := len(validMessages) / 2
mid = al.findNearestUserMessage(validMessages, mid)
part1 := validMessages[:mid]
part2 := validMessages[mid:]
@@ -1507,18 +1518,9 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) {
s1,
s2,
)
resp, err := agent.Provider.Chat(
ctx,
[]providers.Message{{Role: "user", Content: mergePrompt}},
nil,
agent.Model,
map[string]any{
"max_tokens": 1024,
"temperature": 0.3,
"prompt_cache_key": agent.ID,
},
)
if err == nil {
resp, err := al.retryLLMCall(ctx, agent, mergePrompt, llmMaxRetries)
if err == nil && resp.Content != "" {
finalSummary = resp.Content
} else {
finalSummary = s1 + " " + s2
@@ -1538,6 +1540,69 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) {
}
}
// findNearestUserMessage finds the nearest user message to the given index.
// It searches backward first, then forward if no user message is found.
func (al *AgentLoop) findNearestUserMessage(messages []providers.Message, mid int) int {
originalMid := mid
for mid > 0 && messages[mid].Role != "user" {
mid--
}
if messages[mid].Role == "user" {
return mid
}
mid = originalMid
for mid < len(messages) && messages[mid].Role != "user" {
mid++
}
if mid < len(messages) {
return mid
}
return originalMid
}
// retryLLMCall calls the LLM with retry logic.
func (al *AgentLoop) retryLLMCall(
ctx context.Context,
agent *AgentInstance,
prompt string,
maxRetries int,
) (*providers.LLMResponse, error) {
const (
llmMaxTokens = 1024
llmTemperature = 0.3
)
var resp *providers.LLMResponse
var err error
for attempt := 0; attempt < maxRetries; attempt++ {
resp, err = agent.Provider.Chat(
ctx,
[]providers.Message{{Role: "user", Content: prompt}},
nil,
agent.Model,
map[string]any{
"max_tokens": llmMaxTokens,
"temperature": llmTemperature,
"prompt_cache_key": agent.ID,
},
)
if err == nil && resp != nil && resp.Content != "" {
return resp, nil
}
if attempt < maxRetries-1 {
time.Sleep(time.Duration(attempt+1) * 100 * time.Millisecond)
}
}
return resp, err
}
// summarizeBatch summarizes a batch of messages.
func (al *AgentLoop) summarizeBatch(
ctx context.Context,
@@ -1545,6 +1610,14 @@ func (al *AgentLoop) summarizeBatch(
batch []providers.Message,
existingSummary string,
) (string, error) {
const (
llmMaxRetries = 3
llmMaxTokens = 1024
llmTemperature = 0.3
fallbackMinContentLength = 200
fallbackMaxContentPercent = 10
)
var sb strings.Builder
sb.WriteString(
"Provide a concise summary of this conversation segment, preserving core context and key points.\n",
@@ -1560,21 +1633,40 @@ func (al *AgentLoop) summarizeBatch(
}
prompt := sb.String()
response, err := agent.Provider.Chat(
ctx,
[]providers.Message{{Role: "user", Content: prompt}},
nil,
agent.Model,
map[string]any{
"max_tokens": 1024,
"temperature": 0.3,
"prompt_cache_key": agent.ID,
},
)
if err != nil {
return "", err
response, err := al.retryLLMCall(ctx, agent, prompt, llmMaxRetries)
if err == nil && response.Content != "" {
return strings.TrimSpace(response.Content), nil
}
return response.Content, nil
var fallback strings.Builder
fallback.WriteString("Conversation summary: ")
for i, m := range batch {
if i > 0 {
fallback.WriteString(" | ")
}
content := strings.TrimSpace(m.Content)
runes := []rune(content)
if len(runes) == 0 {
fallback.WriteString(fmt.Sprintf("%s: ", m.Role))
continue
}
keepLength := len(runes) * fallbackMaxContentPercent / 100
if keepLength < fallbackMinContentLength {
keepLength = fallbackMinContentLength
}
if keepLength > len(runes) {
keepLength = len(runes)
}
content = string(runes[:keepLength])
if keepLength < len(runes) {
content += "..."
}
fallback.WriteString(fmt.Sprintf("%s: %s", m.Role, content))
}
return fallback.String(), nil
}
// estimateTokens estimates the number of tokens in a message list.