From 2c3952b8c02d93774115adabe4eb452ac320d254 Mon Sep 17 00:00:00 2001 From: lxowalle <83055338+lxowalle@users.noreply.github.com> Date: Mon, 9 Mar 2026 13:41:41 +0800 Subject: [PATCH] Fix: improve history compression with retry logic and multi-byte character support (#1167) * first commit * Reduce retry wait time to 100ms * * Add incremental delay and modify the context truncation logic --- pkg/agent/loop.go | 146 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 119 insertions(+), 27 deletions(-) diff --git a/pkg/agent/loop.go b/pkg/agent/loop.go index 9a54f5077..eee0027ee 100644 --- a/pkg/agent/loop.go +++ b/pkg/agent/loop.go @@ -1492,10 +1492,21 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) { return } + const ( + maxSummarizationMessages = 10 + llmMaxRetries = 3 + llmMaxTokens = 1024 + llmTemperature = 0.3 + fallbackMaxContentLength = 200 + ) + // Multi-Part Summarization var finalSummary string - if len(validMessages) > 10 { + if len(validMessages) > maxSummarizationMessages { mid := len(validMessages) / 2 + + mid = al.findNearestUserMessage(validMessages, mid) + part1 := validMessages[:mid] part2 := validMessages[mid:] @@ -1507,18 +1518,9 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) { s1, s2, ) - resp, err := agent.Provider.Chat( - ctx, - []providers.Message{{Role: "user", Content: mergePrompt}}, - nil, - agent.Model, - map[string]any{ - "max_tokens": 1024, - "temperature": 0.3, - "prompt_cache_key": agent.ID, - }, - ) - if err == nil { + + resp, err := al.retryLLMCall(ctx, agent, mergePrompt, llmMaxRetries) + if err == nil && resp.Content != "" { finalSummary = resp.Content } else { finalSummary = s1 + " " + s2 @@ -1538,6 +1540,69 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) { } } +// findNearestUserMessage finds the nearest user message to the given index. +// It searches backward first, then forward if no user message is found. +func (al *AgentLoop) findNearestUserMessage(messages []providers.Message, mid int) int { + originalMid := mid + + for mid > 0 && messages[mid].Role != "user" { + mid-- + } + + if messages[mid].Role == "user" { + return mid + } + + mid = originalMid + for mid < len(messages) && messages[mid].Role != "user" { + mid++ + } + + if mid < len(messages) { + return mid + } + + return originalMid +} + +// retryLLMCall calls the LLM with retry logic. +func (al *AgentLoop) retryLLMCall( + ctx context.Context, + agent *AgentInstance, + prompt string, + maxRetries int, +) (*providers.LLMResponse, error) { + const ( + llmMaxTokens = 1024 + llmTemperature = 0.3 + ) + + var resp *providers.LLMResponse + var err error + + for attempt := 0; attempt < maxRetries; attempt++ { + resp, err = agent.Provider.Chat( + ctx, + []providers.Message{{Role: "user", Content: prompt}}, + nil, + agent.Model, + map[string]any{ + "max_tokens": llmMaxTokens, + "temperature": llmTemperature, + "prompt_cache_key": agent.ID, + }, + ) + if err == nil && resp != nil && resp.Content != "" { + return resp, nil + } + if attempt < maxRetries-1 { + time.Sleep(time.Duration(attempt+1) * 100 * time.Millisecond) + } + } + + return resp, err +} + // summarizeBatch summarizes a batch of messages. func (al *AgentLoop) summarizeBatch( ctx context.Context, @@ -1545,6 +1610,14 @@ func (al *AgentLoop) summarizeBatch( batch []providers.Message, existingSummary string, ) (string, error) { + const ( + llmMaxRetries = 3 + llmMaxTokens = 1024 + llmTemperature = 0.3 + fallbackMinContentLength = 200 + fallbackMaxContentPercent = 10 + ) + var sb strings.Builder sb.WriteString( "Provide a concise summary of this conversation segment, preserving core context and key points.\n", @@ -1560,21 +1633,40 @@ func (al *AgentLoop) summarizeBatch( } prompt := sb.String() - response, err := agent.Provider.Chat( - ctx, - []providers.Message{{Role: "user", Content: prompt}}, - nil, - agent.Model, - map[string]any{ - "max_tokens": 1024, - "temperature": 0.3, - "prompt_cache_key": agent.ID, - }, - ) - if err != nil { - return "", err + response, err := al.retryLLMCall(ctx, agent, prompt, llmMaxRetries) + if err == nil && response.Content != "" { + return strings.TrimSpace(response.Content), nil } - return response.Content, nil + + var fallback strings.Builder + fallback.WriteString("Conversation summary: ") + for i, m := range batch { + if i > 0 { + fallback.WriteString(" | ") + } + content := strings.TrimSpace(m.Content) + runes := []rune(content) + if len(runes) == 0 { + fallback.WriteString(fmt.Sprintf("%s: ", m.Role)) + continue + } + + keepLength := len(runes) * fallbackMaxContentPercent / 100 + if keepLength < fallbackMinContentLength { + keepLength = fallbackMinContentLength + } + + if keepLength > len(runes) { + keepLength = len(runes) + } + + content = string(runes[:keepLength]) + if keepLength < len(runes) { + content += "..." + } + fallback.WriteString(fmt.Sprintf("%s: %s", m.Role, content)) + } + return fallback.String(), nil } // estimateTokens estimates the number of tokens in a message list.