mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
Fix: improve history compression with retry logic and multi-byte character support (#1167)
* first commit
* Reduce retry wait time to 100ms
* Add incremental delay and modify the context truncation logic
This commit is contained in:
+119
-27
@@ -1492,10 +1492,21 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) {
|
||||
return
|
||||
}
|
||||
|
||||
const (
|
||||
maxSummarizationMessages = 10
|
||||
llmMaxRetries = 3
|
||||
llmMaxTokens = 1024
|
||||
llmTemperature = 0.3
|
||||
fallbackMaxContentLength = 200
|
||||
)
|
||||
|
||||
// Multi-Part Summarization
|
||||
var finalSummary string
|
||||
if len(validMessages) > 10 {
|
||||
if len(validMessages) > maxSummarizationMessages {
|
||||
mid := len(validMessages) / 2
|
||||
|
||||
mid = al.findNearestUserMessage(validMessages, mid)
|
||||
|
||||
part1 := validMessages[:mid]
|
||||
part2 := validMessages[mid:]
|
||||
|
||||
@@ -1507,18 +1518,9 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) {
|
||||
s1,
|
||||
s2,
|
||||
)
|
||||
resp, err := agent.Provider.Chat(
|
||||
ctx,
|
||||
[]providers.Message{{Role: "user", Content: mergePrompt}},
|
||||
nil,
|
||||
agent.Model,
|
||||
map[string]any{
|
||||
"max_tokens": 1024,
|
||||
"temperature": 0.3,
|
||||
"prompt_cache_key": agent.ID,
|
||||
},
|
||||
)
|
||||
if err == nil {
|
||||
|
||||
resp, err := al.retryLLMCall(ctx, agent, mergePrompt, llmMaxRetries)
|
||||
if err == nil && resp.Content != "" {
|
||||
finalSummary = resp.Content
|
||||
} else {
|
||||
finalSummary = s1 + " " + s2
|
||||
@@ -1538,6 +1540,69 @@ func (al *AgentLoop) summarizeSession(agent *AgentInstance, sessionKey string) {
|
||||
}
|
||||
}
|
||||
|
||||
// findNearestUserMessage finds the nearest user message to the given index.
|
||||
// It searches backward first, then forward if no user message is found.
|
||||
func (al *AgentLoop) findNearestUserMessage(messages []providers.Message, mid int) int {
|
||||
originalMid := mid
|
||||
|
||||
for mid > 0 && messages[mid].Role != "user" {
|
||||
mid--
|
||||
}
|
||||
|
||||
if messages[mid].Role == "user" {
|
||||
return mid
|
||||
}
|
||||
|
||||
mid = originalMid
|
||||
for mid < len(messages) && messages[mid].Role != "user" {
|
||||
mid++
|
||||
}
|
||||
|
||||
if mid < len(messages) {
|
||||
return mid
|
||||
}
|
||||
|
||||
return originalMid
|
||||
}
|
||||
|
||||
// retryLLMCall calls the LLM with retry logic.
|
||||
func (al *AgentLoop) retryLLMCall(
|
||||
ctx context.Context,
|
||||
agent *AgentInstance,
|
||||
prompt string,
|
||||
maxRetries int,
|
||||
) (*providers.LLMResponse, error) {
|
||||
const (
|
||||
llmMaxTokens = 1024
|
||||
llmTemperature = 0.3
|
||||
)
|
||||
|
||||
var resp *providers.LLMResponse
|
||||
var err error
|
||||
|
||||
for attempt := 0; attempt < maxRetries; attempt++ {
|
||||
resp, err = agent.Provider.Chat(
|
||||
ctx,
|
||||
[]providers.Message{{Role: "user", Content: prompt}},
|
||||
nil,
|
||||
agent.Model,
|
||||
map[string]any{
|
||||
"max_tokens": llmMaxTokens,
|
||||
"temperature": llmTemperature,
|
||||
"prompt_cache_key": agent.ID,
|
||||
},
|
||||
)
|
||||
if err == nil && resp != nil && resp.Content != "" {
|
||||
return resp, nil
|
||||
}
|
||||
if attempt < maxRetries-1 {
|
||||
time.Sleep(time.Duration(attempt+1) * 100 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
return resp, err
|
||||
}
|
||||
|
||||
// summarizeBatch summarizes a batch of messages.
|
||||
func (al *AgentLoop) summarizeBatch(
|
||||
ctx context.Context,
|
||||
@@ -1545,6 +1610,14 @@ func (al *AgentLoop) summarizeBatch(
|
||||
batch []providers.Message,
|
||||
existingSummary string,
|
||||
) (string, error) {
|
||||
const (
|
||||
llmMaxRetries = 3
|
||||
llmMaxTokens = 1024
|
||||
llmTemperature = 0.3
|
||||
fallbackMinContentLength = 200
|
||||
fallbackMaxContentPercent = 10
|
||||
)
|
||||
|
||||
var sb strings.Builder
|
||||
sb.WriteString(
|
||||
"Provide a concise summary of this conversation segment, preserving core context and key points.\n",
|
||||
@@ -1560,21 +1633,40 @@ func (al *AgentLoop) summarizeBatch(
|
||||
}
|
||||
prompt := sb.String()
|
||||
|
||||
response, err := agent.Provider.Chat(
|
||||
ctx,
|
||||
[]providers.Message{{Role: "user", Content: prompt}},
|
||||
nil,
|
||||
agent.Model,
|
||||
map[string]any{
|
||||
"max_tokens": 1024,
|
||||
"temperature": 0.3,
|
||||
"prompt_cache_key": agent.ID,
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
return "", err
|
||||
response, err := al.retryLLMCall(ctx, agent, prompt, llmMaxRetries)
|
||||
if err == nil && response.Content != "" {
|
||||
return strings.TrimSpace(response.Content), nil
|
||||
}
|
||||
return response.Content, nil
|
||||
|
||||
var fallback strings.Builder
|
||||
fallback.WriteString("Conversation summary: ")
|
||||
for i, m := range batch {
|
||||
if i > 0 {
|
||||
fallback.WriteString(" | ")
|
||||
}
|
||||
content := strings.TrimSpace(m.Content)
|
||||
runes := []rune(content)
|
||||
if len(runes) == 0 {
|
||||
fallback.WriteString(fmt.Sprintf("%s: ", m.Role))
|
||||
continue
|
||||
}
|
||||
|
||||
keepLength := len(runes) * fallbackMaxContentPercent / 100
|
||||
if keepLength < fallbackMinContentLength {
|
||||
keepLength = fallbackMinContentLength
|
||||
}
|
||||
|
||||
if keepLength > len(runes) {
|
||||
keepLength = len(runes)
|
||||
}
|
||||
|
||||
content = string(runes[:keepLength])
|
||||
if keepLength < len(runes) {
|
||||
content += "..."
|
||||
}
|
||||
fallback.WriteString(fmt.Sprintf("%s: %s", m.Role, content))
|
||||
}
|
||||
return fallback.String(), nil
|
||||
}
|
||||
|
||||
// estimateTokens estimates the number of tokens in a message list.
|
||||
|
||||
Reference in New Issue
Block a user