mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
f0e6b7aa37
SQLite FTS5 bm25() returns negative values where numerically smaller (more negative) indicates a better match. The official docs state: "The better the match, the numerically smaller the value returned." Two comments incorrectly stated "closer to 0 = better match" and "lower = better match". Updated all rank descriptions to use the unambiguous "more negative = higher relevance" phrasing. This matters because these comments are used as tool prompt hints for LLM agents, and incorrect semantics could lead to wrong ranking decisions.
213 lines
6.1 KiB
Go
213 lines
6.1 KiB
Go
package seahorse
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// ParseLastDuration parses a "last" duration string like "6h", "7d", "2w", "1m".
|
|
// Returns the duration and nil error, or zero and error if invalid.
|
|
func ParseLastDuration(s string) (time.Duration, error) {
|
|
if s == "" {
|
|
return 0, fmt.Errorf("empty duration")
|
|
}
|
|
|
|
re := regexp.MustCompile(`^(\d+)([hdwm])$`)
|
|
matches := re.FindStringSubmatch(s)
|
|
if matches == nil {
|
|
return 0, fmt.Errorf("invalid duration format: %q (use format like 6h, 7d, 2w, 1m)", s)
|
|
}
|
|
|
|
value, _ := strconv.Atoi(matches[1])
|
|
unit := matches[2]
|
|
|
|
switch unit {
|
|
case "h":
|
|
return time.Duration(value) * time.Hour, nil
|
|
case "d":
|
|
return time.Duration(value) * 24 * time.Hour, nil
|
|
case "w":
|
|
return time.Duration(value) * 7 * 24 * time.Hour, nil
|
|
case "m":
|
|
return time.Duration(value) * 30 * 24 * time.Hour, nil
|
|
default:
|
|
return 0, fmt.Errorf("unknown unit: %q", unit)
|
|
}
|
|
}
|
|
|
|
// GrepInput controls search across summaries and messages.
|
|
type GrepInput struct {
|
|
Pattern string `json:"pattern"`
|
|
Scope string `json:"scope,omitempty"` // "both" (default), "summary", or "message"
|
|
Role string `json:"role,omitempty"` // "user", "assistant", or "" (all)
|
|
AllConversations bool `json:"allConversations,omitempty"`
|
|
Since *time.Time `json:"since,omitempty"`
|
|
Before *time.Time `json:"before,omitempty"`
|
|
Last string `json:"last,omitempty"` // shortcut: "6h", "7d", "2w", "1m"
|
|
Limit int `json:"limit,omitempty"`
|
|
}
|
|
|
|
// GrepResult contains search results.
|
|
type GrepResult struct {
|
|
Success bool `json:"success"`
|
|
Summaries []GrepSummaryResult `json:"summaries"`
|
|
Messages []GrepMessageResult `json:"messages"`
|
|
TotalSummaries int `json:"totalSummaries"`
|
|
TotalMessages int `json:"totalMessages"`
|
|
Hint string `json:"hint,omitempty"`
|
|
}
|
|
|
|
// GrepSummaryResult is a summary match from grep.
|
|
type GrepSummaryResult struct {
|
|
ID string `json:"id"`
|
|
Content string `json:"content"`
|
|
Depth int `json:"depth"`
|
|
Kind SummaryKind `json:"kind"`
|
|
ConversationID int64 `json:"conversationId"`
|
|
// Rank is the bm25 relevance score (negative value, lower = better match).
|
|
// Examples: -5.0 = excellent match, -2.0 = good match, -0.5 = partial match.
|
|
Rank float64 `json:"rank,omitempty"`
|
|
}
|
|
|
|
// GrepMessageResult is a message match from grep.
|
|
type GrepMessageResult struct {
|
|
ID int64 `json:"id,string"`
|
|
Snippet string `json:"snippet"`
|
|
Role string `json:"role"`
|
|
ConversationID int64 `json:"conversationId"`
|
|
Rank float64 `json:"rank,omitempty"` // Relevance score (more negative = better match)
|
|
}
|
|
|
|
// ExpandMessagesResult contains expanded messages.
|
|
type ExpandMessagesResult struct {
|
|
Messages []Message `json:"messages"`
|
|
TokenCount int `json:"tokenCount"`
|
|
}
|
|
|
|
// Grep searches summaries and messages for matching content.
|
|
func (r *RetrievalEngine) Grep(ctx context.Context, input GrepInput) (*GrepResult, error) {
|
|
if input.Pattern == "" {
|
|
return nil, fmt.Errorf("grep: pattern is required")
|
|
}
|
|
|
|
limit := input.Limit
|
|
if limit == 0 {
|
|
limit = 20
|
|
}
|
|
|
|
// Handle Last parameter: convert to Since
|
|
since := input.Since
|
|
if input.Last != "" {
|
|
dur, err := ParseLastDuration(input.Last)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("grep: invalid last: %w", err)
|
|
}
|
|
t := time.Now().UTC().Add(-dur)
|
|
since = &t
|
|
}
|
|
|
|
// Auto-detect mode: use LIKE if pattern contains %, otherwise full-text
|
|
mode := ""
|
|
if strings.Contains(input.Pattern, "%") {
|
|
mode = "like"
|
|
}
|
|
|
|
searchInput := SearchInput{
|
|
Pattern: input.Pattern,
|
|
Mode: mode,
|
|
Role: input.Role,
|
|
AllConversations: input.AllConversations,
|
|
Since: since,
|
|
Before: input.Before,
|
|
Limit: limit,
|
|
}
|
|
|
|
result := &GrepResult{
|
|
Success: true,
|
|
Summaries: make([]GrepSummaryResult, 0),
|
|
Messages: make([]GrepMessageResult, 0),
|
|
TotalSummaries: 0,
|
|
TotalMessages: 0,
|
|
}
|
|
|
|
// Determine scope
|
|
scope := input.Scope
|
|
if scope == "" {
|
|
scope = "both"
|
|
}
|
|
|
|
// Search summaries if requested
|
|
if scope == "both" || scope == "summary" {
|
|
sumResults, err := r.store.SearchSummaries(ctx, searchInput)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("search summaries: %w", err)
|
|
}
|
|
for _, sr := range sumResults {
|
|
if sr.SummaryID != "" {
|
|
result.Summaries = append(result.Summaries, GrepSummaryResult{
|
|
ID: sr.SummaryID,
|
|
Content: sr.Content,
|
|
Depth: sr.Depth,
|
|
Kind: sr.Kind,
|
|
ConversationID: sr.ConversationID,
|
|
Rank: sr.Rank,
|
|
})
|
|
}
|
|
}
|
|
if len(sumResults) > 0 {
|
|
result.TotalSummaries = sumResults[0].TotalCount
|
|
}
|
|
}
|
|
|
|
// Search messages if requested
|
|
if scope == "both" || scope == "message" {
|
|
msgResults, err := r.store.SearchMessages(ctx, searchInput)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("search messages: %w", err)
|
|
}
|
|
for _, sr := range msgResults {
|
|
if sr.MessageID > 0 {
|
|
result.Messages = append(result.Messages, GrepMessageResult{
|
|
ID: sr.MessageID,
|
|
Snippet: sr.Snippet,
|
|
Role: sr.Role,
|
|
ConversationID: sr.ConversationID,
|
|
Rank: sr.Rank,
|
|
})
|
|
}
|
|
}
|
|
if len(msgResults) > 0 {
|
|
result.TotalMessages = msgResults[0].TotalCount
|
|
}
|
|
}
|
|
|
|
// Add hint if no results
|
|
if len(result.Summaries) == 0 && len(result.Messages) == 0 {
|
|
result.Hint = "No matches. Try: %keyword% for fuzzy search, or all_conversations: true"
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// ExpandMessages retrieves full message content by IDs.
|
|
func (r *RetrievalEngine) ExpandMessages(ctx context.Context, messageIDs []int64) (*ExpandMessagesResult, error) {
|
|
result := &ExpandMessagesResult{
|
|
Messages: make([]Message, 0, len(messageIDs)),
|
|
}
|
|
|
|
for _, msgID := range messageIDs {
|
|
msg, err := r.store.GetMessageByID(ctx, msgID)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
result.Messages = append(result.Messages, *msg)
|
|
result.TokenCount += msg.TokenCount
|
|
}
|
|
|
|
return result, nil
|
|
}
|