Feature: Implement Skill Discovery - With Clawhub Integration and Caching (#332)

* Add Find Skills and Install Skills

* Improvements

* fix file name

* Update pkg/skills/clawhub_registry.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fix

* Comments addressed

* Resolve comments

* fix tests

* fixes

* Comments resolved

* Update pkg/skills/search_cache_repro_test.go

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* minor fix

* fix test

* fixes

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Harsh Bansal
2026-02-20 16:25:04 +05:30
committed by GitHub
parent f1223eec42
commit d692cc0cc6
20 changed files with 2303 additions and 10 deletions
+93
View File
@@ -0,0 +1,93 @@
package utils
import (
"context"
"fmt"
"io"
"net/http"
"os"
"github.com/sipeed/picoclaw/pkg/logger"
)
// DownloadToFile performs req with client and streams the response body into
// a newly created temporary file using io.Copy, so the body is never held in
// memory as a whole.
//
// Parameters:
//   - ctx: context for cancellation/timeout; it is attached to req before sending
//   - client: HTTP client to use (caller controls timeouts, transport, etc.)
//   - req: fully prepared *http.Request (method, URL, headers, etc.)
//   - maxBytes: maximum number of body bytes to accept; values <= 0 mean no limit
//
// Returns the path to the temporary file. The caller is responsible for
// removing it when done (defer os.Remove(path)).
//
// Errors:
//   - the request itself fails
//   - the response status is outside 2xx (up to 512 bytes of the body are
//     included in the error message)
//   - the temp file cannot be created, written or closed
//   - the body is larger than maxBytes
//
// On any error after the temp file has been created, the file is removed
// before returning.
func DownloadToFile(ctx context.Context, client *http.Client, req *http.Request, maxBytes int64) (string, error) {
	// Attach context.
	req = req.WithContext(ctx)

	logger.DebugCF("download", "Starting download", map[string]interface{}{
		"url":       req.URL.String(),
		"max_bytes": maxBytes,
	})

	resp, err := client.Do(req)
	if err != nil {
		return "", fmt.Errorf("request failed: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		// Read a small amount for the error message. The read error is
		// ignored on purpose: a short or failed read just yields a shorter
		// (possibly empty) excerpt.
		errBody := make([]byte, 512)
		n, _ := io.ReadFull(resp.Body, errBody)
		return "", fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(errBody[:n]))
	}

	// Create temp file.
	tmpFile, err := os.CreateTemp("", "picoclaw-dl-*")
	if err != nil {
		return "", fmt.Errorf("failed to create temp file: %w", err)
	}
	tmpPath := tmpFile.Name()

	logger.DebugCF("download", "Streaming to temp file", map[string]interface{}{
		"path": tmpPath,
	})

	// Cleanup helper — closes and removes the temp file on any error.
	// Both results are ignored: this is best-effort and the caller already
	// receives the primary error.
	cleanup := func() {
		_ = tmpFile.Close()
		_ = os.Remove(tmpPath)
	}

	// Optionally limit the download size. One extra byte is allowed through
	// so that an oversized body can be told apart from one of exactly
	// maxBytes.
	var src io.Reader = resp.Body
	if maxBytes > 0 {
		// maxBytes+1 wraps to a negative value when maxBytes is the largest
		// int64; io.LimitReader would then report EOF immediately and an
		// empty file would be returned as success. In that case no limiter
		// is needed at all, since io.Copy cannot report more than that.
		if limit := maxBytes + 1; limit > 0 {
			src = io.LimitReader(resp.Body, limit)
		}
	}

	written, err := io.Copy(tmpFile, src)
	if err != nil {
		cleanup()
		return "", fmt.Errorf("download write failed: %w", err)
	}

	if maxBytes > 0 && written > maxBytes {
		cleanup()
		return "", fmt.Errorf("download too large: %d bytes (max %d)", written, maxBytes)
	}

	// Close explicitly (not via defer) so a failed close is reported to the
	// caller instead of handing back a possibly incomplete file.
	if err := tmpFile.Close(); err != nil {
		_ = os.Remove(tmpPath)
		return "", fmt.Errorf("failed to close temp file: %w", err)
	}

	logger.DebugCF("download", "Download complete", map[string]interface{}{
		"path":          tmpPath,
		"bytes_written": written,
	})

	return tmpPath, nil
}
+19
View File
@@ -0,0 +1,19 @@
package utils
import (
"fmt"
"strings"
)
// ValidateSkillIdentifier checks a skill identifier (slug or registry name).
//
// Surrounding whitespace is ignored for the purpose of the check. An error is
// returned when the identifier is empty after trimming, or when it contains a
// path separator ("/" or "\\") or the sequence "..", either of which could be
// used for directory traversal. A nil result means the identifier passed.
func ValidateSkillIdentifier(identifier string) error {
	candidate := strings.TrimSpace(identifier)

	switch {
	case len(candidate) == 0:
		return fmt.Errorf("identifier is required and must be a non-empty string")
	case strings.Contains(candidate, ".."), strings.ContainsAny(candidate, "/\\"):
		return fmt.Errorf("identifier must not contain path separators or '..' to prevent directory traversal")
	}

	return nil
}
+9
View File
@@ -14,3 +14,12 @@ func Truncate(s string, maxLen int) string {
}
return string(runes[:maxLen-3]) + "..."
}
// DerefStr returns the string s points to, or fallback when s is nil.
// A non-nil pointer to an empty string yields the empty string, not fallback.
func DerefStr(s *string, fallback string) string {
	if s != nil {
		return *s
	}
	return fallback
}
+120
View File
@@ -0,0 +1,120 @@
package utils
import (
"archive/zip"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/sipeed/picoclaw/pkg/logger"
)
// ExtractZipFile extracts the ZIP archive at zipPath into targetDir, creating
// targetDir if it does not exist. Entries are processed one at a time in
// archive order.
//
// Security: an entry is rejected when its cleaned name is absolute, when it
// climbs out of the archive root through a ".." component, when the resolved
// destination is not inside targetDir, or when it is a symlink.
//
// Extraction stops at the first failing entry and returns its error; entries
// that were already written are left in place.
func ExtractZipFile(zipPath string, targetDir string) error {
	reader, err := zip.OpenReader(zipPath)
	if err != nil {
		return fmt.Errorf("invalid ZIP: %w", err)
	}
	defer reader.Close()

	logger.DebugCF("zip", "Extracting ZIP", map[string]interface{}{
		"zip_path":   zipPath,
		"target_dir": targetDir,
		"entries":    len(reader.File),
	})

	if err := os.MkdirAll(targetDir, 0755); err != nil {
		return fmt.Errorf("failed to create target dir: %w", err)
	}

	// Loop invariants.
	root := filepath.Clean(targetDir)
	parentPrefix := ".." + string(filepath.Separator)

	for _, f := range reader.File {
		// Path traversal protection. Only a real ".." path component is
		// rejected; a file name that merely begins with two dots (for example
		// "..config") stays inside the archive root and is allowed.
		cleanName := filepath.Clean(f.Name)
		if cleanName == ".." || strings.HasPrefix(cleanName, parentPrefix) || filepath.IsAbs(cleanName) {
			return fmt.Errorf("zip entry has unsafe path: %q", f.Name)
		}

		destPath := filepath.Join(root, cleanName)

		// Double-check the resolved path is within target directory
		// (defense-in-depth). filepath.Rel is used instead of a string prefix
		// comparison because a prefix test breaks when root is "." (Join drops
		// the "./") or the filesystem root (root already ends in a separator),
		// which would reject every entry.
		rel, err := filepath.Rel(root, destPath)
		if err != nil || rel == ".." || strings.HasPrefix(rel, parentPrefix) {
			return fmt.Errorf("zip entry escapes target dir: %q", f.Name)
		}

		mode := f.FileInfo().Mode()

		// Reject any symlink.
		if mode&os.ModeSymlink != 0 {
			return fmt.Errorf("zip contains symlink %q; symlinks are not allowed", f.Name)
		}

		if mode.IsDir() {
			if err := os.MkdirAll(destPath, 0755); err != nil {
				return fmt.Errorf("failed to create directory %q: %w", destPath, err)
			}
			continue
		}

		// Ensure parent directory exists; archives are not required to carry
		// explicit directory entries.
		if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
			return fmt.Errorf("failed to create directory %q: %w", filepath.Dir(destPath), err)
		}

		if err := extractSingleFile(f, destPath); err != nil {
			return err
		}
	}

	return nil
}
// extractSingleFile writes the contents of one zip entry to destPath,
// enforcing a 5 MiB per-entry limit.
//
// The limit is checked twice: first against the uncompressed size declared in
// the entry header (before any file is created), then against the number of
// bytes actually produced while decompressing, so a header that understates
// the size cannot bypass it.
//
// destPath is created or truncated. If anything fails after the file has been
// created — including closing it — the file is removed and the error is
// returned, so a nil result means the data was written and the file closed
// successfully.
func extractSingleFile(f *zip.File, destPath string) (err error) {
	const maxFileSize = 5 * 1024 * 1024 // 5MB, adjust as appropriate

	// Check the uncompressed size from the header, if available.
	if f.UncompressedSize64 > maxFileSize {
		return fmt.Errorf("zip entry %q is too large (%d bytes)", f.Name, f.UncompressedSize64)
	}

	rc, err := f.Open()
	if err != nil {
		return fmt.Errorf("failed to open zip entry %q: %w", f.Name, err)
	}
	defer rc.Close()

	outFile, err := os.Create(destPath)
	if err != nil {
		return fmt.Errorf("failed to create file %q: %w", destPath, err)
	}

	// Single cleanup point. The named result lets a close failure reach the
	// caller instead of being reported as success, and the file is always
	// closed before it is removed (removing a still-open file fails on
	// Windows).
	defer func() {
		cerr := outFile.Close()
		if err == nil && cerr != nil {
			err = fmt.Errorf("failed to close file %q: %w", destPath, cerr)
		}
		if err != nil {
			// Best-effort removal of the partial file; the primary error is
			// already being returned.
			_ = os.Remove(destPath)
		}
	}()

	// Streamed size check: read at most one byte past the limit so an
	// oversized entry is detected without decompressing all of it.
	// io.CopyN reports io.EOF when the entry is shorter than that, which is
	// the normal case.
	written, copyErr := io.CopyN(outFile, rc, maxFileSize+1)
	if copyErr != nil && copyErr != io.EOF {
		return fmt.Errorf("failed to extract %q: %w", f.Name, copyErr)
	}
	if written > maxFileSize {
		return fmt.Errorf("zip entry %q exceeds max size (%d bytes)", f.Name, written)
	}

	return nil
}