mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
refactor skills loader markdown metadata parsing (#1354)
This commit is contained in:
@@ -11,6 +11,7 @@ require (
|
||||
github.com/ergochat/irc-go v0.5.0
|
||||
github.com/gdamore/tcell/v2 v2.13.8
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/gomarkdown/markdown v0.0.0-20260217112301-37c66b85d6ab
|
||||
github.com/gorilla/websocket v1.5.3
|
||||
github.com/h2non/filetype v1.1.3
|
||||
github.com/larksuite/oapi-sdk-go/v3 v3.5.3
|
||||
|
||||
@@ -79,6 +79,8 @@ github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvq
|
||||
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
|
||||
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
|
||||
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
|
||||
github.com/gomarkdown/markdown v0.0.0-20260217112301-37c66b85d6ab h1:VYNivV7P8IRHUam2swVUNkhIdp0LRRFKe4hXNnoZKTc=
|
||||
github.com/gomarkdown/markdown v0.0.0-20260217112301-37c66b85d6ab/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
|
||||
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
|
||||
|
||||
+126
-43
@@ -10,14 +10,15 @@ import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/gomarkdown/markdown"
|
||||
"github.com/gomarkdown/markdown/ast"
|
||||
"github.com/gomarkdown/markdown/parser"
|
||||
"gopkg.in/yaml.v3"
|
||||
|
||||
"github.com/sipeed/picoclaw/pkg/logger"
|
||||
)
|
||||
|
||||
var (
|
||||
namePattern = regexp.MustCompile(`^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*$`)
|
||||
reFrontmatter = regexp.MustCompile(`(?s)^---(?:\r\n|\n|\r)(.*?)(?:\r\n|\n|\r)---`)
|
||||
reStripFrontmatter = regexp.MustCompile(`(?s)^---(?:\r\n|\n|\r)(.*?)(?:\r\n|\n|\r)---(?:\r\n|\n|\r)*`)
|
||||
)
|
||||
var namePattern = regexp.MustCompile(`^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*$`)
|
||||
|
||||
const (
|
||||
MaxNameLength = 64
|
||||
@@ -226,11 +227,20 @@ func (sl *SkillsLoader) getSkillMetadata(skillPath string) *SkillMetadata {
|
||||
return nil
|
||||
}
|
||||
|
||||
frontmatter := sl.extractFrontmatter(string(content))
|
||||
frontmatter, bodyContent := splitFrontmatter(string(content))
|
||||
dirName := filepath.Base(filepath.Dir(skillPath))
|
||||
title, bodyDescription := extractMarkdownMetadata(bodyContent)
|
||||
|
||||
metadata := &SkillMetadata{
|
||||
Name: dirName,
|
||||
Description: bodyDescription,
|
||||
}
|
||||
if title != "" && namePattern.MatchString(title) && len(title) <= MaxNameLength {
|
||||
metadata.Name = title
|
||||
}
|
||||
|
||||
if frontmatter == "" {
|
||||
return &SkillMetadata{
|
||||
Name: filepath.Base(filepath.Dir(skillPath)),
|
||||
}
|
||||
return metadata
|
||||
}
|
||||
|
||||
// Try JSON first (for backward compatibility)
|
||||
@@ -239,60 +249,133 @@ func (sl *SkillsLoader) getSkillMetadata(skillPath string) *SkillMetadata {
|
||||
Description string `json:"description"`
|
||||
}
|
||||
if err := json.Unmarshal([]byte(frontmatter), &jsonMeta); err == nil {
|
||||
return &SkillMetadata{
|
||||
Name: jsonMeta.Name,
|
||||
Description: jsonMeta.Description,
|
||||
if jsonMeta.Name != "" {
|
||||
metadata.Name = jsonMeta.Name
|
||||
}
|
||||
if jsonMeta.Description != "" {
|
||||
metadata.Description = jsonMeta.Description
|
||||
}
|
||||
return metadata
|
||||
}
|
||||
|
||||
// Fall back to simple YAML parsing
|
||||
yamlMeta := sl.parseSimpleYAML(frontmatter)
|
||||
return &SkillMetadata{
|
||||
Name: yamlMeta["name"],
|
||||
Description: yamlMeta["description"],
|
||||
if name := yamlMeta["name"]; name != "" {
|
||||
metadata.Name = name
|
||||
}
|
||||
if description := yamlMeta["description"]; description != "" {
|
||||
metadata.Description = description
|
||||
}
|
||||
return metadata
|
||||
}
|
||||
|
||||
// parseSimpleYAML parses simple key: value YAML format
|
||||
// Example: name: github\n description: "..."
|
||||
// Normalizes line endings to handle \n (Unix), \r\n (Windows), and \r (classic Mac)
|
||||
func extractMarkdownMetadata(content string) (title, description string) {
|
||||
p := parser.NewWithExtensions(parser.CommonExtensions)
|
||||
doc := markdown.Parse([]byte(content), p)
|
||||
if doc == nil {
|
||||
return "", ""
|
||||
}
|
||||
|
||||
ast.WalkFunc(doc, func(node ast.Node, entering bool) ast.WalkStatus {
|
||||
if !entering {
|
||||
return ast.GoToNext
|
||||
}
|
||||
|
||||
switch n := node.(type) {
|
||||
case *ast.Heading:
|
||||
if title == "" && n.Level == 1 {
|
||||
title = nodeText(n)
|
||||
if title != "" && description != "" {
|
||||
return ast.Terminate
|
||||
}
|
||||
}
|
||||
case *ast.Paragraph:
|
||||
if description == "" {
|
||||
description = nodeText(n)
|
||||
if title != "" && description != "" {
|
||||
return ast.Terminate
|
||||
}
|
||||
}
|
||||
}
|
||||
return ast.GoToNext
|
||||
})
|
||||
|
||||
return title, description
|
||||
}
|
||||
|
||||
func nodeText(n ast.Node) string {
|
||||
var b strings.Builder
|
||||
ast.WalkFunc(n, func(node ast.Node, entering bool) ast.WalkStatus {
|
||||
if !entering {
|
||||
return ast.GoToNext
|
||||
}
|
||||
|
||||
switch t := node.(type) {
|
||||
case *ast.Text:
|
||||
b.Write(t.Literal)
|
||||
case *ast.Code:
|
||||
b.Write(t.Literal)
|
||||
case *ast.Softbreak, *ast.Hardbreak, *ast.NonBlockingSpace:
|
||||
b.WriteByte(' ')
|
||||
}
|
||||
return ast.GoToNext
|
||||
})
|
||||
return strings.Join(strings.Fields(b.String()), " ")
|
||||
}
|
||||
|
||||
// parseSimpleYAML parses YAML frontmatter and extracts known metadata fields.
|
||||
func (sl *SkillsLoader) parseSimpleYAML(content string) map[string]string {
|
||||
result := make(map[string]string)
|
||||
|
||||
// Normalize line endings: convert \r\n and \r to \n
|
||||
normalized := strings.ReplaceAll(content, "\r\n", "\n")
|
||||
normalized = strings.ReplaceAll(normalized, "\r", "\n")
|
||||
|
||||
for line := range strings.SplitSeq(normalized, "\n") {
|
||||
line = strings.TrimSpace(line)
|
||||
if line == "" || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
|
||||
parts := strings.SplitN(line, ":", 2)
|
||||
if len(parts) == 2 {
|
||||
key := strings.TrimSpace(parts[0])
|
||||
value := strings.TrimSpace(parts[1])
|
||||
// Remove quotes if present
|
||||
value = strings.Trim(value, "\"'")
|
||||
result[key] = value
|
||||
}
|
||||
var meta struct {
|
||||
Name string `yaml:"name"`
|
||||
Description string `yaml:"description"`
|
||||
}
|
||||
if err := yaml.Unmarshal([]byte(content), &meta); err != nil {
|
||||
return result
|
||||
}
|
||||
if meta.Name != "" {
|
||||
result["name"] = meta.Name
|
||||
}
|
||||
if meta.Description != "" {
|
||||
result["description"] = meta.Description
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (sl *SkillsLoader) extractFrontmatter(content string) string {
|
||||
// Support \n (Unix), \r\n (Windows), and \r (classic Mac) line endings for frontmatter blocks
|
||||
match := reFrontmatter.FindStringSubmatch(content)
|
||||
if len(match) > 1 {
|
||||
return match[1]
|
||||
}
|
||||
return ""
|
||||
frontmatter, _ := splitFrontmatter(content)
|
||||
return frontmatter
|
||||
}
|
||||
|
||||
func (sl *SkillsLoader) stripFrontmatter(content string) string {
|
||||
return reStripFrontmatter.ReplaceAllString(content, "")
|
||||
_, body := splitFrontmatter(content)
|
||||
return body
|
||||
}
|
||||
|
||||
// splitFrontmatter separates a leading frontmatter block from the rest of a
// document.
//
// The frontmatter block must open with a "---" line on the very first line of
// content and is closed by the next "---" line. Line endings (\r\n and lone
// \r) are normalized to \n before scanning, a leading UTF-8 byte order mark is
// ignored, and trailing spaces or tabs after a delimiter are tolerated.
// Indented "---" lines are never treated as delimiters, so they may appear
// inside the frontmatter itself.
//
// When a block is found, frontmatter is the text between the two delimiters
// (without them) and body is the remainder with leading newlines removed; both
// use \n line endings. When the opening delimiter is missing or the block is
// never closed, frontmatter is empty and body is content exactly as passed in.
func splitFrontmatter(content string) (frontmatter, body string) {
	normalized := strings.ReplaceAll(content, "\r\n", "\n")
	normalized = strings.ReplaceAll(normalized, "\r", "\n")
	// A byte order mark in front of the opening delimiter would otherwise
	// make the whole frontmatter block invisible.
	normalized = strings.TrimPrefix(normalized, "\ufeff")

	// strings.Split with a non-empty separator always returns at least one
	// element, so lines[0] is safe even for empty input.
	lines := strings.Split(normalized, "\n")
	if !isFrontmatterDelimiter(lines[0]) {
		return "", content
	}

	end := -1
	for i, line := range lines[1:] {
		if isFrontmatterDelimiter(line) {
			end = i + 1 // index into lines, not into lines[1:]
			break
		}
	}
	if end == -1 {
		return "", content
	}

	frontmatter = strings.Join(lines[1:end], "\n")
	body = strings.TrimLeft(strings.Join(lines[end+1:], "\n"), "\n")
	return frontmatter, body
}

// isFrontmatterDelimiter reports whether line is a "---" frontmatter delimiter,
// ignoring trailing spaces and tabs. Leading whitespace is significant.
func isFrontmatterDelimiter(line string) bool {
	return strings.TrimRight(line, " \t") == "---"
}
|
||||
|
||||
func escapeXML(s string) string {
|
||||
|
||||
@@ -342,3 +342,78 @@ func TestSkillRootsTrimsWhitespaceAndDedups(t *testing.T) {
|
||||
builtin,
|
||||
}, roots)
|
||||
}
|
||||
|
||||
func TestGetSkillMetadata_UsesMarkdownParagraphWhenNoFrontmatter(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
skillDir := filepath.Join(tmp, "workspace", "skills", "plain-skill")
|
||||
require.NoError(t, os.MkdirAll(skillDir, 0o755))
|
||||
|
||||
content := "# Plain Skill\n\nThis is parsed from markdown paragraph.\n"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(content), 0o644))
|
||||
|
||||
sl := &SkillsLoader{}
|
||||
meta := sl.getSkillMetadata(filepath.Join(skillDir, "SKILL.md"))
|
||||
require.NotNil(t, meta)
|
||||
assert.Equal(t, "plain-skill", meta.Name)
|
||||
assert.Equal(t, "This is parsed from markdown paragraph.", meta.Description)
|
||||
}
|
||||
|
||||
func TestGetSkillMetadata_FrontmatterOverridesMarkdown(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
skillDir := filepath.Join(tmp, "workspace", "skills", "plain-skill")
|
||||
require.NoError(t, os.MkdirAll(skillDir, 0o755))
|
||||
|
||||
content := "---\nname: frontmatter-skill\ndescription: frontmatter description\n---\n\n# Plain Skill\n\nBody description.\n"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(content), 0o644))
|
||||
|
||||
sl := &SkillsLoader{}
|
||||
meta := sl.getSkillMetadata(filepath.Join(skillDir, "SKILL.md"))
|
||||
require.NotNil(t, meta)
|
||||
assert.Equal(t, "frontmatter-skill", meta.Name)
|
||||
assert.Equal(t, "frontmatter description", meta.Description)
|
||||
}
|
||||
|
||||
func TestGetSkillMetadata_YAMLMultilineDescription(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
skillDir := filepath.Join(tmp, "workspace", "skills", "plain-skill")
|
||||
require.NoError(t, os.MkdirAll(skillDir, 0o755))
|
||||
|
||||
content := "---\nname: frontmatter-skill\ndescription: |\n line 1: with colon\n line 2\n---\n\n# Plain Skill\n\nBody description.\n"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(content), 0o644))
|
||||
|
||||
sl := &SkillsLoader{}
|
||||
meta := sl.getSkillMetadata(filepath.Join(skillDir, "SKILL.md"))
|
||||
require.NotNil(t, meta)
|
||||
assert.Equal(t, "frontmatter-skill", meta.Name)
|
||||
assert.Equal(t, "line 1: with colon\nline 2", meta.Description)
|
||||
}
|
||||
|
||||
func TestGetSkillMetadata_InvalidHeadingNameFallsBackToDirName(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
skillDir := filepath.Join(tmp, "workspace", "skills", "valid-name")
|
||||
require.NoError(t, os.MkdirAll(skillDir, 0o755))
|
||||
|
||||
content := "# Invalid Heading Name\n\nBody description.\n"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(content), 0o644))
|
||||
|
||||
sl := &SkillsLoader{}
|
||||
meta := sl.getSkillMetadata(filepath.Join(skillDir, "SKILL.md"))
|
||||
require.NotNil(t, meta)
|
||||
assert.Equal(t, "valid-name", meta.Name)
|
||||
assert.Equal(t, "Body description.", meta.Description)
|
||||
}
|
||||
|
||||
func TestGetSkillMetadata_IgnoresHTMLCommentBlocks(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
skillDir := filepath.Join(tmp, "workspace", "skills", "biomed-skill")
|
||||
require.NoError(t, os.MkdirAll(skillDir, 0o755))
|
||||
|
||||
content := "<!--\n# COPYRIGHT NOTICE\n# This file is part of the \"Universal Biomedical Skills\" project.\n# Copyright (c) 2026 MD BABU MIA, PhD <md.babu.mia@mssm.edu>\n# All Rights Reserved.\n#\n# This code is proprietary and confidential.\n# Unauthorized copying of this file, via any medium is strictly prohibited.\n#\n# Provenance: Authenticated by MD BABU MIA\n\n-->\n\n# Biomed Skill\n\nSummarize biomedical papers.\n"
|
||||
require.NoError(t, os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte(content), 0o644))
|
||||
|
||||
sl := &SkillsLoader{}
|
||||
meta := sl.getSkillMetadata(filepath.Join(skillDir, "SKILL.md"))
|
||||
require.NotNil(t, meta)
|
||||
assert.Equal(t, "biomed-skill", meta.Name)
|
||||
assert.Equal(t, "Summarize biomedical papers.", meta.Description)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user