mirror of
https://github.com/sipeed/picoclaw.git
synced 2026-06-12 18:08:54 +00:00
1d8ef7dcfb
Add GetJob and improved UpdateJob to CronService with proper cloning, schedule diffing, and next-run recomputation. Expose get/update actions in the cron tool so agents can inspect and partially update jobs without losing payloads or needing remove+add cycles. Includes access control for remote channels and command safety gates.
629 lines
13 KiB
Go
629 lines
13 KiB
Go
package cron
|
|
|
|
import (
|
|
"crypto/rand"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/adhocore/gronx"
|
|
|
|
"github.com/sipeed/picoclaw/pkg/fileutil"
|
|
)
|
|
|
|
type CronSchedule struct {
|
|
Kind string `json:"kind"`
|
|
AtMS *int64 `json:"atMs,omitempty"`
|
|
EveryMS *int64 `json:"everyMs,omitempty"`
|
|
Expr string `json:"expr,omitempty"`
|
|
TZ string `json:"tz,omitempty"`
|
|
}
|
|
|
|
type CronPayload struct {
|
|
Kind string `json:"kind"`
|
|
Message string `json:"message"`
|
|
Command string `json:"command,omitempty"`
|
|
Channel string `json:"channel,omitempty"`
|
|
To string `json:"to,omitempty"`
|
|
}
|
|
|
|
type CronJobState struct {
|
|
NextRunAtMS *int64 `json:"nextRunAtMs,omitempty"`
|
|
LastRunAtMS *int64 `json:"lastRunAtMs,omitempty"`
|
|
LastStatus string `json:"lastStatus,omitempty"`
|
|
LastError string `json:"lastError,omitempty"`
|
|
}
|
|
|
|
type CronJob struct {
|
|
ID string `json:"id"`
|
|
Name string `json:"name"`
|
|
Enabled bool `json:"enabled"`
|
|
Schedule CronSchedule `json:"schedule"`
|
|
Payload CronPayload `json:"payload"`
|
|
State CronJobState `json:"state"`
|
|
CreatedAtMS int64 `json:"createdAtMs"`
|
|
UpdatedAtMS int64 `json:"updatedAtMs"`
|
|
DeleteAfterRun bool `json:"deleteAfterRun"`
|
|
}
|
|
|
|
type CronStore struct {
|
|
Version int `json:"version"`
|
|
Jobs []CronJob `json:"jobs"`
|
|
}
|
|
|
|
type JobHandler func(job *CronJob) (string, error)
|
|
|
|
type CronService struct {
|
|
storePath string
|
|
store *CronStore
|
|
onJob JobHandler
|
|
mu sync.RWMutex
|
|
running bool
|
|
stopChan chan struct{}
|
|
wakeChan chan struct{}
|
|
gronx *gronx.Gronx
|
|
}
|
|
|
|
func NewCronService(storePath string, onJob JobHandler) *CronService {
|
|
cs := &CronService{
|
|
storePath: storePath,
|
|
onJob: onJob,
|
|
gronx: gronx.New(),
|
|
wakeChan: make(chan struct{}),
|
|
}
|
|
// Initialize and load store on creation
|
|
cs.loadStore()
|
|
return cs
|
|
}
|
|
|
|
func (cs *CronService) Start() error {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
|
|
if cs.running {
|
|
return nil
|
|
}
|
|
|
|
if err := cs.loadStore(); err != nil {
|
|
return fmt.Errorf("failed to load store: %w", err)
|
|
}
|
|
|
|
cs.recomputeNextRuns()
|
|
if err := cs.saveStoreUnsafe(); err != nil {
|
|
return fmt.Errorf("failed to save store: %w", err)
|
|
}
|
|
|
|
cs.stopChan = make(chan struct{})
|
|
if cs.wakeChan == nil {
|
|
cs.wakeChan = make(chan struct{})
|
|
}
|
|
cs.running = true
|
|
go cs.runLoop(cs.stopChan)
|
|
|
|
return nil
|
|
}
|
|
|
|
func (cs *CronService) Stop() {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
|
|
if !cs.running {
|
|
return
|
|
}
|
|
|
|
cs.running = false
|
|
if cs.stopChan != nil {
|
|
close(cs.stopChan)
|
|
cs.stopChan = nil
|
|
}
|
|
}
|
|
|
|
func (cs *CronService) runLoop(stopChan chan struct{}) {
|
|
timer := time.NewTimer(time.Hour)
|
|
if !timer.Stop() {
|
|
<-timer.C
|
|
}
|
|
defer timer.Stop()
|
|
|
|
for {
|
|
// every loop, recalculate the next wake time
|
|
cs.mu.RLock()
|
|
nextWake := cs.getNextWakeMS()
|
|
cs.mu.RUnlock()
|
|
|
|
var delay time.Duration
|
|
now := time.Now().UnixMilli()
|
|
|
|
if nextWake == nil {
|
|
// no jobs, sleep for a long time (or until a new job is added)
|
|
delay = time.Hour
|
|
} else {
|
|
diff := *nextWake - now
|
|
if diff <= 0 {
|
|
delay = 0
|
|
} else {
|
|
delay = time.Duration(diff) * time.Millisecond
|
|
}
|
|
}
|
|
|
|
timer.Reset(delay)
|
|
|
|
select {
|
|
case <-stopChan:
|
|
return
|
|
case <-cs.wakeChan: // wake on new job or update
|
|
if !timer.Stop() {
|
|
select {
|
|
case <-timer.C:
|
|
default:
|
|
}
|
|
}
|
|
continue
|
|
case <-timer.C:
|
|
cs.checkJobs()
|
|
}
|
|
}
|
|
}
|
|
|
|
func (cs *CronService) checkJobs() {
|
|
cs.mu.Lock()
|
|
|
|
if !cs.running {
|
|
cs.mu.Unlock()
|
|
return
|
|
}
|
|
|
|
now := time.Now().UnixMilli()
|
|
var dueJobIDs []string
|
|
|
|
// Collect jobs that are due (we need to copy them to execute outside lock)
|
|
for i := range cs.store.Jobs {
|
|
job := &cs.store.Jobs[i]
|
|
if job.Enabled && job.State.NextRunAtMS != nil && *job.State.NextRunAtMS <= now {
|
|
dueJobIDs = append(dueJobIDs, job.ID)
|
|
}
|
|
}
|
|
|
|
// Reset next run for due jobs before unlocking to avoid duplicate execution.
|
|
dueMap := make(map[string]bool, len(dueJobIDs))
|
|
for _, jobID := range dueJobIDs {
|
|
dueMap[jobID] = true
|
|
}
|
|
for i := range cs.store.Jobs {
|
|
if dueMap[cs.store.Jobs[i].ID] {
|
|
cs.store.Jobs[i].State.NextRunAtMS = nil
|
|
}
|
|
}
|
|
|
|
if err := cs.saveStoreUnsafe(); err != nil {
|
|
log.Printf("[cron] failed to save store: %v", err)
|
|
}
|
|
|
|
cs.mu.Unlock()
|
|
|
|
// Execute jobs outside lock.
|
|
for _, jobID := range dueJobIDs {
|
|
cs.executeJobByID(jobID)
|
|
}
|
|
}
|
|
|
|
func (cs *CronService) executeJobByID(jobID string) {
|
|
startTime := time.Now().UnixMilli()
|
|
|
|
cs.mu.RLock()
|
|
var callbackJob *CronJob
|
|
for i := range cs.store.Jobs {
|
|
job := &cs.store.Jobs[i]
|
|
if job.ID == jobID {
|
|
jobCopy := *job
|
|
callbackJob = &jobCopy
|
|
break
|
|
}
|
|
}
|
|
cs.mu.RUnlock()
|
|
|
|
if callbackJob == nil {
|
|
log.Printf("[cron] job %s not found, skipping", jobID)
|
|
return
|
|
}
|
|
|
|
// Log job execution start
|
|
log.Printf("[cron] ▶ executing job '%s' (id: %s, schedule: %s, channel: %s)",
|
|
callbackJob.Name, jobID, callbackJob.Schedule.Kind, callbackJob.Payload.Channel)
|
|
|
|
var err error
|
|
if cs.onJob != nil {
|
|
_, err = cs.onJob(callbackJob)
|
|
}
|
|
|
|
execDuration := time.Now().UnixMilli() - startTime
|
|
|
|
// Now acquire lock to update state
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
|
|
var job *CronJob
|
|
for i := range cs.store.Jobs {
|
|
if cs.store.Jobs[i].ID == jobID {
|
|
job = &cs.store.Jobs[i]
|
|
break
|
|
}
|
|
}
|
|
if job == nil {
|
|
log.Printf("[cron] job %s disappeared before state update", jobID)
|
|
return
|
|
}
|
|
|
|
job.State.LastRunAtMS = &startTime
|
|
job.UpdatedAtMS = time.Now().UnixMilli()
|
|
|
|
if err != nil {
|
|
job.State.LastStatus = "error"
|
|
job.State.LastError = err.Error()
|
|
log.Printf("[cron] ✗ job '%s' failed after %dms: %v", job.Name, execDuration, err)
|
|
} else {
|
|
job.State.LastStatus = "ok"
|
|
job.State.LastError = ""
|
|
}
|
|
|
|
// Compute next run time
|
|
var nextRunStr string
|
|
if job.Schedule.Kind == "at" {
|
|
if job.DeleteAfterRun {
|
|
cs.removeJobUnsafe(job.ID)
|
|
nextRunStr = "(deleted)"
|
|
} else {
|
|
job.Enabled = false
|
|
job.State.NextRunAtMS = nil
|
|
nextRunStr = "(disabled)"
|
|
}
|
|
} else {
|
|
nextRun := cs.computeNextRun(&job.Schedule, time.Now().UnixMilli())
|
|
job.State.NextRunAtMS = nextRun
|
|
if nextRun != nil {
|
|
nextRunStr = time.UnixMilli(*nextRun).Format("2006-01-02 15:04:05")
|
|
} else {
|
|
nextRunStr = "(none)"
|
|
}
|
|
}
|
|
|
|
if err == nil {
|
|
log.Printf("[cron] ✓ job '%s' completed in %dms, next run: %s", job.Name, execDuration, nextRunStr)
|
|
}
|
|
|
|
if err := cs.saveStoreUnsafe(); err != nil {
|
|
log.Printf("[cron] failed to save store: %v", err)
|
|
}
|
|
}
|
|
|
|
func (cs *CronService) computeNextRun(schedule *CronSchedule, nowMS int64) *int64 {
|
|
switch schedule.Kind {
|
|
case "at":
|
|
if schedule.AtMS != nil && *schedule.AtMS > nowMS {
|
|
return schedule.AtMS
|
|
}
|
|
return nil
|
|
case "every":
|
|
if schedule.EveryMS == nil || *schedule.EveryMS <= 0 {
|
|
return nil
|
|
}
|
|
next := nowMS + *schedule.EveryMS
|
|
return &next
|
|
case "cron":
|
|
if schedule.Expr == "" {
|
|
return nil
|
|
}
|
|
|
|
// Use gronx to calculate next run time
|
|
now := time.UnixMilli(nowMS)
|
|
nextTime, err := gronx.NextTickAfter(schedule.Expr, now, false)
|
|
if err != nil {
|
|
log.Printf("[cron] failed to compute next run for expr '%s': %v", schedule.Expr, err)
|
|
return nil
|
|
}
|
|
|
|
nextMS := nextTime.UnixMilli()
|
|
return &nextMS
|
|
default:
|
|
log.Printf("[cron] unknown schedule kind '%s'", schedule.Kind)
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// wake up the loop to re-evaluate next wake time immediately (e.g. after add/update/remove jobs)
|
|
func (cs *CronService) notify() {
|
|
select {
|
|
case cs.wakeChan <- struct{}{}:
|
|
default:
|
|
// if the channel is full, it means the loop will wake up soon anyway, so we can skip sending
|
|
}
|
|
}
|
|
|
|
func (cs *CronService) recomputeNextRuns() {
|
|
now := time.Now().UnixMilli()
|
|
for i := range cs.store.Jobs {
|
|
job := &cs.store.Jobs[i]
|
|
if job.Enabled {
|
|
job.State.NextRunAtMS = cs.computeNextRun(&job.Schedule, now)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (cs *CronService) getNextWakeMS() *int64 {
|
|
var nextWake *int64
|
|
for _, job := range cs.store.Jobs {
|
|
if job.Enabled && job.State.NextRunAtMS != nil {
|
|
if nextWake == nil || *job.State.NextRunAtMS < *nextWake {
|
|
nextWake = job.State.NextRunAtMS
|
|
}
|
|
}
|
|
}
|
|
return nextWake
|
|
}
|
|
|
|
func (cs *CronService) Load() error {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
return cs.loadStore()
|
|
}
|
|
|
|
func (cs *CronService) SetOnJob(handler JobHandler) {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
cs.onJob = handler
|
|
}
|
|
|
|
func (cs *CronService) loadStore() error {
|
|
cs.store = &CronStore{
|
|
Version: 1,
|
|
Jobs: []CronJob{},
|
|
}
|
|
|
|
data, err := os.ReadFile(cs.storePath)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
|
|
return json.Unmarshal(data, cs.store)
|
|
}
|
|
|
|
func (cs *CronService) saveStoreUnsafe() error {
|
|
data, err := json.MarshalIndent(cs.store, "", " ")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Use unified atomic write utility with explicit sync for flash storage reliability.
|
|
return fileutil.WriteFileAtomic(cs.storePath, data, 0o600)
|
|
}
|
|
|
|
func (cs *CronService) AddJob(
|
|
name string,
|
|
schedule CronSchedule,
|
|
message string,
|
|
channel, to string,
|
|
) (*CronJob, error) {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
|
|
now := time.Now().UnixMilli()
|
|
|
|
// One-time tasks (at) should be deleted after execution
|
|
deleteAfterRun := (schedule.Kind == "at")
|
|
|
|
job := CronJob{
|
|
ID: generateID(),
|
|
Name: name,
|
|
Enabled: true,
|
|
Schedule: schedule,
|
|
Payload: CronPayload{
|
|
Kind: "agent_turn",
|
|
Message: message,
|
|
Channel: channel,
|
|
To: to,
|
|
},
|
|
State: CronJobState{
|
|
NextRunAtMS: cs.computeNextRun(&schedule, now),
|
|
},
|
|
CreatedAtMS: now,
|
|
UpdatedAtMS: now,
|
|
DeleteAfterRun: deleteAfterRun,
|
|
}
|
|
|
|
cs.store.Jobs = append(cs.store.Jobs, job)
|
|
if err := cs.saveStoreUnsafe(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
cs.notify()
|
|
|
|
return &job, nil
|
|
}
|
|
|
|
func (cs *CronService) GetJob(jobID string) (*CronJob, bool) {
|
|
cs.mu.RLock()
|
|
defer cs.mu.RUnlock()
|
|
|
|
for i := range cs.store.Jobs {
|
|
if cs.store.Jobs[i].ID == jobID {
|
|
jobCopy := cloneCronJob(cs.store.Jobs[i])
|
|
return &jobCopy, true
|
|
}
|
|
}
|
|
return nil, false
|
|
}
|
|
|
|
func (cs *CronService) UpdateJob(job *CronJob) error {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
|
|
for i := range cs.store.Jobs {
|
|
if cs.store.Jobs[i].ID == job.ID {
|
|
previous := cs.store.Jobs[i]
|
|
updated := cloneCronJob(*job)
|
|
now := time.Now().UnixMilli()
|
|
updated.UpdatedAtMS = now
|
|
if updated.Enabled {
|
|
if previous.Enabled != updated.Enabled || !sameSchedule(previous.Schedule, updated.Schedule) {
|
|
updated.State.NextRunAtMS = cs.computeNextRun(&updated.Schedule, now)
|
|
}
|
|
} else {
|
|
updated.State.NextRunAtMS = nil
|
|
}
|
|
cs.store.Jobs[i] = updated
|
|
|
|
cs.notify()
|
|
|
|
return cs.saveStoreUnsafe()
|
|
}
|
|
}
|
|
return fmt.Errorf("job not found")
|
|
}
|
|
|
|
func cloneCronJob(job CronJob) CronJob {
|
|
clone := job
|
|
if job.Schedule.AtMS != nil {
|
|
atMS := *job.Schedule.AtMS
|
|
clone.Schedule.AtMS = &atMS
|
|
}
|
|
if job.Schedule.EveryMS != nil {
|
|
everyMS := *job.Schedule.EveryMS
|
|
clone.Schedule.EveryMS = &everyMS
|
|
}
|
|
if job.State.NextRunAtMS != nil {
|
|
nextRunAtMS := *job.State.NextRunAtMS
|
|
clone.State.NextRunAtMS = &nextRunAtMS
|
|
}
|
|
if job.State.LastRunAtMS != nil {
|
|
lastRunAtMS := *job.State.LastRunAtMS
|
|
clone.State.LastRunAtMS = &lastRunAtMS
|
|
}
|
|
return clone
|
|
}
|
|
|
|
func sameSchedule(a, b CronSchedule) bool {
|
|
return a.Kind == b.Kind &&
|
|
sameInt64(a.AtMS, b.AtMS) &&
|
|
sameInt64(a.EveryMS, b.EveryMS) &&
|
|
a.Expr == b.Expr &&
|
|
a.TZ == b.TZ
|
|
}
|
|
|
|
func sameInt64(a, b *int64) bool {
|
|
if a == nil || b == nil {
|
|
return a == b
|
|
}
|
|
return *a == *b
|
|
}
|
|
|
|
func (cs *CronService) RemoveJob(jobID string) bool {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
|
|
return cs.removeJobUnsafe(jobID)
|
|
}
|
|
|
|
func (cs *CronService) removeJobUnsafe(jobID string) bool {
|
|
before := len(cs.store.Jobs)
|
|
var jobs []CronJob
|
|
for _, job := range cs.store.Jobs {
|
|
if job.ID != jobID {
|
|
jobs = append(jobs, job)
|
|
}
|
|
}
|
|
cs.store.Jobs = jobs
|
|
removed := len(cs.store.Jobs) < before
|
|
|
|
if removed {
|
|
if err := cs.saveStoreUnsafe(); err != nil {
|
|
log.Printf("[cron] failed to save store after remove: %v", err)
|
|
}
|
|
}
|
|
|
|
cs.notify()
|
|
|
|
return removed
|
|
}
|
|
|
|
func (cs *CronService) EnableJob(jobID string, enabled bool) *CronJob {
|
|
cs.mu.Lock()
|
|
defer cs.mu.Unlock()
|
|
|
|
for i := range cs.store.Jobs {
|
|
job := &cs.store.Jobs[i]
|
|
if job.ID == jobID {
|
|
job.Enabled = enabled
|
|
job.UpdatedAtMS = time.Now().UnixMilli()
|
|
|
|
if enabled {
|
|
job.State.NextRunAtMS = cs.computeNextRun(&job.Schedule, time.Now().UnixMilli())
|
|
} else {
|
|
job.State.NextRunAtMS = nil
|
|
}
|
|
|
|
if err := cs.saveStoreUnsafe(); err != nil {
|
|
log.Printf("[cron] failed to save store after enable: %v", err)
|
|
}
|
|
|
|
cs.notify()
|
|
|
|
return job
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (cs *CronService) ListJobs(includeDisabled bool) []CronJob {
|
|
cs.mu.RLock()
|
|
defer cs.mu.RUnlock()
|
|
|
|
if includeDisabled {
|
|
return cs.store.Jobs
|
|
}
|
|
|
|
var enabled []CronJob
|
|
for _, job := range cs.store.Jobs {
|
|
if job.Enabled {
|
|
enabled = append(enabled, job)
|
|
}
|
|
}
|
|
|
|
return enabled
|
|
}
|
|
|
|
func (cs *CronService) Status() map[string]any {
|
|
cs.mu.RLock()
|
|
defer cs.mu.RUnlock()
|
|
|
|
var enabledCount int
|
|
for _, job := range cs.store.Jobs {
|
|
if job.Enabled {
|
|
enabledCount++
|
|
}
|
|
}
|
|
|
|
return map[string]any{
|
|
"enabled": cs.running,
|
|
"jobs": len(cs.store.Jobs),
|
|
"nextWakeAtMS": cs.getNextWakeMS(),
|
|
}
|
|
}
|
|
|
|
func generateID() string {
|
|
// Use crypto/rand for better uniqueness under concurrent access
|
|
b := make([]byte, 8)
|
|
if _, err := rand.Read(b); err != nil {
|
|
// Fallback to time-based if crypto/rand fails
|
|
return fmt.Sprintf("%d", time.Now().UnixNano())
|
|
}
|
|
return hex.EncodeToString(b)
|
|
}
|