// picoclaw/pkg/agent/subturn_test.go

package agent

import (
	"context"
	"fmt"
	"reflect"
	"sync"
	"testing"
	"time"

	"github.com/sipeed/picoclaw/pkg/providers"
	"github.com/sipeed/picoclaw/pkg/tools"
)

// ====================== Test Helper: Event Collector ======================

// eventCollector records every event passed to collect so a test can later
// ask which event types were seen. The mutex guards the slice because collect
// is installed as an event-bus callback and may therefore be invoked from
// goroutines other than the one running the test.
type eventCollector struct {
	mu     sync.Mutex
	events []any
}

// collect appends e to the recorded events. It is safe for concurrent use.
func (c *eventCollector) collect(e any) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.events = append(c.events, e)
}

// hasEventOfType reports whether at least one recorded event has exactly the
// same dynamic type as typ. Only the type of typ matters; its value is ignored.
func (c *eventCollector) hasEventOfType(typ any) bool {
	return c.countOfType(typ) > 0
}

// countOfType returns how many recorded events have exactly the same dynamic
// type as typ. A value type and a pointer to it are different types and are
// counted separately.
func (c *eventCollector) countOfType(typ any) int {
	want := reflect.TypeOf(typ)

	c.mu.Lock()
	defer c.mu.Unlock()

	n := 0
	for _, e := range c.events {
		if reflect.TypeOf(e) == want {
			n++
		}
	}
	return n
}

// ====================== Main Test Function ======================

// TestSpawnSubTurn drives spawnSubTurn through a table of parent depths and
// configurations.
//
// Cases with a non-nil wantErr only assert that exactly that error value is
// returned; nothing else is checked for them. Success cases additionally
// assert that a result is returned, that the expected spawn/end events were
// emitted, that the parent recorded a child turn ID, and that the child's
// result reached the parent through pendingResults or the session history.
func TestSpawnSubTurn(t *testing.T) {
	tests := []struct {
		name          string
		parentDepth   int
		config        SubTurnConfig
		wantErr       error
		wantSpawn     bool
		wantEnd       bool
		wantDepthFail bool
	}{
		{
			name:        "Basic success path - Single layer sub-turn",
			parentDepth: 0,
			config: SubTurnConfig{
				Model: "gpt-4o-mini",
				Tools: []tools.Tool{}, // Explicitly empty tool list
			},
			wantErr:   nil,
			wantSpawn: true,
			wantEnd:   true,
		},
		{
			name:        "Nested 2 layers - Normal",
			parentDepth: 1,
			config: SubTurnConfig{
				Model: "gpt-4o-mini",
				Tools: []tools.Tool{},
			},
			wantErr:   nil,
			wantSpawn: true,
			wantEnd:   true,
		},
		{
			name:        "Depth limit triggered - 4th layer fails",
			parentDepth: 3,
			config: SubTurnConfig{
				Model: "gpt-4o-mini",
				Tools: []tools.Tool{},
			},
			wantErr:       ErrDepthLimitExceeded,
			wantSpawn:     false,
			wantEnd:       false,
			wantDepthFail: true,
		},
		{
			name:        "Invalid config - Empty Model",
			parentDepth: 0,
			config: SubTurnConfig{
				Model: "",
				Tools: []tools.Tool{},
			},
			wantErr:   ErrInvalidSubTurnConfig,
			wantSpawn: false,
			wantEnd:   false,
		},
	}

	// One agent loop is shared by every sub-test.
	al, _, _, _, cleanup := newTestAgentLoop(t)
	defer cleanup()

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Fresh parent turn per case so state never leaks between cases.
			parent := &turnState{
				ctx:            context.Background(),
				turnID:         "parent-1",
				depth:          tt.parentDepth,
				childTurnIDs:   []string{},
				pendingResults: make(chan *tools.ToolResult, 10),
				session:        &ephemeralSessionStore{},
			}

			// Capture emitted events; put the previous emitter back when the
			// sub-test returns.
			collector := &eventCollector{}
			originalEmit := MockEventBus.Emit
			MockEventBus.Emit = collector.collect
			defer func() { MockEventBus.Emit = originalEmit }()

			result, err := spawnSubTurn(context.Background(), al, parent, tt.config)

			// Failure cases: only the returned error is asserted.
			if tt.wantErr != nil {
				if err != tt.wantErr {
					t.Errorf("expected error %v, got %v", tt.wantErr, err)
				}
				return
			}
			if err != nil {
				t.Errorf("unexpected error: %v", err)
				return
			}

			if result == nil {
				t.Error("expected non-nil result")
			}

			// Event emission.
			if tt.wantSpawn && !collector.hasEventOfType(SubTurnSpawnEvent{}) {
				t.Error("SubTurnSpawnEvent not emitted")
			}
			if tt.wantEnd && !collector.hasEventOfType(SubTurnEndEvent{}) {
				t.Error("SubTurnEndEvent not emitted")
			}

			// Turn tree: read childTurnIDs under the turn's lock, as
			// TestNestedSubTurnHierarchy does.
			parent.mu.Lock()
			childCount := len(parent.childTurnIDs)
			parent.mu.Unlock()
			if childCount == 0 && !tt.wantDepthFail {
				t.Error("child Turn not added to parent.childTurnIDs")
			}

			// Result delivery: either path (pendingResults or history) counts.
			if len(parent.pendingResults) == 0 && len(parent.session.GetHistory("")) == 0 {
				t.Error("child result not delivered")
			}
		})
	}
}

// ====================== Extra Independent Test: Ephemeral Session Isolation ======================

// TestSpawnSubTurn_EphemeralSessionIsolation checks that running a sub-turn
// leaves the parent's session history at the length it had before the spawn.
// The spawn must succeed first; otherwise the length comparison would pass
// without any sub-turn having run.
func TestSpawnSubTurn_EphemeralSessionIsolation(t *testing.T) {
	al, _, _, _, cleanup := newTestAgentLoop(t)
	defer cleanup()

	parentSession := &ephemeralSessionStore{}
	parentSession.AddMessage("", "user", "parent msg")
	parent := &turnState{
		ctx:            context.Background(),
		turnID:         "parent-1",
		depth:          0,
		pendingResults: make(chan *tools.ToolResult, 1),
		session:        parentSession,
	}

	cfg := SubTurnConfig{Model: "gpt-4o-mini", Tools: []tools.Tool{}}

	// Record main session length before execution
	originalLen := len(parent.session.GetHistory(""))

	if _, err := spawnSubTurn(context.Background(), al, parent, cfg); err != nil {
		t.Fatalf("spawnSubTurn failed: %v", err)
	}

	// After sub-turn ends, main session must remain unchanged
	if len(parent.session.GetHistory("")) != originalLen {
		t.Error("ephemeral session polluted the main session")
	}
}

// ====================== Extra Independent Test: Result Delivery Path ======================

// TestSpawnSubTurn_ResultDelivery checks that, once spawnSubTurn has returned
// successfully, a non-nil result is already waiting in the parent's
// pendingResults channel.
func TestSpawnSubTurn_ResultDelivery(t *testing.T) {
	al, _, _, _, cleanup := newTestAgentLoop(t)
	defer cleanup()

	parent := &turnState{
		ctx:            context.Background(),
		turnID:         "parent-1",
		depth:          0,
		pendingResults: make(chan *tools.ToolResult, 1),
		session:        &ephemeralSessionStore{},
	}

	cfg := SubTurnConfig{Model: "gpt-4o-mini", Tools: []tools.Tool{}}

	// Surface a spawn failure directly instead of as a confusing
	// "result did not enter pendingResults".
	if _, err := spawnSubTurn(context.Background(), al, parent, cfg); err != nil {
		t.Fatalf("spawnSubTurn failed: %v", err)
	}

	// Non-blocking receive: the result must already be queued.
	select {
	case res := <-parent.pendingResults:
		if res == nil {
			t.Error("received nil result in pendingResults")
		}
	default:
		t.Error("result did not enter pendingResults")
	}
}

// ====================== Extra Independent Test: Orphan Result Routing ======================

// TestSpawnSubTurn_OrphanResultRouting checks what happens when a result is
// delivered to a parent turn that has already been finished: a
// SubTurnOrphanResultEvent must be emitted and the parent's session history
// must stay empty.
func TestSpawnSubTurn_OrphanResultRouting(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	finishedParent := &turnState{
		ctx:            ctx,
		cancelFunc:     cancel,
		turnID:         "parent-1",
		depth:          0,
		pendingResults: make(chan *tools.ToolResult, 1),
		session:        &ephemeralSessionStore{},
	}

	// Route every emitted event into a collector; put the previous emitter
	// back when the test returns.
	events := &eventCollector{}
	savedEmit := MockEventBus.Emit
	MockEventBus.Emit = events.collect
	defer func() { MockEventBus.Emit = savedEmit }()

	// The parent finishes first; the child's result then arrives late.
	finishedParent.Finish()
	deliverSubTurnResult(finishedParent, "delayed-child", &tools.ToolResult{ForLLM: "late result"})

	// The late result must be reported as an orphan...
	if orphanSeen := events.hasEventOfType(SubTurnOrphanResultEvent{}); !orphanSeen {
		t.Error("SubTurnOrphanResultEvent not emitted for finished parent")
	}

	// ...and must not have been written into the parent's history.
	if historyLen := len(finishedParent.session.GetHistory("")); historyLen != 0 {
		t.Error("Parent history was polluted by orphan result")
	}
}

// ====================== Extra Independent Test: Result Channel Registration ======================

// TestSubTurnResultChannelRegistration checks the registration state of the
// parent's result channel around a spawn: nothing is registered under the
// parent's turn ID beforehand, and nothing is left registered after
// spawnSubTurn has returned successfully.
func TestSubTurnResultChannelRegistration(t *testing.T) {
	al, _, _, _, cleanup := newTestAgentLoop(t)
	defer cleanup()

	parent := &turnState{
		ctx:            context.Background(),
		turnID:         "parent-reg-1",
		depth:          0,
		pendingResults: make(chan *tools.ToolResult, 4),
		session:        &ephemeralSessionStore{},
	}

	cfg := SubTurnConfig{Model: "gpt-4o-mini", Tools: []tools.Tool{}}

	// Before spawn: channel should not be registered
	if results := al.dequeuePendingSubTurnResults(parent.turnID); results != nil {
		t.Error("expected no channel before spawnSubTurn")
	}

	// A failed spawn would make the "unregistered afterwards" check pass
	// without ever exercising registration, so fail loudly here instead.
	if _, err := spawnSubTurn(context.Background(), al, parent, cfg); err != nil {
		t.Fatalf("spawnSubTurn failed: %v", err)
	}

	// After spawn completes: channel should be unregistered (defer cleanup in spawnSubTurn)
	if _, ok := al.subTurnResults.Load(parent.turnID); ok {
		t.Error("channel should be unregistered after spawnSubTurn completes")
	}
}

// ====================== Extra Independent Test: Dequeue Pending SubTurn Results ======================

// TestDequeuePendingSubTurnResults exercises dequeuePendingSubTurnResults on a
// manually registered channel: an empty channel yields no results, queued
// results come back in send order, a second call finds the channel drained,
// and an unregistered key yields nil.
func TestDequeuePendingSubTurnResults(t *testing.T) {
	al, _, _, _, cleanup := newTestAgentLoop(t)
	defer cleanup()

	sessionKey := "test-session-dequeue"
	ch := make(chan *tools.ToolResult, 4)

	// Register channel manually
	al.registerSubTurnResultChannel(sessionKey, ch)
	defer al.unregisterSubTurnResultChannel(sessionKey)

	// Empty channel yields no results (nil or empty slice both accepted)
	if results := al.dequeuePendingSubTurnResults(sessionKey); len(results) != 0 {
		t.Errorf("expected empty results, got %d", len(results))
	}

	// Put 3 results in
	want := []string{"result-1", "result-2", "result-3"}
	for _, text := range want {
		ch <- &tools.ToolResult{ForLLM: text}
	}

	results := al.dequeuePendingSubTurnResults(sessionKey)
	// Fatal, not Error: the per-index checks below would panic on a short slice.
	if len(results) != len(want) {
		t.Fatalf("expected %d results, got %d", len(want), len(results))
	}
	for i, text := range want {
		if results[i] == nil || results[i].ForLLM != text {
			t.Errorf("results order or content mismatch at index %d: want %q", i, text)
		}
	}

	// Channel should be drained now
	if results := al.dequeuePendingSubTurnResults(sessionKey); len(results) != 0 {
		t.Errorf("expected empty after drain, got %d", len(results))
	}

	// Unregistered session returns nil
	al.unregisterSubTurnResultChannel(sessionKey)
	if results := al.dequeuePendingSubTurnResults(sessionKey); results != nil {
		t.Error("expected nil for unregistered session")
	}
}

// ====================== Extra Independent Test: Concurrency Semaphore ======================

// TestSubTurnConcurrencySemaphore spawns as many concurrent sub-turns as the
// parent's semaphore has slots and checks that all of them complete without
// error and without hanging.
//
// It does not test blocking above the limit: the comments in the original test
// note that the mock provider returns immediately, so slots are not held long
// enough to observe contention.
func TestSubTurnConcurrencySemaphore(t *testing.T) {
	al, _, _, _, cleanup := newTestAgentLoop(t)
	defer cleanup()

	const limit = 2 // Only allow 2 concurrent children

	parent := &turnState{
		ctx:            context.Background(),
		turnID:         "parent-concurrency",
		depth:          0,
		pendingResults: make(chan *tools.ToolResult, 10),
		session:        &ephemeralSessionStore{},
		concurrencySem: make(chan struct{}, limit),
	}

	cfg := SubTurnConfig{Model: "gpt-4o-mini", Tools: []tools.Tool{}}

	// Buffered so each goroutine can report and exit even if the test has
	// already given up waiting.
	errs := make(chan error, limit)
	for i := 0; i < limit; i++ {
		go func() {
			_, err := spawnSubTurn(context.Background(), al, parent, cfg)
			errs <- err
		}()
	}

	// Spawns at or below the limit must not block; bound the wait so a
	// regression fails this test instead of stalling the whole run.
	for i := 0; i < limit; i++ {
		select {
		case err := <-errs:
			if err != nil {
				t.Errorf("spawnSubTurn under the concurrency limit failed: %v", err)
			}
		case <-time.After(5 * time.Second):
			t.Fatal("timed out waiting for sub-turns under the concurrency limit")
		}
	}

	// Verify that the semaphore still has the configured capacity.
	if cap(parent.concurrencySem) != limit {
		t.Errorf("expected semaphore capacity 2, got %d", cap(parent.concurrencySem))
	}
}

// ====================== Extra Independent Test: Hard Abort Cascading ======================

// TestHardAbortCascading checks that HardAbort on a registered root turn
// cancels the root's context and, through it, a context derived for a child
// turn. The child turn state is never registered with the loop; it is tied to
// the root only by context derivation. The test also checks that HardAbort
// returns an error for an unknown session key.
func TestHardAbortCascading(t *testing.T) {
	al, _, _, _, cleanup := newTestAgentLoop(t)
	defer cleanup()

	// isCanceled reports, without blocking, whether ctx is already done.
	isCanceled := func(ctx context.Context) bool {
		select {
		case <-ctx.Done():
			return true
		default:
			return false
		}
	}

	sessionKey := "test-session-abort"
	rootCtx, rootCancel := context.WithCancel(context.Background())
	defer rootCancel()

	// Root turn, carrying its own cancel function so Finish() can trigger it.
	root := &turnState{
		ctx:            rootCtx,
		cancelFunc:     rootCancel,
		turnID:         sessionKey,
		depth:          0,
		session:        &ephemeralSessionStore{},
		pendingResults: make(chan *tools.ToolResult, 16),
		concurrencySem: make(chan struct{}, 5),
	}
	al.activeTurnStates.Store(sessionKey, root)
	defer al.activeTurnStates.Delete(sessionKey)

	// Child turn whose context hangs off the root's context.
	derivedCtx, derivedCancel := context.WithCancel(root.ctx)
	defer derivedCancel()
	child := &turnState{
		ctx:            derivedCtx,
		cancelFunc:     derivedCancel,
		turnID:         "child-1",
		parentTurnID:   sessionKey,
		depth:          1,
		session:        &ephemeralSessionStore{},
		pendingResults: make(chan *tools.ToolResult, 16),
		concurrencySem: make(chan struct{}, 5),
	}

	// Precondition: nothing is canceled before the abort.
	if isCanceled(root.ctx) {
		t.Error("root context should not be canceled yet")
	}
	if isCanceled(child.ctx) {
		t.Error("child context should not be canceled yet")
	}

	if err := al.HardAbort(sessionKey); err != nil {
		t.Errorf("HardAbort failed: %v", err)
	}

	// Postcondition: both the root and the derived child context are done.
	if !isCanceled(root.ctx) {
		t.Error("root context should be canceled after HardAbort")
	}
	if !isCanceled(child.ctx) {
		t.Error("child context should be canceled after HardAbort (cascading)")
	}

	// An unknown session key must be rejected.
	if err := al.HardAbort("non-existent-session"); err == nil {
		t.Error("expected error for non-existent session")
	}
}

// TestHardAbortSessionRollback verifies that HardAbort rolls back session history
// to the state before the turn started, discarding all messages added during the turn.
//
// The session starts with two messages and the turn state records
// initialHistoryLength = 2. Two more messages are appended, HardAbort is
// called, and the history must again hold exactly the two original messages.
func TestHardAbortSessionRollback(t *testing.T) {
	al, _, _, _, cleanup := newTestAgentLoop(t)
	defer cleanup()

	// Create a session with initial history
	sess := &ephemeralSessionStore{
		history: []providers.Message{
			{Role: "user", Content: "initial message 1"},
			{Role: "assistant", Content: "initial response 1"},
		},
	}

	// Create a root turnState with initialHistoryLength = 2
	rootTS := &turnState{
		ctx:                  context.Background(),
		turnID:               "test-session",
		depth:                0,
		session:              sess,
		initialHistoryLength: 2, // Snapshot: 2 messages
		pendingResults:       make(chan *tools.ToolResult, 16),
		concurrencySem:       make(chan struct{}, 5),
	}

	// Register the turn state
	al.activeTurnStates.Store("test-session", rootTS)

	// Simulate adding messages during the turn (e.g., user input + assistant response)
	sess.AddMessage("", "user", "new user message")
	sess.AddMessage("", "assistant", "new assistant response")

	// Verify history grew to 4 messages
	if got := len(sess.GetHistory("")); got != 4 {
		t.Fatalf("expected 4 messages before abort, got %d", got)
	}

	// Trigger HardAbort
	if err := al.HardAbort("test-session"); err != nil {
		t.Fatalf("HardAbort failed: %v", err)
	}

	// Verify history rolled back to initial 2 messages.
	// Fatal, not Error: the content check below indexes [0] and [1] and would
	// panic on a shorter slice.
	finalHistory := sess.GetHistory("")
	if len(finalHistory) != 2 {
		t.Fatalf("expected history to rollback to 2 messages, got %d", len(finalHistory))
	}

	// Verify the content matches the initial state
	if finalHistory[0].Content != "initial message 1" || finalHistory[1].Content != "initial response 1" {
		t.Error("history content does not match initial state after rollback")
	}
}

// TestNestedSubTurnHierarchy verifies the parent-child bookkeeping of a
// spawned sub-turn: exactly one SubTurnSpawnEvent is emitted, it names the
// root turn as parent, and the root records exactly one child turn ID.
//
// NOTE(review): despite the name, only a single level (root -> child) is
// spawned here; deeper nesting and depth values are not asserted.
func TestNestedSubTurnHierarchy(t *testing.T) {
	al, _, _, _, cleanup := newTestAgentLoop(t)
	defer cleanup()

	// spawnRecord keeps the IDs carried by one SubTurnSpawnEvent.
	type spawnRecord struct {
		parentID string
		childID  string
	}
	var (
		mu      sync.Mutex
		spawned []spawnRecord
	)

	// Override MockEventBus to capture spawn events
	originalEmit := MockEventBus.Emit
	defer func() { MockEventBus.Emit = originalEmit }()

	MockEventBus.Emit = func(event any) {
		spawnEvent, ok := event.(SubTurnSpawnEvent)
		if !ok {
			return
		}
		mu.Lock()
		defer mu.Unlock()
		spawned = append(spawned, spawnRecord{
			parentID: spawnEvent.ParentID,
			childID:  spawnEvent.ChildID,
		})
	}

	// Create a root turn
	rootSession := &ephemeralSessionStore{}
	rootTS := &turnState{
		ctx:            context.Background(),
		turnID:         "root-turn",
		depth:          0,
		session:        rootSession,
		pendingResults: make(chan *tools.ToolResult, 16),
		concurrencySem: make(chan struct{}, 5),
	}

	// Spawn a child (depth 1)
	childCfg := SubTurnConfig{Model: "gpt-4o-mini"}
	_, err := spawnSubTurn(context.Background(), al, rootTS, childCfg)
	if err != nil {
		t.Fatalf("failed to spawn child: %v", err)
	}

	// Copy the captured events under the lock and assert on the copy, so no
	// t.Fatalf runs while the mutex is held.
	mu.Lock()
	recorded := append([]spawnRecord(nil), spawned...)
	mu.Unlock()

	if len(recorded) != 1 {
		t.Fatalf("expected 1 spawn event, got %d", len(recorded))
	}
	if recorded[0].parentID != "root-turn" {
		t.Errorf("expected parent ID 'root-turn', got %s", recorded[0].parentID)
	}

	// Verify root turn has the child in its childTurnIDs
	rootTS.mu.Lock()
	childCount := len(rootTS.childTurnIDs)
	rootTS.mu.Unlock()
	if childCount != 1 {
		t.Errorf("expected root to have 1 child, got %d", childCount)
	}
}

// TestDeliverSubTurnResultNoDeadlock verifies that deliverSubTurnResult doesn't
// deadlock when multiple goroutines are accessing the parent turnState concurrently.
//
// Ten goroutines deliver into a channel with a buffer of two while a drain
// goroutine keeps receiving. The test passes when every delivery call returns
// within the timeout. The drain goroutine never touches t and is stopped and
// joined before the test returns, because the testing package does not allow
// t.Error to be called after the test function has finished.
func TestDeliverSubTurnResultNoDeadlock(t *testing.T) {
	parent := &turnState{
		ctx:            context.Background(),
		turnID:         "parent-deadlock-test",
		depth:          0,
		pendingResults: make(chan *tools.ToolResult, 2), // Small buffer to test blocking
		isFinished:     false,
	}

	// Keep the channel drained for as long as the test runs.
	stop := make(chan struct{})
	drained := make(chan struct{})
	go func() {
		defer close(drained)
		for {
			select {
			case _, ok := <-parent.pendingResults:
				if !ok {
					return
				}
			case <-stop:
				return
			}
		}
	}()
	defer func() {
		close(stop)
		<-drained
	}()

	// Simulate multiple child turns delivering results concurrently
	var wg sync.WaitGroup
	numChildren := 10

	for i := 0; i < numChildren; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			result := &tools.ToolResult{ForLLM: fmt.Sprintf("result-%d", id)}
			deliverSubTurnResult(parent, fmt.Sprintf("child-%d", id), result)
		}(i)
	}

	// Wait for all deliveries to complete (with timeout)
	done := make(chan struct{})
	go func() {
		wg.Wait()
		close(done)
	}()

	select {
	case <-done:
		// Success - no deadlock
	case <-time.After(3 * time.Second):
		t.Fatal("deadlock detected: deliverSubTurnResult blocked")
	}
}

// TestHardAbortOrderOfOperations is intended to cover HardAbort calling
// Finish() before rolling back session history, minimizing the race window
// where new messages could be added after rollback.
//
// NOTE(review): the assertions only observe the end state (context cancelled
// and history truncated to initialHistoryLength); the relative order of the
// two steps is not checked here.
func TestHardAbortOrderOfOperations(t *testing.T) {
	al, _, _, _, cleanup := newTestAgentLoop(t)
	defer cleanup()

	sess := &ephemeralSessionStore{
		history: []providers.Message{
			{Role: "user", Content: "initial message"},
			{Role: "assistant", Content: "response 1"},
			{Role: "user", Content: "follow-up"},
		},
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	rootTS := &turnState{
		ctx:                  ctx,
		cancelFunc:           cancel,
		turnID:               "test-session-order",
		depth:                0,
		session:              sess,
		initialHistoryLength: 1, // Snapshot: 1 message
		pendingResults:       make(chan *tools.ToolResult, 16),
		concurrencySem:       make(chan struct{}, 5),
	}

	al.activeTurnStates.Store("test-session-order", rootTS)

	// Trigger HardAbort
	if err := al.HardAbort("test-session-order"); err != nil {
		t.Fatalf("HardAbort failed: %v", err)
	}

	// Verify context was cancelled (Finish() was called)
	select {
	case <-rootTS.ctx.Done():
		// Good - context was cancelled
	default:
		t.Error("expected context to be cancelled after HardAbort")
	}

	// Verify history was rolled back.
	// Fatal, not Error: indexing [0] below would panic on an empty slice.
	finalHistory := sess.GetHistory("")
	if len(finalHistory) != 1 {
		t.Fatalf("expected history to rollback to 1 message, got %d", len(finalHistory))
	}

	if finalHistory[0].Content != "initial message" {
		t.Error("history content does not match initial state after rollback")
	}
}

// TestFinishClosesChannel verifies that Finish() closes the pendingResults channel
// and that deliverSubTurnResult handles closed channels gracefully.
//
// It also calls Finish() a second time to check that a repeated call does not
// panic. The closed-channel check uses a non-blocking receive, so a Finish()
// that leaves the channel open is reported as a failure instead of hanging
// the test.
func TestFinishClosesChannel(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	ts := &turnState{
		ctx:            ctx,
		cancelFunc:     cancel,
		turnID:         "test-finish-channel",
		depth:          0,
		pendingResults: make(chan *tools.ToolResult, 2),
		isFinished:     false,
	}

	// Verify channel is open initially
	select {
	case ts.pendingResults <- &tools.ToolResult{ForLLM: "test"}:
		// Good - channel is open
		// Drain the message we just sent
		<-ts.pendingResults
	default:
		t.Fatal("channel should be open initially")
	}

	// Call Finish()
	ts.Finish()

	// Verify channel is closed. A receive on a closed channel is always ready,
	// so reaching the default case means the channel is still open and empty.
	select {
	case _, ok := <-ts.pendingResults:
		if ok {
			t.Error("expected channel to be closed after Finish()")
		}
	default:
		t.Error("expected channel to be closed after Finish()")
	}

	// Verify Finish() is idempotent (can be called multiple times)
	ts.Finish() // Should not panic

	// Verify deliverSubTurnResult doesn't panic when sending to closed channel
	result := &tools.ToolResult{ForLLM: "late result"}

	// This should not panic - it should recover and emit OrphanResultEvent
	deliverSubTurnResult(ts, "child-1", result)
}

// TestFinalPollCapturesLateResults models the final poll before Finish():
// results already queued on a registered channel must all be returned by one
// dequeue call, and a second call must find nothing left.
func TestFinalPollCapturesLateResults(t *testing.T) {
	al, _, _, _, cleanup := newTestAgentLoop(t)
	defer cleanup()

	sessionKey := "test-session-final-poll"
	pending := make(chan *tools.ToolResult, 4)

	// Make the channel visible to the loop for the duration of the test.
	al.registerSubTurnResultChannel(sessionKey, pending)
	defer al.unregisterSubTurnResultChannel(sessionKey)

	// Two results arrive after the last iteration poll.
	for _, text := range []string{"result 1", "result 2"} {
		pending <- &tools.ToolResult{ForLLM: text}
	}

	// First poll picks up both results.
	if got := len(al.dequeuePendingSubTurnResults(sessionKey)); got != 2 {
		t.Errorf("expected 2 results, got %d", got)
	}

	// Second poll finds the channel empty.
	if got := len(al.dequeuePendingSubTurnResults(sessionKey)); got != 0 {
		t.Errorf("expected 0 results on second poll, got %d", got)
	}
}