Files
picoclaw/pkg/pid/pidfile.go
T
程智超0668000959 a90d8d35ee fix(pid): verify process identity in singleton PID check
isProcessRunning() previously only checked whether a PID existed via signal(0)/OpenProcess, without confirming the process was actually picoclaw. When the PID was reused by an unrelated process (e.g., systemd-resolved after a kill -9), the gateway would refuse to start with 'already running'.

Add isPicoclawProcess() that verifies the process name matches picoclaw:
- Unix: reads /proc/<pid>/comm
- Windows: calls QueryFullProcessImageNameW

If the running process is not picoclaw, treat the PID file as stale and proceed with normal startup. Falls back to trusting the liveness check when identity verification is unavailable (e.g., /proc unreadable, API call fails).

Fixes #2720.
2026-06-04 20:04:51 +08:00

218 lines
6.0 KiB
Go

package pid
import (
"crypto/rand"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"sync"
"time"
"github.com/sipeed/picoclaw/pkg/config"
"github.com/sipeed/picoclaw/pkg/logger"
)
// pidFileName is the base name of the PID file; pidFilePath joins it onto
// the caller-supplied home directory.
const pidFileName = ".picoclaw.pid"
// errInvalidPidFile marks a PID file that could be read but whose contents
// are unusable (JSON that does not decode, or a non-positive PID). Errors
// returned for that case wrap it, so match with errors.Is.
var errInvalidPidFile = errors.New("invalid pid file")
// PidFileData is the record persisted in the PID file, encoded as JSON.
type PidFileData struct {
	// PID is the process ID of the instance that wrote the file.
	PID int `json:"pid"`
	// Token is a random hex string generated each time the file is written.
	Token string `json:"token"`
	// Version is the build version reported by the writing instance.
	Version string `json:"version"`
	// Port is stored exactly as passed to WritePidFile.
	Port int `json:"port"`
	// Host is stored exactly as passed to WritePidFile.
	Host string `json:"host"`
}
// pidMu serializes the PID-file operations of this package within a single
// process. Being an in-process mutex, it does not coordinate with other
// processes that touch the same file.
var pidMu sync.Mutex
// pidFilePath builds the location of the PID file: pidFileName joined onto
// homePath using the OS path separator.
func pidFilePath(homePath string) string {
	location := filepath.Join(homePath, pidFileName)
	return location
}
// generateToken returns a 32-character lowercase hex token built from
// 16 bytes of crypto/rand output.
//
// If the random source reports an error, the token is instead derived from
// the current time in nanoseconds, zero-padded to 32 hex digits. That
// fallback is not cryptographically random.
func generateToken() string {
	var seed [16]byte
	_, err := rand.Read(seed[:])
	if err == nil {
		return hex.EncodeToString(seed[:])
	}
	// Fallback to something pseudo-random if crypto/rand fails.
	return fmt.Sprintf("%032x", time.Now().UnixNano())
}
// WritePidFile records the current process in the PID file under homePath
// and returns the data that was written.
//
// Before writing, it enforces the singleton rule: if a readable PID file
// names a different process that stalePidReason considers a live gateway,
// nothing is written and an "already running" error is returned. A PID file
// that is stale (or that belongs to the current process) is removed and
// replaced.
//
// The new content is written to "<pidfile>.tmp" and then renamed over the
// final path, so readers never observe a half-written file. host and port
// are stored as given; a fresh token is generated on every call.
func WritePidFile(homePath, host string, port int) (*PidFileData, error) {
	pidMu.Lock()
	defer pidMu.Unlock()

	pidPath := pidFilePath(homePath)

	// Check for existing PID file → singleton enforcement. A file that is
	// missing or unparsable is simply overwritten by the rename below.
	if existing, err := readPidFileUnlocked(pidPath); err == nil {
		if existing.PID != os.Getpid() {
			logger.Infof("found pid file (PID: %d, version: %s)", existing.PID, existing.Version)
			reason := stalePidReason(existing.PID)
			if reason == "" {
				return nil, fmt.Errorf("gateway is already running (PID: %d, version: %s)", existing.PID, existing.Version)
			}
			// Log the actual reason: a PID reused by an unrelated process
			// is running, so "not running" would be misleading.
			logger.Warnf("stale pid file (PID: %d, %s) so will remove the pid file: %s", existing.PID, reason, pidPath)
		}
		// Best-effort cleanup; the rename below replaces the file anyway.
		_ = os.Remove(pidPath)
	}

	data := &PidFileData{
		PID:     os.Getpid(),
		Token:   generateToken(),
		Version: config.GetVersion(),
		Port:    port,
		Host:    host,
	}
	raw, err := json.MarshalIndent(data, "", " ")
	if err != nil {
		return nil, fmt.Errorf("failed to marshal pid file: %w", err)
	}

	// Ensure parent directory exists.
	if err := os.MkdirAll(filepath.Dir(pidPath), 0o755); err != nil {
		return nil, fmt.Errorf("failed to create pid directory: %w", err)
	}

	// Write atomically via temp file + rename.
	tmp := pidPath + ".tmp"
	if err := os.WriteFile(tmp, raw, 0o600); err != nil {
		// A failed write may still have created a partial temp file.
		_ = os.Remove(tmp)
		return nil, fmt.Errorf("failed to write pid file: %w", err)
	}
	if err := os.Rename(tmp, pidPath); err != nil {
		_ = os.Remove(tmp)
		return nil, fmt.Errorf("failed to rename pid file: %w", err)
	}

	logger.Debugf("wrote pid file: %s success", pidPath)
	return data, nil
}

// ReadPidFileWithCheck reads the PID file under homePath and returns its
// contents only when the recorded process still looks like a live gateway.
//
// It returns nil when the file is missing, cannot be read, is invalid, or
// when stalePidReason reports the recorded PID as stale. Invalid and stale
// files are removed (best effort) as a side effect; a file that merely
// failed to read is left in place.
func ReadPidFileWithCheck(homePath string) *PidFileData {
	pidMu.Lock()
	defer pidMu.Unlock()

	pidPath := pidFilePath(homePath)
	data, err := readPidFileUnlocked(pidPath)
	if err != nil {
		switch {
		case os.IsNotExist(err):
			// No PID file: nothing to report and nothing to clean up.
		case errors.Is(err, errInvalidPidFile):
			logger.Warnf("invalid pid file, remove it: %s (%v)", pidPath, err)
			_ = os.Remove(pidPath)
		default:
			logger.Debugf("failed to read pid file: %s", err)
		}
		return nil
	}

	if reason := stalePidReason(data.PID); reason != "" {
		logger.Debugf("stale pid file (PID: %d, %s), remove pid file: %s", data.PID, reason, pidPath)
		_ = os.Remove(pidPath)
		return nil
	}
	return data
}

// stalePidReason explains why pid, as recorded in a PID file, should not be
// treated as a live gateway instance. It returns the empty string when the
// process is considered live.
//
// The checks run in this order:
//   - PID 1 recorded while the current process is not PID 1: a container
//     that stopped can leave its PID file on a shared volume, and the
//     host's PID 1 (init) would otherwise pass the liveness check.
//   - isProcessRunning reports that no such process exists.
//   - The PID belongs to another process that isPicoclawProcess rejects,
//     i.e. the PID was reused by an unrelated program. The current process
//     is exempt from this identity check.
//
// isProcessRunning and isPicoclawProcess are defined outside this file view;
// NOTE(review): this relies on isPicoclawProcess returning true when the
// identity cannot be determined — confirm against the platform files.
func stalePidReason(pid int) string {
	self := os.Getpid()
	switch {
	case pid == 1 && self != 1:
		return "recorded PID 1 is a container leftover"
	case !isProcessRunning(pid):
		return "process is not running"
	case pid != self && !isPicoclawProcess(pid):
		return "process is not picoclaw"
	default:
		return ""
	}
}
// RemovePidFile deletes the PID file under homePath, typically on graceful
// shutdown.
//
// The file is left alone when it parses successfully and names a PID other
// than the current process, so a file written by a newer gateway instance
// is not deleted. In every other case (own PID, or the file could not be
// read or parsed) removal is attempted and any removal error is ignored.
func RemovePidFile(homePath string) {
	pidMu.Lock()
	defer pidMu.Unlock()

	target := pidFilePath(homePath)

	recorded, readErr := readPidFileUnlocked(target)
	if readErr == nil && recorded.PID != os.Getpid() {
		// Belongs to another process; not ours to delete.
		return
	}

	logger.Infof("remove pid file: %s", target)
	os.Remove(target)
}
// RemovePidFileIfPID deletes the PID file under homePath only if the PID
// recorded in it equals expectedPID.
//
// It returns true only when the file was actually removed. A non-positive
// expectedPID, an unreadable or invalid file, a PID mismatch, or a failed
// removal all yield false.
func RemovePidFileIfPID(homePath string, expectedPID int) bool {
	if expectedPID <= 0 {
		return false
	}

	pidMu.Lock()
	defer pidMu.Unlock()

	target := pidFilePath(homePath)
	recorded, readErr := readPidFileUnlocked(target)
	if readErr != nil || recorded.PID != expectedPID {
		return false
	}
	return os.Remove(target) == nil
}
// readPidFileUnlocked loads and validates the PID file at pidPath without
// taking pidMu; the caller must already hold it.
//
// A read failure is returned unchanged, so callers can test it with
// os.IsNotExist. Content that does not decode as JSON, or that carries a
// non-positive PID, yields an error wrapping errInvalidPidFile.
func readPidFileUnlocked(pidPath string) (*PidFileData, error) {
	contents, err := os.ReadFile(pidPath)
	if err != nil {
		return nil, err
	}

	parsed := new(PidFileData)
	if decodeErr := json.Unmarshal(contents, parsed); decodeErr != nil {
		return nil, fmt.Errorf("%w: %v", errInvalidPidFile, decodeErr)
	}

	// A usable record must carry a positive PID.
	if parsed.PID <= 0 {
		return nil, fmt.Errorf("%w: pid=%d", errInvalidPidFile, parsed.PID)
	}
	return parsed, nil
}