fix: treat PID=1 as stale in PID file singleton check, fix govet shadow, add .gitattributes (#2642)

- pid: When a container stops and leaves behind a PID file with PID 1
  on a shared volume, the host's init process (PID 1) passes the
  isProcessRunning check, blocking new gateway starts. Treat recorded
  PID 1 as always stale in both WritePidFile and ReadPidFileWithCheck.
  Added unit tests covering the PID=1 container leftover scenario.

- isolation: Fix govet shadow warning on platform_windows.go line 105
  where := shadows the outer err variable. Changed to = assignment.

- gitattributes: Enforce LF line endings for shell scripts to prevent
  CRLF issues when checking out on Windows (breaks Docker entrypoint).

Co-authored-by: BeaconCat <BeaconCat@users.noreply.github.com>
This commit is contained in:
BeaconCat
2026-04-24 15:26:34 +08:00
committed by GitHub
parent 293477b02a
commit f334ac6d01
5 changed files with 64 additions and 2 deletions
+3
View File
@@ -0,0 +1,3 @@
# Ensure shell scripts always use LF line endings regardless of OS.
*.sh text eol=lf
docker/entrypoint.sh text eol=lf
+6
View File
@@ -12,4 +12,10 @@ if [ ! -d "${HOME}/.picoclaw/workspace" ] && [ ! -f "${HOME}/.picoclaw/config.js
exit 0
fi
# Remove stale PID file from a previous container run.
# After docker kill / OOM / crash the PID file may linger on the bind-mounted
# volume and block the next gateway start (the recorded PID could collide with
# an unrelated process inside the new container).
rm -f "${HOME}/.picoclaw/.picoclaw.pid"
exec picoclaw gateway "$@"
+1 -1
View File
@@ -102,7 +102,7 @@ func postStartPlatformIsolation(cmd *exec.Cmd, isolation config.IsolationConfig,
return fmt.Errorf("open process for job assignment: %w", err)
}
if err := windows.AssignProcessToJobObject(job, proc); err != nil {
if err = windows.AssignProcessToJobObject(job, proc); err != nil {
_ = windows.CloseHandle(proc)
_ = windows.CloseHandle(job)
if resources.token != 0 {
+14 -1
View File
@@ -58,7 +58,12 @@ func WritePidFile(homePath, host string, port int) (*PidFileData, error) {
if data, err := readPidFileUnlocked(pidPath); err == nil {
if os.Getpid() != data.PID {
logger.Infof("found pid file (PID: %d, version: %s)", data.PID, data.Version)
if isProcessRunning(data.PID) {
// PID 1 is typically init/systemd on the host or the entrypoint
// inside a container. When a container stops and leaves behind a
// PID file on a shared volume, the host's PID 1 (init) would
// pass the isProcessRunning check, blocking new gateway starts.
// Treat recorded PID 1 as always stale.
if data.PID != 1 && isProcessRunning(data.PID) {
return nil, fmt.Errorf("gateway is already running (PID: %d, version: %s)", data.PID, data.Version)
}
logger.Warnf("not running (PID: %d) so will remove the pid file: %s", data.PID, pidPath)
@@ -124,6 +129,14 @@ func ReadPidFileWithCheck(homePath string) *PidFileData {
return nil
}
// Treat PID 1 as stale when we are not PID 1 ourselves (container
// leftover on a shared volume — host PID 1 is init, not gateway).
if data.PID == 1 && os.Getpid() != 1 {
logger.Debugf("stale container PID 1, remove pid file: %s", pidPath)
os.Remove(pidPath)
return nil
}
if !isProcessRunning(data.PID) {
logger.Debugf("process not running, remove pid file: %s", pidPath)
os.Remove(pidPath)
+40
View File
@@ -278,6 +278,46 @@ func TestRemovePidFileIfPIDMismatch(t *testing.T) {
}
}
// TestWritePidFileContainerPID1 verifies that a leftover PID file with PID 1
// (typical container entrypoint) is treated as stale and overwritten.
func TestWritePidFileContainerPID1(t *testing.T) {
dir := tmpDir(t)
stale := PidFileData{PID: 1, Token: "deadbeef12345678deadbeef12345678"}
raw, _ := json.MarshalIndent(stale, "", " ")
os.WriteFile(filepath.Join(dir, pidFileName), raw, 0o600)
data, err := WritePidFile(dir, "127.0.0.1", 18790)
if err != nil {
t.Fatalf("WritePidFile should treat PID 1 as stale, got error: %v", err)
}
if data.PID != os.Getpid() {
t.Errorf("PID = %d, want %d", data.PID, os.Getpid())
}
}
// TestReadPidFileWithCheckContainerPID1 verifies that a leftover PID file
// with PID 1 is treated as stale and cleaned up.
func TestReadPidFileWithCheckContainerPID1(t *testing.T) {
if os.Getpid() == 1 {
t.Skip("test not meaningful when running as PID 1")
}
dir := tmpDir(t)
stale := PidFileData{PID: 1, Token: "deadbeef12345678deadbeef12345678"}
raw, _ := json.MarshalIndent(stale, "", " ")
os.WriteFile(filepath.Join(dir, pidFileName), raw, 0o600)
data := ReadPidFileWithCheck(dir)
if data != nil {
t.Error("expected nil for PID 1 leftover")
}
if _, err := os.Stat(filepath.Join(dir, pidFileName)); !os.IsNotExist(err) {
t.Error("PID 1 leftover file should be removed")
}
}
// TestReadPidFileUnlockedInvalidJSON returns error for malformed content.
func TestReadPidFileUnlockedInvalidJSON(t *testing.T) {
dir := tmpDir(t)