diff --git a/doc/lifecycle-roadmap.md b/doc/lifecycle-roadmap.md new file mode 100644 index 0000000..254c7d5 --- /dev/null +++ b/doc/lifecycle-roadmap.md @@ -0,0 +1,126 @@ +# Process and Session Lifecycle Roadmap + +##### Generated via AI on April 14th, 2026 + +## Why this exists + +The current implementation now has a minimum-correct ownership model: + +- `Session` owns both the proxy server and spawned process. +- `Session.Stop()` directly requests process shutdown and proxy shutdown. +- process exit status is emitted for both success (`ExitCode=0`) and failures. + +This document captures the next iteration so lifecycle behavior remains predictable as the project grows. + +## Current baseline (after the minimal fix) + +- Process startup and waiting are split: + - startup returns a process handle immediately + - waiting happens in a background goroutine +- Unix signaling still targets process groups (`Setpgid` + negative PID) with TERM -> KILL escalation. +- Proxy shutdown is called explicitly by session stop. +- `http.ErrServerClosed` is treated as normal during proxy shutdown. + +## Known limitations we should address next + +1. Exit reason ambiguity + - We emit `ProcessSignaled` when stopping and `ProcessExited` when wait returns, but we do not explicitly encode whether exit was natural, requested by user, or forced by kill escalation. + +2. Platform parity + - Non-Unix builds only signal the direct process and cannot reliably kill full process trees. + +3. Shutdown ordering is not coordinated + - process stop and proxy shutdown are both requested, but there is no single orchestrated timeout policy across the whole session. + +4. Session completion is implicit + - no explicit "session done" event or `Wait()` API to know when all workers have quiesced. + +5. Message channel lifecycle + - channel is currently long-lived and not explicitly closed; this is safe for current flow, but not ideal for future composition/testing. + +## Proposed future design + +### 1) Introduce lifecycle controllers + +Add small controller types with clear contracts: + +- `ProcessController` + - `Start(cmd, env) error` + - `Stop(ctx) error` (TERM then KILL by deadline) + - `Wait() ProcessResult` +- `ProxyController` + - `Start(listener) error` + - `Stop(ctx) error` + - `Wait() error` + +Reasoning: + +- encapsulates resource ownership and synchronization +- avoids session-level ad hoc goroutines +- easier to unit-test + +### 2) Move to context-driven shutdown + +Use a parent context for a session and cancellation for coordinated stop. + +Reasoning: + +- one source of truth for shutdown intent +- natural propagation to future subcomponents +- easier timeout management than cross-goroutine signal channels + +### 3) Add explicit process exit metadata + +Define fields such as: + +- `ExitReason`: `natural`, `signal`, `forced_kill`, `start_failed`, `runtime_error` +- `Signal`: optional signal value when applicable + +Reasoning: + +- accurate UI and logs +- better postmortem behavior for flaky commands + +### 4) Add session-level graceful stop policy + +Implement deterministic sequence with deadlines, for example: + +1. request process graceful stop +2. wait up to `X` for process completion +3. force kill process group if needed +4. shutdown proxy with timeout `Y` +5. wait for goroutines/controllers to finish +6. emit `SessionStopped` + +Reasoning: + +- easier reasoning about final state +- avoids races between TUI exit and backend teardown + +### 5) Improve non-Unix behavior + +If Windows support becomes a requirement, evaluate job objects or equivalent process-tree control. + +Reasoning: + +- current direct-process signaling can leak descendants +- parity with Unix lifecycle expectations + +## Implementation notes for the next pass + +- Keep `internal/process/signal_unix.go` logic as the Unix baseline. +- Rename `Destory` to `Destroy` with a compatibility shim or bulk rename. +- Handle `net.ErrClosed` and `http.ErrServerClosed` as expected proxy shutdown outcomes. +- Add targeted tests: + - clean exit (`ExitCode=0`) + - non-zero exit + - TERM then forced KILL + - spawned child process cleanup on Unix process groups + - proxy shutdown does not emit fatal on normal close + +## Suggested milestones + +1. Controller extraction without behavior change +2. Exit reason metadata and event model updates +3. Context-based coordinated stop +4. Platform-specific process-tree improvements (if needed) diff --git a/internal/app/process.go b/internal/app/process.go index 0b7a1bd..42ef765 100644 --- a/internal/app/process.go +++ b/internal/app/process.go @@ -3,7 +3,6 @@ package app import ( "errors" "fmt" - "os" "os/exec" "syscall" "time" @@ -12,7 +11,7 @@ import ( "termtap.dev/internal/process" ) -func StartProcess(cmd model.Command, addr string, ch chan<- model.Message, sigCh <-chan os.Signal) { +func StartProcess(cmd model.Command, addr string, ch chan<- model.Message) (*model.Process, error) { ch <- model.Message{ Type: model.MessageTypeProcessStarting, Body: fmt.Sprintf("spawning process '%s'", process.CommandString(cmd)), @@ -21,43 +20,46 @@ func StartProcess(cmd model.Command, addr string, ch chan<- model.Message, sigCh proc := process.NewProcess(cmd, addr, ch) if err := proc.Exec.Start(); err != nil { - ch <- model.Message{ - Type: model.MessageTypeProcessExited, - Body: fmt.Sprintf("%q", err), - } - return + return nil, fmt.Errorf("start process: %w", err) } process.UpdateStatus(proc, true, ch) - // Listen for SIGTERM from main process + go waitForProcessExit(proc, ch) + + return proc, nil +} + +func StopProcess(proc *model.Process, ch chan<- model.Message, sig syscall.Signal) { + if proc == nil || proc.Exec == nil || proc.Exec.Process == nil { + return + } + + ch <- model.Message{ + Type: model.MessageTypeProcessSignaled, + Body: fmt.Sprintf("process with pid '%d' is being killed", proc.Exec.Process.Pid), + PID: proc.Exec.Process.Pid, + } + + _ = process.SignalProcess(proc.Exec, sig) + go func() { - sig := <-sigCh - - ch <- model.Message{ - Type: model.MessageTypeProcessSignaled, - Body: fmt.Sprintf("process with pid '%d' is being killed", proc.Exec.Process.Pid), - PID: proc.Exec.Process.Pid, - } - - if proc.Exec != nil { - _ = process.SignalProcess(proc.Exec, sig) - - go func() { - time.Sleep(1500 * time.Millisecond) - if process.ProcessAlive(proc.Exec) { - _ = process.SignalProcess(proc.Exec, syscall.SIGKILL) - } - }() - - process.UpdateStatus(proc, false, ch) + time.Sleep(1500 * time.Millisecond) + if process.ProcessAlive(proc.Exec) { + _ = process.SignalProcess(proc.Exec, syscall.SIGKILL) } }() +} + +func waitForProcessExit(proc *model.Process, ch chan<- model.Message) { + if proc == nil || proc.Exec == nil { + return + } if err := proc.Exec.Wait(); err != nil { if exitErr, ok := errors.AsType[*exec.ExitError](err); ok { ch <- model.Message{ Type: model.MessageTypeProcessExited, - Body: fmt.Sprintf("process pid '%d' exited by itself", proc.Exec.Process.Pid), + Body: fmt.Sprintf("process pid '%d' exited", proc.Exec.Process.Pid), PID: proc.Exec.Process.Pid, ExitCode: exitErr.ExitCode(), } @@ -73,4 +75,11 @@ func StartProcess(cmd model.Command, addr string, ch chan<- model.Message, sigCh return } + ch <- model.Message{ + Type: model.MessageTypeProcessExited, + Body: fmt.Sprintf("process pid '%d' exited", proc.Exec.Process.Pid), + PID: proc.Exec.Process.Pid, + ExitCode: 0, + } + process.UpdateStatus(proc, false, ch) } diff --git a/internal/app/proxy.go b/internal/app/proxy.go index 002be92..867fd52 100644 --- a/internal/app/proxy.go +++ b/internal/app/proxy.go @@ -1,32 +1,31 @@ package app import ( + "errors" "fmt" + "net/http" "termtap.dev/internal/model" - "termtap.dev/internal/proxy" ) -func StartProxy(addr string, ch chan<- model.Message) { - ps, err := proxy.NewProxyServer(addr, ch) - if err != nil { - ch <- model.Message{ - Type: model.MessageTypeFatal, - Body: fmt.Sprintf("%q", err), - } +func StartProxy(ps *model.ProxyServer, ch chan<- model.Message) { + if ps == nil || ps.Server == nil || ps.Listener == nil { return } - defer proxy.Destory(ps, ch) ch <- model.Message{ Type: model.MessageTypeProxyStarting, - Body: fmt.Sprintf("proxy server started on %s", addr), + Body: fmt.Sprintf("proxy server started on %s", (*ps.Listener).Addr().String()), } if err := ps.Server.Serve(*ps.Listener); err != nil { + if errors.Is(err, http.ErrServerClosed) { + return + } + ch <- model.Message{ Type: model.MessageTypeFatal, - Body: fmt.Sprintf("%q", err), + Body: fmt.Sprintf("fatal error in proxy server: %q", err), } return } diff --git a/internal/app/session.go b/internal/app/session.go index 415b967..ed45c95 100644 --- a/internal/app/session.go +++ b/internal/app/session.go @@ -1,32 +1,43 @@ package app import ( - "os" - "os/signal" "sync" "syscall" "termtap.dev/internal/model" + "termtap.dev/internal/proxy" ) type Session struct { Messages <-chan model.Message - sigCh chan os.Signal + msgCh chan model.Message + proxy *model.ProxyServer + proc *model.Process stopOnce sync.Once } func StartSession(cmd model.Command, addr string) (*Session, error) { msgs := make(chan model.Message, 256) - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM) - go StartProxy(addr, msgs) - go StartProcess(cmd, addr, msgs, sigCh) + ps, err := proxy.NewProxyServer(addr, msgs) + if err != nil { + return nil, err + } + + go StartProxy(ps, msgs) + + proc, err := StartProcess(cmd, addr, msgs) + if err != nil { + proxy.Destory(ps, msgs) + return nil, err + } return &Session{ Messages: msgs, - sigCh: sigCh, + msgCh: msgs, + proxy: ps, + proc: proc, }, nil } @@ -36,11 +47,7 @@ func (s *Session) Stop() { } s.stopOnce.Do(func() { - signal.Stop(s.sigCh) - - select { - case s.sigCh <- syscall.SIGTERM: - default: - } + StopProcess(s.proc, s.msgCh, syscall.SIGTERM) + proxy.Destory(s.proxy, s.msgCh) }) } diff --git a/internal/tui/view.go b/internal/tui/view.go index 9d1cd27..0f2c509 100644 --- a/internal/tui/view.go +++ b/internal/tui/view.go @@ -7,6 +7,7 @@ import ( "termtap.dev/internal/model" ) +// TODO: This is all temporary func (m Model) View() string { eventLines := m.renderEvents(8) requestLines := m.renderRequests(12) @@ -29,10 +30,7 @@ func (m Model) renderEvents(limit int) string { return " (none yet)" } - start := len(m.events) - limit - if start < 0 { - start = 0 - } + start := max(len(m.events)-limit, 0) rows := make([]string, 0, len(m.events)-start) for i := start; i < len(m.events); i++ {