diff --git a/doc/test-plan.md b/doc/test-plan.md index aa0d2c5..5b07997 100644 --- a/doc/test-plan.md +++ b/doc/test-plan.md @@ -1341,6 +1341,7 @@ _Document results here as tests are completed_ | 2.3.x | ✅ PASSED | 2026-04-07 | OpenCode | 2-minute timeout behavior and network failure handling validated | | 2.4.x | ✅ PASSED | 2026-04-07 | OpenCode | JSON parsing success/failure/empty choices/missing fields covered | | 2.5.x | ✅ PASSED | 2026-04-07 | OpenCode | Upstream 500/429 and malformed content error handling validated | +| 2.x live consistency | ⏭️ READY (manual run) | 2026-04-07 | OpenCode | Added opt-in live test `TestCallLLM_Live_ScoreConsistencyPlusMinus10` gated by `RUN_LIVE_OPENAI_TESTS=1` | | 3.1.x | ✅ PASSED | 2026-04-07 | OpenCode | Overall score range and consistency scenarios covered with deterministic mocks | | 3.2.x | ✅ PASSED | 2026-04-07 | OpenCode | Criteria population, score bounds, evidence/comments presence verified | | 3.3.x | ✅ PASSED | 2026-04-07 | OpenCode | Strengths/weaknesses/missing information structure checks validated | @@ -1397,7 +1398,7 @@ _Document any test failures here with details_ **Next Steps:** 1. Add CI step to run `go test ./...` before image build/push -2. Tighten manual validation notes for production-like OpenAI calls +2. Run opt-in live consistency test (`RUN_LIVE_OPENAI_TESTS=1`) and log observed score distribution 3. Backfill health-check endpoint or document non-AI endpoint strategy 4. Keep the intentional 1.2.6 skip documented until encrypted-PDF fixtures are added diff --git a/internal/services/analyzer_live_test.go b/internal/services/analyzer_live_test.go new file mode 100644 index 0000000..684a0ec --- /dev/null +++ b/internal/services/analyzer_live_test.go @@ -0,0 +1,77 @@ +package services + +import ( + "math" + "os" + "testing" + "time" +) + +// TestCallLLM_Live_ScoreConsistencyPlusMinus10 validates SRD_NonFuncReq_0006 / +// SRD_QualAssurReq_0001 against the live OpenAI API. +// +// This test is opt-in to avoid API cost/flakiness in default runs: +// RUN_LIVE_OPENAI_TESTS=1 OPENAI_API_KEY=... go test ./internal/services -run TestCallLLM_Live_ScoreConsistencyPlusMinus10 -v +func TestCallLLM_Live_ScoreConsistencyPlusMinus10(t *testing.T) { + if os.Getenv("RUN_LIVE_OPENAI_TESTS") != "1" { + t.Skip("set RUN_LIVE_OPENAI_TESTS=1 to run live OpenAI consistency test") + } + if os.Getenv("OPENAI_API_KEY") == "" { + t.Skip("OPENAI_API_KEY is required for live OpenAI test") + } + + resume := `Senior Software Engineer with 7 years of experience building Go backend services. +Led microservice migrations, improved API latency by 35%, and maintained CI/CD pipelines. +Experience includes Kubernetes, Docker, PostgreSQL, and cloud deployments on AWS.` + + job := `We are hiring a Senior Go Backend Engineer with strong API design skills, +production Kubernetes experience, and ownership of scalable distributed systems. +Candidates should demonstrate measurable impact, collaboration, and code quality.` + + const runs = 10 + scores := make([]int, 0, runs) + + for i := range runs { + result, err := callLLM(resume, job) + if err != nil { + t.Fatalf("run %d failed: %v", i+1, err) + } + if result.OverallScore < 0 || result.OverallScore > 100 { + t.Fatalf("run %d produced out-of-range score: %d", i+1, result.OverallScore) + } + + scores = append(scores, result.OverallScore) + t.Logf("run %d score: %d", i+1, result.OverallScore) + + if i < runs-1 { + time.Sleep(300 * time.Millisecond) + } + } + + baseline := scores[0] + for i, score := range scores { + delta := score - baseline + if delta < 0 { + delta = -delta + } + if delta > 10 { + t.Fatalf("run %d score %d exceeded +/-10 bound from baseline %d", i+1, score, baseline) + } + } + + var sum float64 + for _, s := range scores { + sum += float64(s) + } + mean := sum / float64(len(scores)) + + var variance float64 + for _, s := range scores { + d := float64(s) - mean + variance += d * d + } + variance /= float64(len(scores)) + stddev := math.Sqrt(variance) + + t.Logf("baseline=%d scores=%v mean=%.2f stddev=%.2f", baseline, scores, mean, stddev) +}