package services import ( "math" "os" "testing" "time" ) // TestCallLLM_Live_ScoreConsistencyPlusMinus10 validates SRD_NonFuncReq_0006 / // SRD_QualAssurReq_0001 against the live OpenAI API. // // This test is opt-in to avoid API cost/flakiness in default runs: // RUN_LIVE_OPENAI_TESTS=1 OPENAI_API_KEY=... go test ./internal/services -run TestCallLLM_Live_ScoreConsistencyPlusMinus10 -v func TestCallLLM_Live_ScoreConsistencyPlusMinus10(t *testing.T) { if os.Getenv("RUN_LIVE_OPENAI_TESTS") != "1" { t.Skip("set RUN_LIVE_OPENAI_TESTS=1 to run live OpenAI consistency test") } if os.Getenv("OPENAI_API_KEY") == "" { t.Skip("OPENAI_API_KEY is required for live OpenAI test") } resume := `Senior Software Engineer with 7 years of experience building Go backend services. Led microservice migrations, improved API latency by 35%, and maintained CI/CD pipelines. Experience includes Kubernetes, Docker, PostgreSQL, and cloud deployments on AWS.` job := `We are hiring a Senior Go Backend Engineer with strong API design skills, production Kubernetes experience, and ownership of scalable distributed systems. Candidates should demonstrate measurable impact, collaboration, and code quality.` const runs = 10 scores := make([]int, 0, runs) for i := range runs { result, err := callLLM(resume, job) if err != nil { t.Fatalf("run %d failed: %v", i+1, err) } if result.OverallScore < 0 || result.OverallScore > 100 { t.Fatalf("run %d produced out-of-range score: %d", i+1, result.OverallScore) } scores = append(scores, result.OverallScore) t.Logf("run %d score: %d", i+1, result.OverallScore) if i < runs-1 { time.Sleep(300 * time.Millisecond) } } baseline := scores[0] for i, score := range scores { delta := score - baseline if delta < 0 { delta = -delta } if delta > 10 { t.Fatalf("run %d score %d exceeded +/-10 bound from baseline %d", i+1, score, baseline) } } var sum float64 for _, s := range scores { sum += float64(s) } mean := sum / float64(len(scores)) var variance float64 for _, s := range scores { d := float64(s) - mean variance += d * d } variance /= float64(len(scores)) stddev := math.Sqrt(variance) t.Logf("baseline=%d scores=%v mean=%.2f stddev=%.2f", baseline, scores, mean, stddev) }