// ResumeLens/internal/services/analyzer_live_test.go

package services

import (
	"math"
	"os"
	"testing"
	"time"
)

// TestCallLLM_Live_ScoreConsistencyPlusMinus10 validates SRD_NonFuncReq_0006 /
// SRD_QualAssurReq_0001 against the live OpenAI API.
//
// This test is opt-in to avoid API cost/flakiness in default runs:
// RUN_LIVE_OPENAI_TESTS=1 OPENAI_API_KEY=... go test ./internal/services -run TestCallLLM_Live_ScoreConsistencyPlusMinus10 -v
func TestCallLLM_Live_ScoreConsistencyPlusMinus10(t *testing.T) {
	if os.Getenv("RUN_LIVE_OPENAI_TESTS") != "1" {
		t.Skip("set RUN_LIVE_OPENAI_TESTS=1 to run live OpenAI consistency test")
	}
	if os.Getenv("OPENAI_API_KEY") == "" {
		t.Skip("OPENAI_API_KEY is required for live OpenAI test")
	}

	resume := `Senior Software Engineer with 7 years of experience building Go backend services.
Led microservice migrations, improved API latency by 35%, and maintained CI/CD pipelines.
Experience includes Kubernetes, Docker, PostgreSQL, and cloud deployments on AWS.`

	job := `We are hiring a Senior Go Backend Engineer with strong API design skills,
production Kubernetes experience, and ownership of scalable distributed systems.
Candidates should demonstrate measurable impact, collaboration, and code quality.`

	const runs = 10
	scores := make([]int, 0, runs)

	for i := range runs {
		result, err := callLLM(resume, job)
		if err != nil {
			t.Fatalf("run %d failed: %v", i+1, err)
		}
		if result.OverallScore < 0 || result.OverallScore > 100 {
			t.Fatalf("run %d produced out-of-range score: %d", i+1, result.OverallScore)
		}

		scores = append(scores, result.OverallScore)
		t.Logf("run %d score: %d", i+1, result.OverallScore)

		if i < runs-1 {
			time.Sleep(300 * time.Millisecond)
		}
	}

	baseline := scores[0]
	for i, score := range scores {
		delta := score - baseline
		if delta < 0 {
			delta = -delta
		}
		if delta > 10 {
			t.Fatalf("run %d score %d exceeded +/-10 bound from baseline %d", i+1, score, baseline)
		}
	}

	var sum float64
	for _, s := range scores {
		sum += float64(s)
	}
	mean := sum / float64(len(scores))

	var variance float64
	for _, s := range scores {
		d := float64(s) - mean
		variance += d * d
	}
	variance /= float64(len(scores))
	stddev := math.Sqrt(variance)

	t.Logf("baseline=%d scores=%v mean=%.2f stddev=%.2f", baseline, scores, mean, stddev)
}