diff --git a/conductor/archive/fix_rate_limiter_lifecycle_20260306/index.md b/conductor/archive/fix_rate_limiter_lifecycle_20260306/index.md new file mode 100644 index 0000000..a997fd9 --- /dev/null +++ b/conductor/archive/fix_rate_limiter_lifecycle_20260306/index.md @@ -0,0 +1,5 @@ +# Track fix_rate_limiter_lifecycle_20260306 Context + +- [Specification](./spec.md) +- [Implementation Plan](./plan.md) +- [Metadata](./metadata.json) diff --git a/conductor/archive/fix_rate_limiter_lifecycle_20260306/metadata.json b/conductor/archive/fix_rate_limiter_lifecycle_20260306/metadata.json new file mode 100644 index 0000000..12b5fc6 --- /dev/null +++ b/conductor/archive/fix_rate_limiter_lifecycle_20260306/metadata.json @@ -0,0 +1,8 @@ +{ + "track_id": "fix_rate_limiter_lifecycle_20260306", + "type": "bug", + "status": "new", + "created_at": "2026-03-06T10:00:00Z", + "updated_at": "2026-03-06T10:00:00Z", + "description": "Fix Rate Limiter Goroutine Lifecycle" +} diff --git a/conductor/archive/fix_rate_limiter_lifecycle_20260306/plan.md b/conductor/archive/fix_rate_limiter_lifecycle_20260306/plan.md new file mode 100644 index 0000000..bc2a657 --- /dev/null +++ b/conductor/archive/fix_rate_limiter_lifecycle_20260306/plan.md @@ -0,0 +1,32 @@ +# Implementation Plan: Fix Rate Limiter Goroutine Lifecycle + +## Phase 1: Research and Infrastructure [checkpoint: b7b5bbd] +- [x] Task: Identify the rate limiter implementation and initialization points b7b5bbd + - [x] Locate the rate limiter package in `internal/` + - [x] Find all call sites where the rate limiter is initialized +- [x] Task: Identify existing tests and reproduction steps b7b5bbd + - [x] Locate existing unit tests for the rate limiter + - [x] Confirm how the cleanup goroutine is currently started +- [x] Task: Conductor - User Manual Verification 'Research and Infrastructure' (Protocol in workflow.md) b7b5bbd + +## Phase 2: Implementation (TDD) [checkpoint: b9177d8] +- [x] Task: Write failing test for goroutine leak c072dd9 + - [x] Create a test case that initializes the rate limiter and then cancels the context + - [x] Verify that the cleanup goroutine persists after cancellation (failing state) +- [x] Task: Update rate limiter initialization to accept context 2b5cbc2 + - [x] Modify the signature of the initialization function to include `context.Context` +- [x] Task: Update cleanup goroutine to use context 2b5cbc2 + - [x] Modify the cleanup loop to listen for `ctx.Done()` +- [x] Task: Update call sites for rate limiter initialization 2b5cbc2 + - [x] Update all middleware and/or `main.go` call sites to pass the appropriate context +- [x] Task: Verify fix with automated tests 2b5cbc2 + - [x] Run the failing test and ensure it now passes + - [x] Run the full test suite to ensure no regressions +- [x] Task: Conductor - User Manual Verification 'Implementation' (Protocol in workflow.md) b9177d8 + +## Phase 3: Final Verification and Cleanup [checkpoint: 9b2011d] +- [x] Task: Run all tests and verify no regressions 2b5cbc2 + - [x] Execute `make test` and `make test-all` +- [x] Task: Verify code coverage for new changes 2b5cbc2 + - [x] Ensure new code has >80% coverage +- [x] Task: Conductor - User Manual Verification 'Final Verification and Cleanup' (Protocol in workflow.md) 9b2011d diff --git a/conductor/archive/fix_rate_limiter_lifecycle_20260306/spec.md b/conductor/archive/fix_rate_limiter_lifecycle_20260306/spec.md new file mode 100644 index 0000000..4e4d5bd --- /dev/null +++ b/conductor/archive/fix_rate_limiter_lifecycle_20260306/spec.md @@ -0,0 +1,24 @@ +# Specification: Fix Rate Limiter Goroutine Lifecycle + +## Overview +Fix the rate limiter goroutine lifecycle to prevent resource leaks during server shutdown. Currently, the cleanup goroutine uses `context.Background()`, which prevents it from being cancelled when the application stops. + +## Functional Requirements +- **Context Acceptance:** The rate limiter initialization must accept a `context.Context` parameter. +- **Graceful Shutdown:** The cleanup goroutine responsible for purging expired rate limit entries must use the provided context to stop its execution when the context is cancelled. +- **Initialization Update:** All call sites that initialize the rate limiter must be updated to provide a valid application context (typically derived from the main server context). + +## Non-Functional Requirements +- **Performance:** The fix should not introduce any measurable overhead to the rate limiting logic or hot paths. +- **Reliability:** The rate limiter should continue to function correctly even if the context is cancelled (i.e., it should fail-safe or stop gracefully). + +## Acceptance Criteria +- [ ] The rate limiter's `New` or initialization function accepts `context.Context`. +- [ ] The cleanup goroutine correctly stops when the context is cancelled. +- [ ] Automated tests confirm that the goroutine is cleaned up (e.g., using `goleak` or similar verification). +- [ ] All existing tests pass. + +## Out of Scope +- Refactoring the rate limiting algorithm or storage backend. +- Adding new rate limiting features (e.g., per-user vs per-IP changes). +- Changes to other middleware or handlers unless directly related to initializing the rate limiter. diff --git a/conductor/tech-stack.md b/conductor/tech-stack.md index 29392be..0a76070 100644 --- a/conductor/tech-stack.md +++ b/conductor/tech-stack.md @@ -14,6 +14,7 @@ - **Envelope Encryption:** [gocloud.dev/secrets](https://gocloud.dev/howto/secrets/) - Abstracted access to various KMS providers for root-of-trust encryption. - **Password Hashing:** [go-pwdhash](https://github.com/allisson/go-pwdhash) - Argon2id hashing for secure storage of client secrets and passwords. - **Request Body Size Limiting:** Middleware to prevent DoS attacks from large payloads. +- **Rate Limiting:** Per-client and per-IP rate limiting middleware for DoS protection and API abuse prevention. - **Secret Value Size Limiting:** Global limit on individual secret values to ensure predictable storage and memory usage. - **Strict Capability Validation:** Centralized domain helpers for validating policy capabilities (`read`, `write`, `delete`, `encrypt`, `decrypt`, `rotate`) in CLI and API layers. - **Secret Path Validation:** Strict naming rules for secret paths (alphanumeric, -, _, /) to ensure consistency and security. diff --git a/conductor/tracks.md b/conductor/tracks.md index 0b5c54e..22dc03b 100644 --- a/conductor/tracks.md +++ b/conductor/tracks.md @@ -2,4 +2,4 @@ This file tracks all major tracks for the project. Each track has its own detailed plan in its respective folder. ---- + diff --git a/go.mod b/go.mod index 11044c5..46db6e0 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,7 @@ require ( go.opentelemetry.io/otel/metric v1.41.0 go.opentelemetry.io/otel/sdk v1.41.0 go.opentelemetry.io/otel/sdk/metric v1.41.0 + go.uber.org/goleak v1.3.0 gocloud.dev v0.45.0 gocloud.dev/secrets/hashivault v0.45.0 golang.org/x/crypto v0.48.0 diff --git a/internal/app/di.go b/internal/app/di.go index e911c66..ae89ae9 100644 --- a/internal/app/di.go +++ b/internal/app/di.go @@ -469,6 +469,7 @@ func (c *Container) initHTTPServer(ctx context.Context) (*http.Server, error) { // Setup router with dependencies server.SetupRouter( + ctx, c.config, clientHandler, tokenHandler, diff --git a/internal/auth/http/rate_limit_leak_test.go b/internal/auth/http/rate_limit_leak_test.go new file mode 100644 index 0000000..9aafb30 --- /dev/null +++ b/internal/auth/http/rate_limit_leak_test.go @@ -0,0 +1,39 @@ +package http + +import ( + "context" + "log/slog" + "testing" + + "go.uber.org/goleak" +) + +func TestRateLimitMiddleware_GoroutineLeak(t *testing.T) { + // Ensure no goroutines are leaking after the test + defer goleak.VerifyNone(t) + + // Create a cancellable context + ctx, cancel := context.WithCancel(context.Background()) + + // Create middleware (this starts the goroutine) + logger := slog.Default() + _ = RateLimitMiddleware(ctx, 10.0, 20, logger) + + // Cancel the context - this should stop the goroutine + cancel() +} + +func TestTokenRateLimitMiddleware_GoroutineLeak(t *testing.T) { + // Ensure no goroutines are leaking after the test + defer goleak.VerifyNone(t) + + // Create a cancellable context + ctx, cancel := context.WithCancel(context.Background()) + + // Create middleware (this starts the goroutine) + logger := slog.Default() + _ = TokenRateLimitMiddleware(ctx, 10.0, 20, logger) + + // Cancel the context - this should stop the goroutine + cancel() +} diff --git a/internal/auth/http/rate_limit_middleware.go b/internal/auth/http/rate_limit_middleware.go index e9ddbf1..808aa17 100644 --- a/internal/auth/http/rate_limit_middleware.go +++ b/internal/auth/http/rate_limit_middleware.go @@ -38,20 +38,21 @@ type rateLimiterEntry struct { // rate limiter based on their client ID. // // Configuration: +// - ctx: Application context for cleanup goroutine // - rps: Requests per second allowed per client // - burst: Maximum burst capacity for temporary spikes // // Returns: // - 429 Too Many Requests: Rate limit exceeded (includes Retry-After header) // - Continues: Request allowed within rate limit -func RateLimitMiddleware(rps float64, burst int, logger *slog.Logger) gin.HandlerFunc { +func RateLimitMiddleware(ctx context.Context, rps float64, burst int, logger *slog.Logger) gin.HandlerFunc { store := &rateLimiterStore{ rps: rps, burst: burst, } // Start cleanup goroutine for stale limiters (every 5 minutes) - go store.cleanupStale(context.Background(), 5*time.Minute) + go store.cleanupStale(ctx, 5*time.Minute) return func(c *gin.Context) { // Get authenticated client from context diff --git a/internal/auth/http/rate_limit_middleware_test.go b/internal/auth/http/rate_limit_middleware_test.go index 6ead654..be9e38a 100644 --- a/internal/auth/http/rate_limit_middleware_test.go +++ b/internal/auth/http/rate_limit_middleware_test.go @@ -1,6 +1,7 @@ package http import ( + "context" "log/slog" "net/http" "net/http/httptest" @@ -25,7 +26,7 @@ func TestRateLimitMiddleware_AllowsRequestsWithinLimit(t *testing.T) { // Create middleware with generous limits logger := slog.Default() - middleware := RateLimitMiddleware(10.0, 20, logger) + middleware := RateLimitMiddleware(context.Background(), 10.0, 20, logger) // Create test router router := gin.New() @@ -61,7 +62,7 @@ func TestRateLimitMiddleware_BlocksRequestsExceedingLimit(t *testing.T) { // Create middleware with very low limits logger := slog.Default() - middleware := RateLimitMiddleware(1.0, 2, logger) + middleware := RateLimitMiddleware(context.Background(), 1.0, 2, logger) // Create test router router := gin.New() @@ -101,7 +102,7 @@ func TestRateLimitMiddleware_Returns429WithRetryAfterHeader(t *testing.T) { } logger := slog.Default() - middleware := RateLimitMiddleware(0.5, 1, logger) + middleware := RateLimitMiddleware(context.Background(), 0.5, 1, logger) router := gin.New() router.Use(func(c *gin.Context) { @@ -143,7 +144,7 @@ func TestRateLimitMiddleware_IndependentLimitsPerClient(t *testing.T) { } logger := slog.Default() - middleware := RateLimitMiddleware(1.0, 1, logger) + middleware := RateLimitMiddleware(context.Background(), 1.0, 1, logger) router := gin.New() router.Use(middleware) @@ -187,7 +188,7 @@ func TestRateLimitMiddleware_BurstCapacityWorks(t *testing.T) { logger := slog.Default() // Low rate but higher burst - middleware := RateLimitMiddleware(1.0, 5, logger) + middleware := RateLimitMiddleware(context.Background(), 1.0, 5, logger) router := gin.New() router.Use(func(c *gin.Context) { @@ -219,7 +220,7 @@ func TestRateLimitMiddleware_RequiresAuthentication(t *testing.T) { gin.SetMode(gin.TestMode) logger := slog.Default() - middleware := RateLimitMiddleware(10.0, 20, logger) + middleware := RateLimitMiddleware(context.Background(), 10.0, 20, logger) router := gin.New() router.Use(middleware) diff --git a/internal/auth/http/token_rate_limit_middleware.go b/internal/auth/http/token_rate_limit_middleware.go index b7090c7..1ad95a6 100644 --- a/internal/auth/http/token_rate_limit_middleware.go +++ b/internal/auth/http/token_rate_limit_middleware.go @@ -39,20 +39,26 @@ type tokenRateLimiterEntry struct { // - Direct connection remote address // // Configuration: +// - ctx: Application context for cleanup goroutine // - rps: Requests per second allowed per IP address // - burst: Maximum burst capacity for temporary spikes // // Returns: // - 429 Too Many Requests: Rate limit exceeded (includes Retry-After header) // - Continues: Request allowed within rate limit -func TokenRateLimitMiddleware(rps float64, burst int, logger *slog.Logger) gin.HandlerFunc { +func TokenRateLimitMiddleware( + ctx context.Context, + rps float64, + burst int, + logger *slog.Logger, +) gin.HandlerFunc { store := &tokenRateLimiterStore{ rps: rps, burst: burst, } // Start cleanup goroutine for stale limiters (every 5 minutes) - go store.cleanupStale(context.Background(), 5*time.Minute) + go store.cleanupStale(ctx, 5*time.Minute) return func(c *gin.Context) { // Get client IP address diff --git a/internal/auth/http/token_rate_limit_middleware_test.go b/internal/auth/http/token_rate_limit_middleware_test.go index 5fe9b7d..e213cc3 100644 --- a/internal/auth/http/token_rate_limit_middleware_test.go +++ b/internal/auth/http/token_rate_limit_middleware_test.go @@ -1,6 +1,7 @@ package http import ( + "context" "log/slog" "net/http" "net/http/httptest" @@ -16,7 +17,7 @@ func TestTokenRateLimitMiddleware_AllowsRequestsWithinLimit(t *testing.T) { // Create middleware with generous limits logger := slog.Default() - middleware := TokenRateLimitMiddleware(10.0, 20, logger) + middleware := TokenRateLimitMiddleware(context.Background(), 10.0, 20, logger) // Create test router router := gin.New() @@ -40,7 +41,7 @@ func TestTokenRateLimitMiddleware_BlocksRequestsExceedingLimit(t *testing.T) { // Create middleware with very low limits logger := slog.Default() - middleware := TokenRateLimitMiddleware(1.0, 2, logger) + middleware := TokenRateLimitMiddleware(context.Background(), 1.0, 2, logger) // Create test router router := gin.New() @@ -70,7 +71,7 @@ func TestTokenRateLimitMiddleware_Returns429WithRetryAfterHeader(t *testing.T) { gin.SetMode(gin.TestMode) logger := slog.Default() - middleware := TokenRateLimitMiddleware(0.5, 1, logger) + middleware := TokenRateLimitMiddleware(context.Background(), 0.5, 1, logger) router := gin.New() router.Use(middleware) @@ -101,7 +102,7 @@ func TestTokenRateLimitMiddleware_IndependentLimitsPerIP(t *testing.T) { gin.SetMode(gin.TestMode) logger := slog.Default() - middleware := TokenRateLimitMiddleware(1.0, 1, logger) + middleware := TokenRateLimitMiddleware(context.Background(), 1.0, 1, logger) router := gin.New() router.Use(middleware) @@ -136,7 +137,7 @@ func TestTokenRateLimitMiddleware_BurstCapacityWorks(t *testing.T) { logger := slog.Default() // Low rate but higher burst - middleware := TokenRateLimitMiddleware(1.0, 5, logger) + middleware := TokenRateLimitMiddleware(context.Background(), 1.0, 5, logger) router := gin.New() router.Use(middleware) @@ -163,7 +164,7 @@ func TestTokenRateLimitMiddleware_NoAuthenticationRequired(t *testing.T) { gin.SetMode(gin.TestMode) logger := slog.Default() - middleware := TokenRateLimitMiddleware(10.0, 20, logger) + middleware := TokenRateLimitMiddleware(context.Background(), 10.0, 20, logger) router := gin.New() router.Use(middleware) @@ -226,7 +227,7 @@ func TestTokenRateLimitMiddleware_HandlesXForwardedFor(t *testing.T) { gin.SetMode(gin.TestMode) logger := slog.Default() - middleware := TokenRateLimitMiddleware(1.0, 1, logger) + middleware := TokenRateLimitMiddleware(context.Background(), 1.0, 1, logger) router := gin.New() router.Use(middleware) @@ -292,7 +293,7 @@ func TestTokenRateLimitMiddleware_RespectsConfiguredLimits(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { logger := slog.Default() - middleware := TokenRateLimitMiddleware(tt.rps, tt.burst, logger) + middleware := TokenRateLimitMiddleware(context.Background(), tt.rps, tt.burst, logger) router := gin.New() router.Use(middleware) diff --git a/internal/http/server.go b/internal/http/server.go index afe5450..ce02d47 100644 --- a/internal/http/server.go +++ b/internal/http/server.go @@ -66,6 +66,7 @@ func NewServer( // SetupRouter configures the Gin router with all routes and middleware. // This method is called during server initialization with all required dependencies. func (s *Server) SetupRouter( + ctx context.Context, cfg *config.Config, clientHandler *authHTTP.ClientHandler, tokenHandler *authHTTP.TokenHandler, @@ -125,6 +126,7 @@ func (s *Server) SetupRouter( var rateLimitMiddleware gin.HandlerFunc if cfg.RateLimitEnabled { rateLimitMiddleware = authHTTP.RateLimitMiddleware( + ctx, cfg.RateLimitRequestsPerSec, cfg.RateLimitBurst, s.logger, @@ -132,6 +134,7 @@ func (s *Server) SetupRouter( } s.registerAuthRoutes( + ctx, v1, cfg, clientHandler, @@ -167,6 +170,7 @@ func (s *Server) SetupRouter( // registerAuthRoutes configures the v1 authentication-related endpoints. func (s *Server) registerAuthRoutes( + ctx context.Context, v1 *gin.RouterGroup, cfg *config.Config, clientHandler *authHTTP.ClientHandler, @@ -182,6 +186,7 @@ func (s *Server) registerAuthRoutes( var tokenRateLimitMiddleware gin.HandlerFunc if cfg.RateLimitTokenEnabled { tokenRateLimitMiddleware = authHTTP.TokenRateLimitMiddleware( + ctx, cfg.RateLimitTokenRequestsPerSec, cfg.RateLimitTokenBurst, s.logger,