services LifecycleWarmupScheduler.ts

100% Statements 48/48
86.66% Branches 13/15
90% Functions 9/10
100% Lines 45/45
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19x
19x
 
19x
19x
19x
19x
19x
19x
 
 
 
 
 
 
 
 
19x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8x
7x
5x
 
 
6x
 
 
 
 
5x
5x
 
5x
 
 
 
 
 
 
 
 
8x
4x
4x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15x
2x
 
13x
13x
13x
13x
 
 
9x
9x
9x
9x
9x
 
 
 
9x
 
 
 
 
 
 
3x
3x
3x
3x
3x
3x
3x
 
 
3x
 
12x
 
 
13x
13x
 
 
 
 
34x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3x
 
 
 
 
 
 
5x
  /**
 * Lifecycle-Statistics Cache Warmup Scheduler.
 *
 * Out-of-band refresh job that keeps the corpus-wide lifecycle-statistics
 * cache populated so {@link import('../tools/monitorLegislativePipeline.js')
 * monitor_legislative_pipeline} — which reads from the cache only on the
 * request path — never has to degrade to `INSUFFICIENT_DATA` forecasts
 * because of a cold cache.
 *
 * **Why a separate scheduler instead of an on-request rebuild?**
 * `monitor_legislative_pipeline` cannot afford to race the corpus rebuild
 * (`/procedures` + up to 500 `/procedures/{id}/events`) against its own
 * rate-limited `/events` fan-out: the token-bucket budget would be exhausted
 * before the tool's own queries land. The scheduler runs the rebuild
 * independently of any request, giving the cache a steady-state warm window.
 *
 * **Concurrency.** The scheduler deduplicates concurrent `refreshNow()` calls
 * through its own `inFlight` promise so callers share a single warmup attempt.
 * Each warmup uses `getLifecycleStatistics({ forceRefresh: true })` to rebuild
 * the corpus out-of-band without relying on request-path cache misses.
 *
 * **Test hermeticity.** `start()` accepts a `{ disable: true }` flag so unit
 * tests can opt out of the interval timer entirely. Internally the timer is
 * `unref()`'d so production/CLI process exits are not blocked.
 *
 * ISMS Policy: AU-002 (Audit Logging), AC-003 (Least Privilege),
 *   SC-002 (Input Validation), A.8.16 (Monitoring activities)
 *
 * @module services/LifecycleWarmupScheduler
 */
 
import {
  getLifecycleStatistics,
  type LifecycleStatisticsModel,
} from '../utils/lifecycleStatistics.js';
import {
  DEFAULT_LIFECYCLE_WARMUP_INTERVAL_MS,
  resolveLifecycleWarmupIntervalMs,
} from '../config.js';
import { toErrorMessage } from '../utils/auditLogger.js';
 
/**
 * Options accepted by {@link LifecycleWarmupScheduler.start}.
 *
 * Every field is optional; `start()` defaults the whole object to `{}`.
 */
export interface LifecycleWarmupSchedulerStartOptions {
  /**
   * Opt out of the interval timer. Intended for unit tests that need
   * deterministic, hermetic execution without background work. When this is
   * exactly `true`, `start()` returns before creating a timer and leaves the
   * scheduler's state untouched; explicit `refreshNow()` calls still work.
   */
  disable?: boolean;
}
 
/**
 * Outcome of a single {@link LifecycleWarmupScheduler.refreshNow} call,
 * discriminated on `kind`:
 *
 * - `'success'`   — the rebuild resolved; carries the `corpusSize` and
 *   `totalObservations` read from the returned model plus the wall-clock
 *   duration of the attempt in milliseconds.
 * - `'in-flight'` — the caller joined an attempt that was already running;
 *   delivered only after that attempt has settled, and carries no payload.
 * - `'error'`     — the rebuild threw; carries the sanitised message and the
 *   wall-clock duration of the attempt in milliseconds.
 */
export type LifecycleWarmupOutcome =
  | { kind: 'success'; corpusSize: number; totalObservations: number; durationMs: number }
  | { kind: 'in-flight' }
  | { kind: 'error'; errorMessage: string; durationMs: number };
 
/**
 * Observable status of the scheduler, as returned by
 * {@link LifecycleWarmupScheduler.getStatus}. All counters and timestamps
 * are per scheduler instance and start from construction, not from `start()`.
 */
export interface LifecycleWarmupSchedulerStatus {
  /** Whether an interval timer is currently installed (`start()` without a later `dispose()`). */
  running: boolean;
  /**
   * Interval (ms) handed to the timer: the constructor override when one was
   * given, otherwise the value resolved from the environment/default.
   */
  intervalMs: number;
  /** Number of warmup attempts that have settled (successes plus failures). */
  totalAttempts: number;
  /** Successful warmups (no error thrown by `getLifecycleStatistics`). */
  successfulAttempts: number;
  /** Warmups that ended in a thrown error. */
  failedAttempts: number;
  /**
   * ISO-8601 timestamp of the most recent successful warmup, or `null` when
   * no warmup has ever succeeded.
   */
  lastSuccessAt: string | null;
  /**
   * ISO-8601 timestamp of the most recent failed warmup, or `null` when no
   * warmup has ever failed. Not reset by a later success.
   */
  lastRefreshErrorAt: string | null;
  /**
   * Sanitised error message from the most recent failure, or `null` when no
   * warmup has failed. Not reset by a later success.
   */
  lastRefreshErrorMessage: string | null;
}
 
/**
 * Out-of-band warmup scheduler for the lifecycle-statistics cache.
 *
 * Typical lifecycle:
 * ```typescript
 * const scheduler = new LifecycleWarmupScheduler();
 * scheduler.start();                 // production
 * scheduler.start({ disable: true }); // tests
 * await scheduler.refreshNow();       // explicit, returns once the
 *                                     // in-flight rebuild settles
 * scheduler.dispose();                // shutdown
 * ```
 *
 * `refreshNow()` returns a promise that callers can await for explicit
 * priming on startup; failures are logged and reported as an
 * `{ kind: 'error' }` outcome rather than thrown, so a transient EP-API
 * outage does not crash the server.
 */
export class LifecycleWarmupScheduler {
  /** Handle of the active interval, or `null` while the scheduler is stopped. */
  private timer: NodeJS.Timeout | null = null;
  /** Promise of the attempt currently running, or `null` when idle. */
  private inFlight: Promise<LifecycleWarmupOutcome> | null = null;
  private readonly intervalMs: number;
  private totalAttempts = 0;
  private successfulAttempts = 0;
  private failedAttempts = 0;
  private lastSuccessAt: string | null = null;
  private lastRefreshErrorAt: string | null = null;
  private lastRefreshErrorMessage: string | null = null;

  /**
   * @param intervalMs - Optional override for the warmup interval, used
   *   as-is. When omitted and `EP_LIFECYCLE_WARMUP_INTERVAL_MS` is set, the
   *   value comes from {@link resolveLifecycleWarmupIntervalMs} (clamped to
   *   `[60_000, 3_600_000]`); when the variable is unset,
   *   `DEFAULT_LIFECYCLE_WARMUP_INTERVAL_MS` is used. Tests may pass a small
   *   value directly.
   */
  constructor(intervalMs?: number) {
    this.intervalMs =
      intervalMs ??
      (process.env['EP_LIFECYCLE_WARMUP_INTERVAL_MS'] !== undefined
        ? resolveLifecycleWarmupIntervalMs()
        : DEFAULT_LIFECYCLE_WARMUP_INTERVAL_MS);
  }

  /**
   * Start the periodic refresh. The first refresh runs only when the timer
   * first fires (callers wanting immediate priming should `await refreshNow()`
   * first). Calling `start()` while a timer is already installed is a no-op,
   * as is calling it with `{ disable: true }`.
   *
   * The interval is `unref()`'d so it does not keep the Node.js event loop
   * alive on its own — CLI scripts and tests can still exit cleanly without
   * an explicit `dispose()`.
   *
   * @param options - See {@link LifecycleWarmupSchedulerStartOptions}.
   */
  start(options: LifecycleWarmupSchedulerStartOptions = {}): void {
    if (options.disable === true) return;
    if (this.timer !== null) return;
    const timer = setInterval(() => {
      // Errors inside refreshNow are already caught; this keeps the timer
      // resilient to unexpected throws.
      void this.refreshNow().catch(() => {
        /* already logged inside refreshNow */
      });
    }, this.intervalMs);
    // `unref` is guarded — some custom timer mocks may not implement it.
    if (typeof timer.unref === 'function') {
      timer.unref();
    }
    this.timer = timer;
  }

  /**
   * Stop the periodic refresh and clear the timer. Any in-flight refresh is
   * left to settle on its own (the scheduler holds no cancellation handle for
   * it). Safe to call multiple times.
   */
  dispose(): void {
    if (this.timer !== null) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }

  /**
   * Run a single warmup attempt immediately.
   *
   * If a warmup is already in-flight no second rebuild is started: the caller
   * receives a promise that settles together with the running attempt and
   * resolves to `{ kind: 'in-flight' }`. Otherwise a new attempt is started
   * and its own outcome is returned. Rebuild errors are logged via
   * `console.error` and returned as `{ kind: 'error' }`; they are not thrown
   * out of this method so the scheduler stays alive across transient EP-API
   * failures.
   *
   * @returns Outcome of the attempt (success counts, in-flight share, or
   *   sanitised error message).
   */
  refreshNow(): Promise<LifecycleWarmupOutcome> {
    if (this.inFlight !== null) {
      return this.inFlight.then(() => ({ kind: 'in-flight' }));
    }
    const attempt = this.runWarmupAttempt(Date.now());
    // Register the attempt BEFORE arranging its release. Clearing the slot
    // from inside the attempt itself is unsafe: when the rebuild throws
    // synchronously the attempt finishes before this assignment, which would
    // then park an already-settled promise here forever and turn every later
    // call into a permanent `in-flight` no-op.
    this.inFlight = attempt;
    const release = (): void => {
      if (this.inFlight === attempt) {
        this.inFlight = null;
      }
    };
    // Attached before the promise is handed out, so the slot is already free
    // by the time any caller's continuation runs. Both branches are handled,
    // so the derived promise can never reject.
    void attempt.then(release, release);
    return attempt;
  }

  /** Diagnostic snapshot of the scheduler — consumed by `get_server_health`. */
  getStatus(): LifecycleWarmupSchedulerStatus {
    return {
      running: this.timer !== null,
      intervalMs: this.intervalMs,
      totalAttempts: this.totalAttempts,
      successfulAttempts: this.successfulAttempts,
      failedAttempts: this.failedAttempts,
      lastSuccessAt: this.lastSuccessAt,
      lastRefreshErrorAt: this.lastRefreshErrorAt,
      lastRefreshErrorMessage: this.lastRefreshErrorMessage,
    };
  }

  /**
   * Execute one forced rebuild, update the counters/timestamps and log the
   * result. Does not touch `inFlight`; {@link refreshNow} owns that slot.
   *
   * @param startedAt - `Date.now()` captured when the attempt was requested;
   *   used to compute `durationMs`.
   */
  private async runWarmupAttempt(startedAt: number): Promise<LifecycleWarmupOutcome> {
    try {
      const model: LifecycleStatisticsModel = await getLifecycleStatistics({
        forceRefresh: true,
      });
      const durationMs = Date.now() - startedAt;
      this.totalAttempts++;
      this.successfulAttempts++;
      this.lastSuccessAt = new Date().toISOString();
      console.error(
        `[lifecycle-warmup] success corpusSize=${String(model.corpusSize)}`
          + ` observations=${String(model.totalObservations)} durationMs=${String(durationMs)}`,
      );
      return {
        kind: 'success',
        corpusSize: model.corpusSize,
        totalObservations: model.totalObservations,
        durationMs,
      };
    } catch (error: unknown) {
      const durationMs = Date.now() - startedAt;
      const sanitized = sanitizeErrorMessage(error);
      this.totalAttempts++;
      this.failedAttempts++;
      this.lastRefreshErrorAt = new Date().toISOString();
      this.lastRefreshErrorMessage = sanitized;
      console.error(
        `[lifecycle-warmup] failure durationMs=${String(durationMs)} error=${sanitized}`,
      );
      return { kind: 'error', errorMessage: sanitized, durationMs };
    }
  }
}
 
/**
 * Reduce a thrown value to a short, single-line message for log lines and
 * the health-status payload.
 *
 * The text comes from `toErrorMessage`; every run of whitespace (including
 * newlines and tabs) is then collapsed to one space and the result is cut to
 * at most 200 UTF-16 code units.
 *
 * @param error - Whatever was caught; may be any value.
 * @returns The flattened, length-capped message.
 */
function sanitizeErrorMessage(error: unknown): string {
  const rawMessage = toErrorMessage(error);
  const singleLine = rawMessage.replace(/\s+/g, ' ');
  return singleLine.slice(0, 200);
}
 
/**
 * Process-wide singleton used by the server bootstrap and the
 * `get_server_health` tool. Tests may construct their own instances.
 *
 * Constructed with no arguments when this module is first evaluated, so the
 * interval is resolved from `EP_LIFECYCLE_WARMUP_INTERVAL_MS` (or the default)
 * at import time. No timer is installed until `start()` is called.
 */
export const lifecycleWarmupScheduler = new LifecycleWarmupScheduler();