All files / src/tools/shared feedUtils.ts

100% Statements 25/25
96.77% Branches 30/31
100% Functions 6/6
100% Lines 25/25

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317                                                                                                                                                                                                                                8x 8x 4x   4x                             18x                                                               18x                 41x                                       80x                                                                                                 122x 122x   122x   2x     122x       122x 11x 11x   11x 11x   111x 27x 27x   84x     122x                                                                   36x 36x                            
/**
 * Shared utilities for feed tool handlers.
 *
 * The EP Open Data Portal feed endpoints have several known behaviours
 * that require graceful handling:
 *
 * 1. **HTTP 404** — returned during recess or low-activity periods when
 *    no records were updated within the requested timeframe.
 * 2. **HTTP 204 No Content** — returned by feeds that have no updates
 *    (empty body, no Content-Type header).  Handled by baseClient.
 * 3. **HTTP 200 with error-in-body** — the EP API sometimes returns
 *    HTTP 200 with a JSON body containing an `error` field (e.g.
 *    `"error": "404 Not Found from POST …"`).  This happens when the
 *    EP API's internal enrichment/POST step fails.  These look like
 *    successful responses but contain no `data` array.
 *
 * These helpers convert all three cases into uniform MCP responses so
 * downstream consumers always receive the same envelope shape.
 *
 * ## Uniform feed response envelope
 *
 * Every feed handler emits the same body shape, regardless of whether
 * the upstream call succeeded, returned no data, or failed in-band:
 *
 * ```json
 * {
 *   "status": "operational" | "degraded" | "unavailable",
 *   "generatedAt": "2026-04-18T07:12:00Z",
 *   "items": [],
 *   "itemCount": 0,
 *   "reason": "Optional string when status !== 'operational'",
 *   "data": [],
 *   "@context": [],
 *   "dataQualityWarnings": []
 * }
 * ```
 *
 * - `status` — `"operational"` for fresh data without warnings,
 *   `"degraded"` for fresh data accompanied by data-quality warnings,
 *   `"unavailable"` when the upstream returned a 404 / error-in-body /
 *   empty body and we have no fresh data to report.
 * - `items` — canonical array field in the response contract.
 * - `data` — legacy compatibility field, normalized so serialized
 *   responses always expose it as an array with the same contents as
 *   `items` (note: responses are JSON-serialized, so consumers see
 *   structural equality, not referential identity).
 * - `itemCount` — length of `items`.
 * - `generatedAt` — ISO-8601 timestamp of when the response was built
 *   (not "last upstream success" — that would require state/caching).
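 * - `reason` — present only when `status !== "operational"`.
 * - `errorCode` / `retryable` / `upstream` — optional machine-readable
 *   failure metadata; emitted only by `buildEmptyFeedResponse` when the
 *   caller supplies the corresponding `FeedErrorMeta` fields.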
 *
 * The legacy `data` / `@context` / `dataQualityWarnings` fields are
 * preserved so existing consumers continue to work unchanged.
 *
 * Reserve HTTP 4xx / 5xx for genuine transport errors (auth, rate
 * limit, gateway timeout, endpoint removed permanently). Empty
 * timeframes / temporary upstream outages always yield HTTP 200 with
 * `status: "unavailable"`.
 *
 * ISMS Policy: SC-002 (Input Validation), AC-003 (Least Privilege)
 */
 
import { APIError } from '../../clients/ep/baseClient.js';
import { buildToolResponse } from './responseBuilder.js';
import type { ToolResult } from './types.js';
 
/**
 * Operational status carried by every feed response under the uniform
 * contract.
 */
export type FeedStatus =
  /** Items are present and there are no data-quality warnings. */
  | 'operational'
  /** Items are present, accompanied by data-quality warnings. */
  | 'degraded'
  /** No items to report (empty result, upstream 404, or error-in-body). */
  | 'unavailable';
 
/**
 * Machine-readable classification of a feed failure.
 *
 * Downstream consumers can branch on this value to implement
 * retry / skip / fallback logic without parsing human-readable text.
 */
export type FeedErrorCode =
  /**
   * The EP API's internal enrichment/POST step returned an error-in-body
   * (HTTP 200 with an `error` field and no `data` array).
   */
  | 'ENRICHMENT_FAILED'
  /** The upstream request exceeded the configured timeout. */
  | 'UPSTREAM_TIMEOUT'
  /** A non-timeout, non-rate-limit upstream error occurred. */
  | 'UPSTREAM_ERROR'
  /** The upstream API returned HTTP 429 (too many requests). */
  | 'RATE_LIMIT';
 
/**
 * Optional machine-readable metadata attached to an empty/failed feed response.
 *
 * Allows downstream consumers to classify the failure and decide whether to
 * retry the request, fall back to a non-feed endpoint, or skip entirely.
 *
 * Every field is optional. `buildEmptyFeedResponse` copies each top-level
 * field into the response envelope only when it is not `undefined`, so an
 * omitted field produces no key at all in the serialized response.
 */
export interface FeedErrorMeta {
  /** Machine-readable failure classification (see {@link FeedErrorCode}). */
  errorCode?: FeedErrorCode;
  /** Whether the failure is transient and the request should be retried. */
  retryable?: boolean;
  /**
   * Information about the upstream error, when available.
   * The object is passed through to the envelope as-is (not copied field
   * by field), so its own optional members may individually be absent.
   */
  upstream?: {
    /**
     * HTTP status code parsed from the upstream error message, if present.
     * NOTE(review): presumably produced by `extractUpstreamStatusCode` —
     * confirm at the call sites, which are outside this file.
     */
    statusCode?: number;
    /** Raw error message from the upstream response body. */
    errorMessage?: string;
  };
}
 
/**
 * Pull the HTTP status code out of an EP API error-in-body message.
 *
 * The EP API often embeds the status in the error string, for example
 * `"404 Not Found from POST …"` or `"502 Bad Gateway from …"`.  The first
 * standalone three-digit number in the range 100–599 is taken to be that
 * status.
 *
 * @param errorMessage - Raw error string from the EP API response body
 * @returns The status code as a number, or `undefined` when the message
 *          contains no standalone three-digit number in the 100–599 range
 */
export function extractUpstreamStatusCode(errorMessage: string): number | undefined {
  // The word boundaries stop longer digit runs (e.g. "2026") from matching.
  const [, statusDigits] = errorMessage.match(/\b([1-5]\d{2})\b/) ?? [];
  return statusDigits === undefined ? undefined : Number.parseInt(statusDigits, 10);
}
 
/**
 * Description shared by the `limit` and `offset` parameters of the
 * fixed-window feed schema: both are accepted but have no effect.
 */
const FIXED_WINDOW_PAGINATION_NOTE =
  'Informational-only — the upstream EP API does not paginate this fixed-window feed. Accepted for contract uniformity.';

/**
 * MCP `tools/list` inputSchema shared by the fixed-window feed tools
 * (Group A: `get_documents_feed`, `get_plenary_documents_feed`, etc.).
 *
 * The EP API endpoints behind these tools take no `timeframe` /
 * `startDate` / `limit` / `offset` arguments; they always serve a
 * server-defined default window (typically one month).  The parameters are
 * advertised anyway so the contract matches the sliding-window feed tools
 * (Group B): the Zod validator accepts them and they are silently dropped
 * at the upstream call site.  See `FixedWindowFeedSchema` in
 * `src/schemas/ep/feed.ts`.
 */
export const FIXED_WINDOW_FEED_INPUT_SCHEMA = {
  type: 'object' as const,
  properties: {
    timeframe: {
      type: 'string',
      description:
        'Informational-only — this feed uses a server-defined default window (typically one month) and ignores this parameter. Accepted for contract uniformity with sliding-window feed tools.',
      enum: ['today', 'one-day', 'one-week', 'one-month', 'custom'],
    },
    startDate: {
      type: 'string',
      description:
        'Informational-only — ignored by this fixed-window feed. Accepted for contract uniformity with sliding-window feed tools.',
    },
    limit: {
      type: 'number',
      description: FIXED_WINDOW_PAGINATION_NOTE,
      minimum: 1,
      maximum: 100,
    },
    offset: {
      type: 'number',
      description: FIXED_WINDOW_PAGINATION_NOTE,
      minimum: 0,
    },
  },
};
 
/**
 * Default reason surfaced when an empty/no-data feed response is built.
 *
 * Used as the fallback `reason` in `buildFeedSuccessResponse` (when no
 * `customEmptyReason` is supplied and the item list is empty) and as the
 * default value of the `reason` parameter of `buildEmptyFeedResponse`.
 * Both helpers also place it in `dataQualityWarnings`, so changing this
 * text changes what consumers of that legacy field receive.
 */
const EMPTY_FEED_REASON =
  'EP Open Data Portal returned no data for this feed — likely no updates in the requested timeframe';
 
/**
 * Tell whether a caught error is an HTTP 404 from the upstream EP API.
 *
 * The EP Open Data Portal answers 404 on feed endpoints that had no
 * recent updates within the requested timeframe (e.g. during recess).
 *
 * @param error - Any thrown value
 * @returns `true` only when `error` is an {@link APIError} whose
 *          `statusCode` is exactly 404
 */
export function isUpstream404(error: unknown): boolean {
  // Anything that is not an APIError cannot carry an upstream status code.
  if (!(error instanceof APIError)) {
    return false;
  }
  return error.statusCode === 404;
}
 
/**
 * Recognise an EP API "error-in-body" response.
 *
 * Some feed endpoints answer HTTP 200 with a JSON body that carries an
 * `error` field and no `data` array, for example:
 * ```json
 * {
 *   "@id": "https://data.europarl.europa.eu/eli/dl/…",
 *   "error": "404 Not Found from POST …",
 *   "@context": { "error": { … } }
 * }
 * ```
 *
 * @param result - Parsed response body
 * @returns `true` when `error` is a non-empty string and `data` is not an
 *          array, so the caller can convert the body to an empty feed
 *          response
 */
export function isErrorInBody(result: Record<string, unknown>): boolean {
  const errorField = result['error'];

  // Without a non-empty textual error there is nothing to report.
  if (typeof errorField !== 'string' || errorField.length === 0) {
    return false;
  }

  // A body that still carries a data array is treated as a normal response.
  return !Array.isArray(result['data']);
}
 
/**
 * Wrap a successful upstream feed result in the uniform envelope.
 *
 * The original payload (typically `{ data, '@context' }`) is preserved
 * and augmented with the uniform contract fields:
 *
 * - `status` is **derived** from `items.length` and warnings:
 *   - `"unavailable"` when `items.length === 0` (covers the
 *     "HTTP 200 + empty array" upstream case);
 *   - `"degraded"` when items are present **and** any
 *     `dataQualityWarnings` are present (either supplied via `result`
 *     or passed explicitly);
 *   - `"operational"` when items are present and no warnings.
 * - `data` is normalized so that, after JSON serialization, consumers
 *   reading the legacy `data` field always see an array with the same
 *   contents as `items` (structural equality across the wire, not
 *   referential identity).
 * - Existing `dataQualityWarnings` from `result` are preserved and
 *   merged with any explicitly-supplied warnings (rather than
 *   clobbered); non-string entries are dropped. When `status` is
 *   `"unavailable"`, the empty-feed reason is appended (once) for
 *   backwards compatibility with consumers reading the legacy field.
 * - `reason` is set whenever `status !== "operational"`.
 *
 * Only plain-object payloads are spread into the envelope. `null`,
 * `undefined`, primitives and arrays are treated as an empty payload so
 * that a string or array result cannot leak numeric index keys
 * (`"0"`, `"1"`, …) into the response.
 *
 * @param result - Raw upstream response payload (may contain `data`,
 *                 `@context`, and optionally `dataQualityWarnings`)
 * @param warnings - Optional extra data-quality warnings to merge into
 *                   the response.
 * @param customEmptyReason - Optional human-readable reason to use instead
 *                            of the shared {@link EMPTY_FEED_REASON} when
 *                            `items.length === 0`.  Useful when a specific
 *                            tool wants to surface a more descriptive message
 *                            while still preserving the upstream JSON-LD
 *                            payload (e.g. `@context`).  When omitted the
 *                            default shared reason is used.
 * @returns MCP-compliant ToolResult containing the uniform envelope
 */
export function buildFeedSuccessResponse(
  result: unknown,
  warnings: readonly string[] = [],
  customEmptyReason?: string,
): ToolResult {
  // Spreading a string or an array would copy its indices into the envelope,
  // so anything that is not a plain object counts as "no payload".
  const source: Record<string, unknown> =
    typeof result === 'object' && result !== null && !Array.isArray(result)
      ? (result as Record<string, unknown>)
      : {};
  const items = Array.isArray(source['data']) ? (source['data'] as unknown[]) : [];

  const existingWarnings = Array.isArray(source['dataQualityWarnings'])
    ? (source['dataQualityWarnings'] as unknown[]).filter(
        (w): w is string => typeof w === 'string',
      )
    : [];
  const mergedWarnings = [...existingWarnings, ...warnings];

  let status: FeedStatus;
  let reason: string | undefined;
  if (items.length === 0) {
    status = 'unavailable';
    reason = customEmptyReason ?? EMPTY_FEED_REASON;
    // Surface the empty-feed reason in dataQualityWarnings for legacy
    // consumers, without duplicating it when it is already listed.
    if (!mergedWarnings.includes(reason)) {
      mergedWarnings.push(reason);
    }
  } else if (mergedWarnings.length > 0) {
    status = 'degraded';
    reason = mergedWarnings.join('; ');
  } else {
    status = 'operational';
  }

  // The contract fields come after the spread so they override any
  // same-named keys carried by the upstream payload.
  return buildToolResponse({
    ...source,
    status,
    generatedAt: new Date().toISOString(),
    items,
    itemCount: items.length,
    data: items,
    ...(reason !== undefined ? { reason } : {}),
    dataQualityWarnings: mergedWarnings,
  });
}
 
/**
 * Produce the uniform-contract envelope for a feed with nothing to report.
 *
 * The shape matches {@link buildFeedSuccessResponse}, fixed at
 * `status: "unavailable"` with empty `items` / `data` arrays. Use it when
 * the upstream returned 404 / empty body / error-in-body and there is no
 * fresh data to report.
 *
 * `"degraded"` cannot be produced here on purpose: it means "partial data
 * with warnings". For that case call {@link buildFeedSuccessResponse} with
 * the partial payload and the warnings, and `status` will be derived as
 * `"degraded"`.
 *
 * @param reason - Human-readable explanation of why the feed is empty. It
 *                 is also emitted as the single `dataQualityWarnings` entry
 *                 for consumers that read the legacy field. Defaults to the
 *                 shared {@link EMPTY_FEED_REASON}.
 * @param meta   - Optional machine-readable failure metadata. Each of
 *                 `errorCode`, `retryable` and `upstream` is added to the
 *                 envelope only when it is defined, so downstream consumers
 *                 can classify the failure and decide whether to retry,
 *                 fall back, or skip.
 * @returns MCP-compliant ToolResult containing the uniform envelope
 */
export function buildEmptyFeedResponse(reason = EMPTY_FEED_REASON, meta?: FeedErrorMeta): ToolResult {
  // One array backs both `items` and the legacy `data` field.
  const noItems: unknown[] = [];

  // Collect only the metadata fields the caller actually supplied.
  const failureMeta = {
    ...(meta?.errorCode !== undefined ? { errorCode: meta.errorCode } : {}),
    ...(meta?.retryable !== undefined ? { retryable: meta.retryable } : {}),
    ...(meta?.upstream !== undefined ? { upstream: meta.upstream } : {}),
  };

  return buildToolResponse({
    status: 'unavailable' satisfies FeedStatus,
    generatedAt: new Date().toISOString(),
    items: noItems,
    itemCount: noItems.length,
    reason,
    ...failureMeta,
    data: noItems,
    '@context': [],
    dataQualityWarnings: [reason],
  });
}