All files / src/tools getProceduresFeed.ts

94.38% Statements 84/89
81.25% Branches 65/80
100% Functions 8/8
96.05% Lines 73/76

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334                                                                                                                          3x 3x 2x 3x 3x 2x 2x   2x       3x             3x 3x   1x                             1x   1x           1x         1x 1x                               4x   3x 1x             2x 1x                         1x                             14x     14x 14x 14x 1x   13x 13x 9x   4x     4x 4x 4x                               11x 14x 14x 13x   2x     10x                                         13x 13x 13x 11x 11x 11x 11x   10x     13x                                   21x 21x   1x 1x 1x                     20x 20x 20x 20x 20x 20x 16x 3x 3x         3x 1x   13x             13x 13x   4x 4x 1x                   4x                                          
/**
 * MCP Tool: get_procedures_feed
 *
 * Get recently updated European Parliament procedures from the feed.
 *
 * **EP API Endpoint:**
 * - `GET /procedures/feed`
 *
 * ISMS Policy: SC-002 (Input Validation), AC-003 (Least Privilege)
 */
 
import { GetProceduresFeedSchema } from '../schemas/europeanParliament.js';
import { epClient } from '../clients/europeanParliamentClient.js';
import { ToolError } from './shared/errors.js';
import {
  isUpstream404,
  buildEmptyFeedResponse,
  isErrorInBody,
  buildFeedSuccessResponse,
  extractUpstreamStatusCode,
  type FeedErrorMeta,
} from './shared/feedUtils.js';
import { APIError } from '../clients/ep/baseClient.js';
import { TimeoutError } from '../utils/timeout.js';
import { z } from 'zod';
import type { ToolResult } from './shared/types.js';
 
/**
 * Caller-supplied filter parameters handed to the degraded fallback.
 *
 * The fallback request itself does not apply any of these; every field that
 * is not `undefined` is instead listed in the degraded-mode warning so the
 * consumer knows which filters were ignored.
 */
interface FallbackParams {
  /** Feed timeframe requested by the caller, if any. */
  timeframe?: string | undefined;
  /** Start date requested by the caller, if any. */
  startDate?: string | undefined;
  /** Process-type filter requested by the caller, if any. */
  processType?: string | undefined;
}
 
/**
 * Attempt to fetch a degraded fallback from the non-feed procedures endpoint.
 *
 * Called when the feed's enrichment step fails (error-in-body). Returns a
 * degraded feed response with a warning, or `null` if the fallback also fails.
 *
 * **Note on envelope shape:** `GET /procedures` returns a
 * {@link PaginatedResponse} without the JSON-LD `@context` field that the
 * feed envelope normally carries.  To keep the uniform feed envelope stable
 * for downstream consumers, the fallback payload is spread into an object
 * with a default empty `@context: []` before being passed to
 * {@link buildFeedSuccessResponse}.
 *
 * **Note on filters:** the fallback uses `GET /procedures` which does not
 * accept `timeframe`, `startDate`, or `processType`.  Any caller-supplied
 * filters are listed in the warning so consumers do not misinterpret the
 * degraded payload as a properly-filtered result.
 *
 * @param rawError - The raw error string from the error-in-body payload
 *                   (may be empty, in which case no upstream detail is added)
 * @param params   - Caller-supplied parameters (surfaced in the warning)
 * @returns Degraded ToolResult on success, or `null` on failure
 * @internal
 */
async function tryProceduresFallback(
  rawError: string,
  params: FallbackParams,
): Promise<ToolResult | null> {
  try {
    const fallbackResult = await epClient.getProcedures({ limit: 50, offset: 0 });
    const errorSuffix = rawError ? ` (upstream: ${rawError})` : '';

    // Collect every filter the caller supplied; none of them is applied by
    // the fallback request above, so each one is reported in the warning.
    const ignoredFilters: string[] = [];
    if (params.timeframe !== undefined) ignoredFilters.push(`timeframe="${params.timeframe}"`);
    if (params.startDate !== undefined) ignoredFilters.push(`startDate="${params.startDate}"`);
    if (params.processType !== undefined) ignoredFilters.push(`processType="${params.processType}"`);
    const ignoredSuffix =
      ignoredFilters.length > 0
        ? ` The following caller-supplied filters are NOT applied in degraded mode: ${ignoredFilters.join(', ')}.`
        : '';

    const fallbackWarning =
      `ENRICHMENT_FAILED: EP API enrichment step failed${errorSuffix}. ` +
      `Degraded mode: showing recent procedures from GET /procedures (non-feed endpoint).` +
      ` Items are procedure summaries rather than feed entries and may differ in shape from normal feed items.${ignoredSuffix}` +
      ` Consider retrying get_procedures_feed.`;

    // GET /procedures returns a PaginatedResponse without the JSON-LD `@context`
    // that the uniform feed envelope normally carries; inject an empty default
    // so the envelope shape stays stable for downstream consumers. The spread
    // comes last, so a `@context` present on the payload would take precedence.
    const envelope = { '@context': [] as unknown[], ...fallbackResult };
    return buildFeedSuccessResponse(envelope, [fallbackWarning]);
  } catch {
    // Deliberate best-effort: any failure while producing the fallback is
    // reported as `null` so the caller can emit its own in-band error instead.
    return null;
  }
}
 
/**
 * Produce the in-band "enrichment failed" feed response for an
 * error-in-body reply from the EP API.
 *
 * The metadata is always tagged `ENRICHMENT_FAILED` and marked retryable.
 * An `upstream` section is attached only when there is something to report:
 * a status code recovered from the message by
 * {@link extractUpstreamStatusCode}, the non-empty raw message, or both.
 *
 * @param rawError - Error text taken from the EP API response body; may be empty
 * @returns Empty feed response carrying the reason text and the error metadata
 * @internal
 */
function buildEnrichmentFailedResponse(rawError: string): ToolResult {
  const hasMessage = rawError !== '';
  const statusCode = extractUpstreamStatusCode(rawError);

  const meta: FeedErrorMeta = {
    errorCode: 'ENRICHMENT_FAILED',
    retryable: true,
    // Omit `upstream` entirely when neither a status code nor a message exists.
    ...(statusCode === undefined && !hasMessage
      ? {}
      : {
          upstream: {
            ...(statusCode === undefined ? {} : { statusCode }),
            ...(hasMessage ? { errorMessage: rawError } : {}),
          },
        }),
  };

  const upstreamNote = hasMessage ? ` (upstream: ${rawError})` : '';
  const reason = `EP API returned an error-in-body response for get_procedures_feed — the upstream enrichment step may have failed${upstreamNote}.`;
  return buildEmptyFeedResponse(reason, meta);
}
 
/**
 * Map a caught upstream error onto an in-band feed response when it matches
 * one of the recognised failure modes, checked in this order:
 *
 * 1. upstream 404 — plain empty feed response;
 * 2. timeout (a {@link TimeoutError}, or any `Error` whose message contains
 *    `"timed out"`) — `UPSTREAM_TIMEOUT`, retryable;
 * 3. {@link APIError} with status 429 — `RATE_LIMIT`, retryable, with the
 *    upstream status code and, when non-empty, the upstream message.
 *
 * @param error - The caught error
 * @returns In-band ToolResult for a recognised failure, or `null` when the
 *          error is unclassified and should be re-thrown by the caller
 * @internal
 */
function handleUpstreamCatchError(error: unknown): ToolResult | null {
  if (isUpstream404(error)) {
    return buildEmptyFeedResponse();
  }

  const timedOut =
    error instanceof TimeoutError ||
    (error instanceof Error && error.message.includes('timed out'));
  if (timedOut) {
    const timeoutReason =
      `EP API request timed out for get_procedures_feed — the endpoint is known to be slow. ` +
      `Consider retrying or using get_procedures with a limit parameter instead.`;
    return buildEmptyFeedResponse(timeoutReason, { errorCode: 'UPSTREAM_TIMEOUT', retryable: true });
  }

  // Anything that is not a 429 from the API client is left to the caller.
  if (!(error instanceof APIError) || error.statusCode !== 429) {
    return null;
  }

  const upstreamMessage = error.message;
  return buildEmptyFeedResponse(
    `EP API rate limit reached for get_procedures_feed — retry after a short delay.`,
    {
      errorCode: 'RATE_LIMIT',
      retryable: true,
      upstream: {
        statusCode: 429,
        ...(upstreamMessage ? { errorMessage: upstreamMessage } : {}),
      },
    },
  );
}
 
/**
 * Inspect a single procedure-like item and report whether it carries a
 * current-year token in either `dateLastActivity` or its `reference`.
 *
 * Checks are applied in order:
 * 1. non-object / `null` items never match;
 * 2. a string `dateLastActivity` starting with `yearStr` matches;
 * 3. a string `reference` accepted by `refRegex` matches;
 * 4. otherwise, when `reference` starts with four digits followed by `/`,
 *    that year is reported as `observedYear` for diagnostic context.
 *
 * @param item     - Candidate item; any value is accepted
 * @param yearStr  - Year prefix matched against `dateLastActivity`
 * @param refRegex - Pattern that recognises a current-year `reference`
 * @returns `hasCurrentYear` is true when the item matched; `observedYear` is
 *          only set for non-matching items whose `reference` has a leading
 *          four-digit year
 * @internal
 */
function inspectProcedureItem(
  item: unknown,
  yearStr: string,
  refRegex: RegExp,
): { hasCurrentYear: boolean; observedYear: number | undefined } {
  if (item === null || typeof item !== 'object') {
    return { hasCurrentYear: false, observedYear: undefined };
  }
  const obj = item as Record<string, unknown>;

  const dateLastActivity = obj['dateLastActivity'];
  if (typeof dateLastActivity === 'string' && dateLastActivity.startsWith(yearStr)) {
    return { hasCurrentYear: true, observedYear: undefined };
  }

  const reference = obj['reference'];
  if (typeof reference !== 'string') {
    return { hasCurrentYear: false, observedYear: undefined };
  }

  // `RegExp.prototype.test` resumes from `lastIndex` for global/sticky
  // patterns; reset it so a regex reused across items always scans from the
  // start of the reference.
  refRegex.lastIndex = 0;
  if (refRegex.test(reference)) {
    return { hasCurrentYear: true, observedYear: undefined };
  }

  // Not a current-year reference: extract the leading year, if present.
  const yearToken = /^(\d{4})\//.exec(reference)?.[1];
  if (yearToken === undefined) {
    return { hasCurrentYear: false, observedYear: undefined };
  }
  return { hasCurrentYear: false, observedYear: parseInt(yearToken, 10) };
}
 
/**
 * Walk a list of procedure-like items looking for the first one that carries
 * a current-year token, as judged by {@link inspectProcedureItem}.
 *
 * The walk stops at the first match. When nothing matches, the smallest
 * reference year seen along the way is returned; it is used purely as
 * diagnostic context in the staleness warning.
 *
 * @param items    - Items to examine, in payload order
 * @param yearStr  - Year prefix forwarded to the per-item check
 * @param refRegex - Current-year reference pattern forwarded to the per-item check
 * @returns `hasCurrentYear: true` (with no year reported) on the first match;
 *          otherwise `hasCurrentYear: false` plus the oldest year observed, if any
 * @internal
 */
function scanProceduresForCurrentYear(
  items: readonly unknown[],
  yearStr: string,
  refRegex: RegExp,
): { hasCurrentYear: boolean; oldestYearObserved: number | undefined } {
  let minYear: number | undefined;

  for (let index = 0; index < items.length; index += 1) {
    const verdict = inspectProcedureItem(items[index], yearStr, refRegex);

    // A single current-year item is enough; the oldest year is irrelevant then.
    if (verdict.hasCurrentYear) {
      return { hasCurrentYear: true, oldestYearObserved: undefined };
    }

    const year = verdict.observedYear;
    if (year === undefined) {
      continue;
    }
    if (minYear === undefined || year < minYear) {
      minYear = year;
    }
  }

  return { hasCurrentYear: false, oldestYearObserved: minYear };
}
 
/**
 * Produce a STALENESS_WARNING when a non-empty procedures-feed payload
 * contains no item tied to the current (UTC) calendar year.
 *
 * Background: the Hack23/euparliamentmonitor 2026-04-24 breaking-run
 * reliability audit §1.4 reported `get_procedures_feed` returning
 * historical-tail ordering (1972/0003, 1980/0013) instead of date-sorted
 * newest-first results while the envelope itself looked structurally
 * healthy, so the envelope alone could not tell consumers whether the data
 * was current. Each item's `dateLastActivity` and `reference`
 * (`YYYY/NNNN(...)`) are therefore checked for the current year. The check
 * is conservative: one current-year item is enough to suppress the warning.
 *
 * @param result - Feed payload; items are read from its `data` array. Any
 *                 other shape (including `null`/`undefined`) yields no warnings
 * @returns An empty list, or a list holding the single warning string
 * @internal
 */
function buildStalenessWarnings(result: unknown): readonly string[] {
  const payload = (result ?? {}) as Record<string, unknown>;
  const rawItems = payload['data'];
  if (!Array.isArray(rawItems) || rawItems.length === 0) {
    return [];
  }
  const entries: unknown[] = rawItems;

  const currentYear = String(new Date().getUTCFullYear());
  const currentYearReference = new RegExp(`^${currentYear}/`);
  const scan = scanProceduresForCurrentYear(entries, currentYear, currentYearReference);
  if (scan.hasCurrentYear) {
    return [];
  }

  // Mention the oldest reference year only when one was actually observed.
  let ageNote = '';
  if (scan.oldestYearObserved !== undefined) {
    ageNote = ` Oldest reference observed in payload: ${String(scan.oldestYearObserved)}.`;
  }

  const warning =
    `STALENESS_WARNING: EP /procedures/feed returned ${String(entries.length)} item(s) but none carry a ` +
    `${currentYear} reference or dateLastActivity. The upstream feed has been observed returning historical-tail ` +
    `ordering instead of date-sorted newest-first results.${ageNote} Consider falling back to ` +
    `get_procedures(limit=100) and sorting client-side by dateLastActivity descending.`;
  return [warning];
}
 
/**
 * Handles the get_procedures_feed MCP tool request.
 *
 * Flow:
 * 1. Validate `args` against {@link GetProceduresFeedSchema}.
 * 2. Call the EP procedures feed with the validated parameters.
 * 3. On an error-in-body reply, try the degraded `GET /procedures` fallback;
 *    if that also fails, return an in-band `ENRICHMENT_FAILED` response.
 * 4. Otherwise return the feed payload, adding a STALENESS_WARNING when no
 *    item belongs to the current calendar year.
 * 5. Thrown upstream errors that match a known failure mode (404, timeout,
 *    rate limit) are converted to in-band responses.
 *
 * @param args - Raw tool arguments, validated against {@link GetProceduresFeedSchema}
 * @returns MCP tool result containing recently updated procedure data
 * @throws ToolError (non-retryable, operation `validateInput`) when the
 *         arguments fail schema validation
 * @throws ToolError (retryable, operation `fetchData`) when the upstream call
 *         fails with an unclassified error
 * @security Input is validated with Zod before any API call.
 */
export async function handleGetProceduresFeed(args: unknown): Promise<ToolResult> {
  // Validate input — ZodErrors here are client mistakes (non-retryable)
  let params: ReturnType<typeof GetProceduresFeedSchema.parse>;
  try {
    params = GetProceduresFeedSchema.parse(args);
  } catch (error: unknown) {
    if (error instanceof z.ZodError) {
      const fieldErrors = error.issues.map((e) => `${e.path.join('.')}: ${e.message}`).join('; ');
      throw new ToolError({
        toolName: 'get_procedures_feed',
        operation: 'validateInput',
        message: `Invalid parameters: ${fieldErrors}`,
        isRetryable: false,
        cause: error,
      });
    }
    // Anything other than a ZodError is unexpected here; propagate unchanged.
    throw error;
  }

  try {
    // `timeframe` is always forwarded; the optional filters only when supplied.
    const apiParams: Record<string, unknown> = {};
    apiParams['timeframe'] = params.timeframe;
    if (params.startDate !== undefined) apiParams['startDate'] = params.startDate;
    if (params.processType !== undefined) apiParams['processType'] = params.processType;
    const result = await epClient.getProceduresFeed(apiParams);
    if (isErrorInBody(result)) {
      const rawError = typeof result['error'] === 'string' ? result['error'] : '';
      const fallback = await tryProceduresFallback(rawError, {
        timeframe: params.timeframe,
        startDate: params.startDate,
        processType: params.processType,
      });
      if (fallback !== null) return fallback;
      return buildEnrichmentFailedResponse(rawError);
    }
    const emptyReason = `EP API procedures/feed returned no data for timeframe '${params.timeframe}' — no procedures were updated in the requested period. This is expected during parliamentary recess or low-activity weeks. Use get_procedures (with limit/offset) to browse a paginated list of procedures as a reliable fallback.`;
    // Detect the historical-tail-ordering regression flagged in the
    // Hack23/euparliamentmonitor 2026-04-24 breaking audit §1.4: the EP API
    // sometimes returns 1972/1980 procedure IDs first instead of date-sorted
    // newest-first. When no item carries a current-year reference / activity
    // date we surface a STALENESS_WARNING so consumers can detect the
    // regression mechanically rather than by parsing prose.
    const stalenessWarnings = buildStalenessWarnings(result);
    return buildFeedSuccessResponse(result, stalenessWarnings, emptyReason);
  } catch (error: unknown) {
    // Known transient failures are reported in-band; everything else is
    // wrapped in a retryable ToolError with the original error as the cause.
    const inBand = handleUpstreamCatchError(error);
    if (inBand !== null) return inBand;
    throw new ToolError({
      toolName: 'get_procedures_feed',
      operation: 'fetchData',
      message: 'Failed to retrieve procedures feed',
      isRetryable: true,
      cause: error,
    });
  }
}
/**
 * Tool metadata for get_procedures_feed.
 *
 * Declares the tool name, the human-readable description, and a JSON-Schema
 * style `inputSchema` describing the three accepted arguments (`timeframe`,
 * `startDate`, `processType`). No argument is listed as required here.
 */
export const getProceduresFeedToolMetadata = {
  name: 'get_procedures_feed',
  description:
    'Get recently updated European Parliament procedures from the feed. Returns procedures published or updated during the specified timeframe. Data source: European Parliament Open Data Portal. NOTE: The EP API procedures/feed endpoint is significantly slower than other feeds — "one-month" queries may take around 120 seconds and can still time out. If you see timeouts, increase the global timeout with --timeout or EP_REQUEST_TIMEOUT_MS. When no procedures were updated in the requested timeframe (common during parliamentary recess or low-activity periods), the response will have status:"unavailable" and empty items — this is expected behaviour, not an error. In that case, use get_procedures (with limit/offset) to browse a paginated list of procedures as a reliable fallback. The response also surfaces a STALENESS_WARNING entry in dataQualityWarnings whenever the upstream returns historical-tail ordering with no current-year items (a known degraded-upstream pattern), so consumers can detect the regression programmatically.',
  inputSchema: {
    // `as const` keeps the literal type 'object' instead of widening to string.
    type: 'object' as const,
    properties: {
      timeframe: {
        type: 'string',
        description: 'Timeframe for the feed (today, one-day, one-week, one-month, custom)',
        enum: ['today', 'one-day', 'one-week', 'one-month', 'custom'],
        // NOTE(review): advertised default only — confirm it matches the
        // default applied by GetProceduresFeedSchema.
        default: 'one-week',
      },
      startDate: {
        type: 'string',
        description: 'Start date (YYYY-MM-DD) — required when timeframe is "custom"',
      },
      processType: { type: 'string', description: 'Process type filter' },
    },
  },
};