All files / src/utils credibilityCheck.ts

100% Statements 8/8
100% Branches 9/9
100% Functions 1/1
100% Lines 7/7

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71                    1x                           1x               1x                                             19x         16x         6x     10x    
/**
 * Data credibility checks for EP API values.
 *
 * Used by `scripts/generate-stats.ts --update` to prevent overwriting
 * curated statistics with incomplete EP API data.
 *
 * @module utils/credibilityCheck
 */
 
/**
 * API values below this are suspicious when the stored value is much larger
 * (see Guard 1 in {@link isCredibleApiValue}).
 */
export const MIN_CREDIBLE_VALUE = 10;

/**
 * Maximum allowed percentage drop from stored value before the API value
 * is treated as incomplete/unreliable.
 *
 * EP API endpoints sometimes return partial datasets due to:
 * - Server-side pagination issues or timeouts
 * - Data reorganisation or migration
 * - Incomplete database loads
 *
 * Set to 50% to catch clearly incomplete data (e.g. 80% drops in speeches,
 * 73% drops in documents) while still allowing genuine corrections.
 * A drop of exactly this percentage is still accepted; only strictly larger
 * drops are rejected.
 */
export const MAX_ALLOWED_DROP_PERCENT = 50;

/**
 * Minimum stored value before the "significant drop" guard activates.
 *
 * Small stored values (≤ 100) are allowed to fluctuate freely since
 * even large percentage changes represent small absolute differences.
 */
export const MIN_STORED_FOR_DROP_CHECK = 100;

/**
 * Check whether an API value is credible enough to overwrite the stored value.
 *
 * Returns false when the API clearly returned unusable or incomplete data:
 *
 * **Guard 0 — non-finite API value:** `NaN`, `Infinity` and `-Infinity` are
 * never credible. Without this check `NaN` and `Infinity` would pass both
 * guards below, because comparisons against `NaN` are always false and
 * `Infinity` is never smaller than anything.
 *
 * **Guard 1 — tiny API value:** API value is below {@link MIN_CREDIBLE_VALUE}
 * AND stored value is much larger (> 5× the API value).
 *
 * **Guard 2 — significant drop:** Stored value is substantial
 * (> {@link MIN_STORED_FOR_DROP_CHECK}) AND the API value represents a drop
 * of more than {@link MAX_ALLOWED_DROP_PERCENT}% from stored. This catches
 * scenarios where the EP API returns a plausible-looking number (e.g. 1998
 * speeches) that is nonetheless far below the known count (10000), indicating
 * incomplete pagination or partial data loads.
 *
 * The guards protect curated data from being overwritten by incomplete
 * EP API responses while still allowing genuine corrections (increases
 * and small decreases).
 *
 * @param apiValue - Value freshly reported by the API.
 * @param storedValue - Value currently stored that `apiValue` would replace.
 * @returns `true` if `apiValue` may overwrite `storedValue`, `false` if it
 *   should be discarded.
 */
export function isCredibleApiValue(apiValue: number, storedValue: number): boolean {
  // Guard 0: NaN / ±Infinity can never be a real count.
  if (!Number.isFinite(apiValue)) return false;

  // Guard 1: Very small API value when stored is much larger
  if (apiValue < MIN_CREDIBLE_VALUE && storedValue > apiValue * 5) return false;

  // Guard 2: Significant drop from a substantial stored value.
  // Increases are always trusted (API has more data than stored).
  // Only decreases beyond the threshold are flagged.
  if (storedValue > MIN_STORED_FOR_DROP_CHECK && apiValue < storedValue) {
    const dropPercent = ((storedValue - apiValue) / storedValue) * 100;
    if (dropPercent > MAX_ALLOWED_DROP_PERCENT) return false;
  }

  return true;
}