Optimization strategies for maximum performance
Caching, rate limiting, query optimization, and monitoring
| Metric | Target | Acceptable | Critical |
|---|---|---|---|
| Cached response time | <1ms | <5ms | >10ms |
| API response time | <200ms | <500ms | >1000ms |
| Cache hit rate | >80% | >60% | <40% |
| Throughput (cached) | >10,000 req/s | >5,000 req/s | <1,000 req/s |
| Throughput (API) | >5 req/s | >2 req/s | <1 req/s |
| Memory usage | <256MB | <512MB | >1GB |
Optimal Settings:
// Recommended production cache settings: bounded size + bounded freshness.
// Value type is `unknown` rather than `any` so cached entries must be
// narrowed before use instead of silently bypassing the type checker.
const cache = new LRUCache<string, unknown>({
max: 500, // Maximum entries
ttl: 15 * 60 * 1000, // 15 minutes
allowStale: false, // Don't return stale data
updateAgeOnGet: false // Don't refresh TTL on access
});
Cache Size Calculation:
// Estimate memory usage
// Rough upper bound: entry count times average serialized entry size,
// reported in MiB. Real usage varies with actual payload sizes.
const avgEntrySize = 5 * 1024; // 5KB per entry
const maxEntries = 500;
const totalBytes = avgEntrySize * maxEntries;
const estimatedMemory = totalBytes / (1024 * 1024);
console.log(`Estimated cache memory: ${estimatedMemory}MB`);
Good cache keys:
// Unique and deterministic
// Unique and deterministic
/**
 * Build a cache key from a method name and its parameters.
 * Top-level keys are sorted so parameter order does not change the key.
 * NOTE: sorting is shallow — nested objects keep their insertion order,
 * so structurally-equal params with differently ordered nested keys
 * produce different cache keys.
 */
function getCacheKey(method: string, params: Record<string, unknown>): string {
  // Sort entries by key (same lexicographic order as Array#sort on strings),
  // then rebuild the object so JSON.stringify emits keys deterministically.
  const sorted = Object.fromEntries(
    Object.entries(params).sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
  );
  return `${method}:${JSON.stringify(sorted)}`;
}
// Example keys
getCacheKey('getMEPs', { country: 'SE', limit: 50 });
// => "getMEPs:{"country":"SE","limit":50}"
getCacheKey('getMEPs', { limit: 50, country: 'SE' });
// => "getMEPs:{"country":"SE","limit":50}" (same key!)
Time-based (TTL):
// Set per-entry TTL
cache.set('key', value, { ttl: 10 * 60 * 1000 }); // 10 minutes
// Or use default TTL from config
cache.set('key', value); // Uses default 15 minutes
Manual invalidation:
// Clear specific entry
cache.delete('key');
// Clear all entries matching pattern
// Snapshot the keys before deleting: mutating the cache while iterating
// a live keys() iterator can skip or repeat entries.
for (const key of [...cache.keys()]) {
if (key.startsWith('getMEPs:')) {
cache.delete(key);
}
}
// Clear entire cache
cache.clear();
Event-based invalidation:
// Invalidate when data changes
/**
 * Persist an MEP update, then invalidate every cache entry that could
 * now be stale: the single-MEP entry plus all list entries.
 * `data` is `unknown` so callers are not encouraged to pass unchecked shapes.
 */
async function updateMEP(mepId: string, data: unknown) {
  await epClient.updateMEP(mepId, data);
  // Invalidate related cache entries
  cache.delete(`mep:${mepId}`);
  // Invalidate list caches that might include this MEP.
  // Snapshot keys first: deleting while iterating a live keys() iterator
  // can skip entries.
  for (const key of [...cache.keys()]) {
    if (key.startsWith('meps:')) {
      cache.delete(key);
    }
  }
}
// Monitor cache statistics
// Periodic visibility into cache health; assumes calculateHitRate()
// (defined below) and the LRU `cache` instance are in scope.
setInterval(() => {
console.log({
size: cache.size, // current number of entries
max: cache.max, // configured capacity
hitRate: calculateHitRate()
});
}, 60000); // Every minute
// Running counters for cache effectiveness.
let hits = 0;
let misses = 0;

/** Percentage of lookups served from cache; 0 before any lookup. */
function calculateHitRate(): number {
  const total = hits + misses;
  if (total === 0) {
    return 0;
  }
  return (hits / total) * 100;
}

// Track hits/misses
/** Cache lookup wrapper that records a hit or a miss per call. */
function get(key: string) {
  const value = cache.get(key);
  if (value === undefined) {
    misses++;
  } else {
    hits++;
  }
  return value;
}
Configuration:
/**
 * Token-bucket rate limiter. The bucket starts full and refills
 * continuously at `refillRate` tokens per `windowMs`.
 *
 * Fix: the original read/wrote `this.tokens` and `this.lastRefill`
 * without ever declaring or initializing them — a compile error under
 * strict TypeScript and NaN arithmetic at runtime.
 */
class RateLimiter {
  // Current token balance; starts at full capacity.
  private tokens: number;
  // Timestamp (ms) of the last refill computation.
  private lastRefill: number;

  constructor(
    private maxTokens: number = 100,
    private refillRate: number = 100, // tokens per window
    private windowMs: number = 15 * 60 * 1000 // 15 minutes
  ) {
    this.tokens = maxTokens;
    this.lastRefill = Date.now();
  }

  /**
   * Try to consume `count` tokens.
   * @returns true if the tokens were available and consumed, else false.
   */
  async tryRemoveTokens(count: number = 1): Promise<boolean> {
    this.refill();
    if (this.tokens >= count) {
      this.tokens -= count;
      return true;
    }
    return false;
  }

  // Add tokens proportional to elapsed time, capped at maxTokens.
  private refill() {
    const now = Date.now();
    const elapsed = now - this.lastRefill;
    const tokensToAdd = (elapsed / this.windowMs) * this.refillRate;
    this.tokens = Math.min(this.maxTokens, this.tokens + tokensToAdd);
    this.lastRefill = now;
  }
}
Client-side throttling:
/**
 * Client-side throttle: runs queued async tasks strictly one at a time,
 * pausing `delayMs` after each so outbound traffic stays at ~1 req/s.
 */
class RequestQueue {
  private pending: Array<() => Promise<any>> = [];
  private draining = false;
  private readonly delayMs = 1000; // 1 request per second

  /** Queue `fn`; the returned promise settles with fn's own outcome. */
  async enqueue<T>(fn: () => Promise<T>): Promise<T> {
    return new Promise((resolve, reject) => {
      const task = async () => {
        try {
          resolve(await fn());
        } catch (error) {
          reject(error);
        }
      };
      this.pending.push(task);
      if (!this.draining) {
        this.drain();
      }
    });
  }

  // Run tasks FIFO, sleeping delayMs after each one completes.
  private async drain() {
    this.draining = true;
    let task = this.pending.shift();
    while (task !== undefined) {
      await task();
      await new Promise(resolve => setTimeout(resolve, this.delayMs));
      task = this.pending.shift();
    }
    this.draining = false;
  }
}
Combine multiple requests:
// Bad: Sequential individual requests
// Intentional anti-pattern (kept as-is for illustration): each `await`
// blocks the next call, so total latency is the SUM of all round trips.
async function getMEPsFromCountries(countries: string[]) {
const results = [];
for (const country of countries) {
// One full network round trip per iteration — nothing runs in parallel.
const meps = await client.callTool('get_meps', { country });
results.push(meps);
}
return results;
}
// Good: Parallel batch with concurrency limit
/**
 * Fetch MEPs for every country, issuing at most `concurrency` requests
 * in parallel; batches themselves run sequentially.
 */
async function getMEPsFromCountriesBatch(
  countries: string[],
  concurrency: number = 5
) {
  const results = [];
  let start = 0;
  while (start < countries.length) {
    const chunk = countries.slice(start, start + concurrency);
    // All requests in one chunk fly concurrently.
    const chunkResults = await Promise.all(
      chunk.map(country => client.callTool('get_meps', { country }))
    );
    results.push(...chunkResults);
    start += concurrency;
  }
  return results;
}
Use appropriate page sizes:
// Too small: Many requests, high overhead
const result1 = await client.callTool('get_meps', { limit: 10 });
// Optimal: Balance between response size and requests
const result2 = await client.callTool('get_meps', { limit: 50 });
// Too large: Large payloads, slow parsing
const result3 = await client.callTool('get_meps', { limit: 100 });
Efficient pagination:
/**
 * Walk every page of the get_meps endpoint and accumulate the results.
 * Stops when a page comes back with fewer entries than the page size.
 */
async function getAllMEPs(): Promise<MEP[]> {
  const collected: MEP[] = [];
  const limit = 50;
  for (let offset = 0; ; offset += limit) {
    const result = await client.callTool('get_meps', { limit, offset });
    const page = JSON.parse(result.content[0].text);
    collected.push(...page.data);
    // A short page means the API has no more results.
    if (page.data.length < limit) {
      return collected;
    }
  }
}
Push filters to API:
// Bad: Fetch all, filter locally
const allMEPs = await client.callTool('get_meps', { limit: 100 });
const swedishMEPs = allMEPs.data.filter(mep => mep.country === 'SE');
// Good: Filter at API level
const swedishMEPs = await client.callTool('get_meps', {
country: 'SE',
limit: 50
});
Combine independent queries:
// Bad: Sequential
const meps = await client.callTool('get_meps', { country: 'SE' });
const sessions = await client.callTool('get_plenary_sessions', {});
const committee = await client.callTool('get_committee_info', {
abbreviation: 'ENVI'
});
// Good: Parallel
const [meps, sessions, committee] = await Promise.all([
client.callTool('get_meps', { country: 'SE' }),
client.callTool('get_plenary_sessions', {}),
client.callTool('get_committee_info', { abbreviation: 'ENVI' })
]);
MetricsService usage:
import { MetricsService } from '../services/MetricsService.js';
const metrics = new MetricsService();
// Counter: Total requests
metrics.incrementCounter('tool_requests_total', {
tool: 'get_meps',
status: 'success'
});
// Gauge: Cache size
metrics.setGauge('cache_size', cache.size);
// Histogram: Response times
const start = Date.now();
const result = await fetchData();
const duration = Date.now() - start;
metrics.observeHistogram('response_time_ms', duration, { tool: 'get_meps' });
Track these metrics:
// Response time percentiles
const p50 = metrics.getPercentile('response_time_ms', 50);
const p95 = metrics.getPercentile('response_time_ms', 95);
const p99 = metrics.getPercentile('response_time_ms', 99);
console.log(`Response times: P50=${p50}ms, P95=${p95}ms, P99=${p99}ms`);
// Cache performance
const cacheHits = metrics.getCounter('cache_hits');
const cacheMisses = metrics.getCounter('cache_misses');
const hitRate = (cacheHits / (cacheHits + cacheMisses)) * 100;
console.log(`Cache hit rate: ${hitRate.toFixed(2)}%`);
// Throughput
const requests = metrics.getCounter('tool_requests_total');
const duration = 60; // seconds
const throughput = requests / duration;
console.log(`Throughput: ${throughput.toFixed(2)} req/s`);
Structured logging:
import { logger } from './utils/logger.js';
// Log slow requests
const threshold = 500; // ms
if (duration > threshold) {
logger.warn('Slow request detected', {
tool: 'get_meps',
duration,
params: { country: 'SE' }
});
}
// Log cache statistics
logger.info('Cache statistics', {
size: cache.size,
hitRate,
evictions: metrics.getCounter('cache_evictions')
});
Cached request benchmark:
// tests/performance/cached-requests.test.ts
import { describe, it, expect } from 'vitest';
describe('Cached Request Performance', () => {
it('should respond in <1ms for cached requests', async () => {
// Warm cache
await client.callTool('get_meps', { country: 'SE' });
// Measure cached response
const start = performance.now();
await client.callTool('get_meps', { country: 'SE' });
const duration = performance.now() - start;
expect(duration).toBeLessThan(1); // <1ms
});
});
Concurrent request benchmark:
it('should handle 50 concurrent requests', async () => {
const requests = Array(50).fill(null).map((_, i) =>
client.callTool('get_meps', {
country: 'SE',
offset: i * 10,
limit: 10
})
);
const start = performance.now();
await Promise.all(requests);
const duration = performance.now() - start;
expect(duration).toBeLessThan(10000); // <10s for 50 requests
});
Using autocannon:
# Install autocannon
npm install -g autocannon
# Basic load test
autocannon -c 10 -d 30 http://localhost:3000/tools/get_meps
# Results:
# Latency:
# Avg: 125ms
# Stdev: 45ms
# Max: 250ms
# Req/Sec: 80
# Total: 2400 requests
Using k6:
// load-test.js
// k6 script: 10 virtual users for 30s, each POSTing a JSON-RPC
// tools/call request and checking status code and latency.
import http from 'k6/http';
import { check, sleep } from 'k6';
export const options = {
vus: 10, // concurrent virtual users
duration: '30s',
};
export default function () {
// JSON-RPC 2.0 envelope for the get_meps tool call.
const payload = JSON.stringify({
jsonrpc: '2.0',
method: 'tools/call',
params: {
name: 'get_meps',
arguments: { country: 'SE' }
},
id: 1
});
const res = http.post('http://localhost:3000', payload, {
headers: { 'Content-Type': 'application/json' },
});
// Per-request pass/fail checks recorded in the k6 summary.
check(res, {
'status is 200': (r) => r.status === 200,
'response time < 200ms': (r) => r.timings.duration < 200,
});
sleep(1); // pace each virtual user at ~1 request/second
}
Problem: Slow response times. Diagnosis:
// Add timing logs
console.time('total');
console.time('validation');
const params = schema.parse(args);
console.timeEnd('validation'); // ~1-3ms
console.time('cache_check');
const cached = cache.get(key);
console.timeEnd('cache_check'); // <1ms
console.time('api_call');
const data = await epClient.getData(params);
console.timeEnd('api_call'); // ~150-200ms
console.timeEnd('total');
Solutions:
Problem: High memory usage. Diagnosis:
# Monitor memory
node --expose-gc dist/index.js &
PID=$!
while true; do
ps -p $PID -o rss,vsz,pmem,cmd
sleep 5
done
Solutions:
// 1. Reduce cache size
const cache = new LRUCache({ max: 100 }); // Reduce from 500
// 2. Use shorter TTL
const cache = new LRUCache({
max: 500,
ttl: 5 * 60 * 1000 // 5 minutes instead of 15
});
// 3. Manual garbage collection
if (global.gc) {
setInterval(() => {
global.gc();
}, 60000); // Every minute
}
Problem: Low cache hit rate. Diagnosis:
// Track cache statistics
// Wrap cache.get so hits/misses are counted without touching call sites.
let hits = 0;
let misses = 0;
const originalGet = cache.get.bind(cache);
// NOTE(review): this override narrows get's signature to (key) only —
// lru-cache's real get also accepts an options argument; confirm no
// caller relies on it before shipping this instrumentation.
cache.get = (key: string) => {
const value = originalGet(key);
if (value !== undefined) hits++;
else misses++;
return value;
};
// Report the observed hit rate every 10 seconds.
setInterval(() => {
const total = hits + misses;
const hitRate = total > 0 ? (hits / total) * 100 : 0;
console.log(`Cache hit rate: ${hitRate.toFixed(2)}%`);
}, 10000);
Solutions:
Built with ❤️ by Hack23 AB
ISMS-compliant performance optimization demonstrating excellence