When You Need Screenshots at Scale
Some use cases demand thousands or tens of thousands of screenshots: monitoring a large portfolio of websites, generating thumbnails for a directory, creating visual archives, or running visual regression tests across hundreds of pages.
Processing this volume requires more than a simple for-loop. You need concurrency control, error handling, rate limiting, and progress tracking.
The Naive Approach (Don’t Do This)
// ❌ Sequential — painfully slow
// Each request blocks until the previous one finishes — zero parallelism.
for (const url of urls) {
const screenshot = await takeScreenshot(url);
saveScreenshot(screenshot);
}
// 10,000 URLs × 3 seconds each = 8+ hours
The Right Approach: Controlled Concurrency
Node.js with p-limit
const axios = require('axios');
// NOTE(review): p-limit v4+ is ESM-only and cannot be require()'d; confirm
// the installed version (<= 3.x) supports CommonJS, or switch to import().
const pLimit = require('p-limit');
const fs = require('fs');
// API key is read from the environment; undefined if the variable is unset.
const API_KEY = process.env.DEVTOOLBOX_API_KEY;
const CONCURRENCY = 10; // Parallel requests
// Single shared limiter: at most CONCURRENCY screenshot calls in flight.
const limit = pLimit(CONCURRENCY);
/**
 * Take a screenshot of `url` via the ToolCenter API, retrying failed
 * attempts with exponential backoff (2s, 4s, 8s, ...).
 *
 * @param {string} url - Page to capture.
 * @param {number} [retries=3] - Maximum number of attempts.
 * @returns {Promise<{url: string, success: boolean, data?: Buffer, error?: string}>}
 *   Never rejects: failures are reported via `success: false`.
 */
async function takeScreenshot(url, retries = 3) {
  let lastError = 'no attempts made';
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      const response = await axios.post(
        'https://api.toolcenter.dev/v1/screenshot',
        { url, width: 1280, height: 800, format: 'png' },
        {
          headers: { 'Authorization': `Bearer ${API_KEY}` },
          responseType: 'arraybuffer', // binary image payload
          timeout: 30000,              // 30s cap per request
        }
      );
      return { url, success: true, data: response.data };
    } catch (error) {
      lastError = error.message;
      if (attempt === retries) {
        return { url, success: false, error: lastError };
      }
      // Exponential backoff before the next attempt.
      await sleep(Math.pow(2, attempt) * 1000);
    }
  }
  // BUG FIX: the original returned `undefined` here when retries <= 0,
  // crashing callers that read `result.success`. Report an explicit failure.
  return { url, success: false, error: lastError };
}
/** Resolve after `ms` milliseconds; used to space out retry attempts. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Screenshot every URL with bounded concurrency, saving successes under
 * ./screenshots/ and logging progress every 100 completions.
 *
 * @param {string[]} urls - Pages to capture.
 * @returns {Promise<Array<{url: string, success: boolean}>>} per-URL results.
 */
async function processUrls(urls) {
  // Ensure the output directory exists before any worker writes to it.
  fs.mkdirSync('./screenshots', { recursive: true });
  let completed = 0;
  const results = await Promise.all(
    urls.map(url =>
      limit(async () => {
        const result = await takeScreenshot(url);
        completed++;
        if (completed % 100 === 0) {
          console.log(`Progress: ${completed}/${urls.length} (${((completed / urls.length) * 100).toFixed(1)}%)`);
        }
        if (result.success) {
          // BUG FIX: the original wrote every image to the literal path
          // "./screenshots/$(unknown)" — the computed filename was never
          // interpolated, so each write clobbered the previous one.
          const filename = urlToFilename(url);
          fs.writeFileSync(`./screenshots/${filename}`, result.data);
        }
        return result;
      })
    )
  );
  const succeeded = results.filter(r => r.success).length;
  const failed = results.length - succeeded;
  console.log(`\nComplete: ${succeeded} succeeded, ${failed} failed`);
  return results;
}
/** Turn a URL into a filesystem-safe PNG filename (stem capped at 100 chars). */
function urlToFilename(url) {
  const withoutScheme = url.replace(/https?:\/\//, '');
  const sanitized = withoutScheme.replace(/[^a-zA-Z0-9]/g, '_');
  return `${sanitized.slice(0, 100)}.png`;
}
Python with asyncio
import asyncio
import aiohttp
import os
from urllib.parse import urlparse
API_KEY = os.environ['DEVTOOLBOX_API_KEY']  # raises KeyError if unset
CONCURRENCY = 10  # max in-flight screenshot requests
# Module-level semaphore shared by every take_screenshot call.
# NOTE(review): constructing asyncio primitives outside a running loop is fine
# on Python 3.10+, but warns/binds a loop on older versions — confirm target.
semaphore = asyncio.Semaphore(CONCURRENCY)
async def take_screenshot(session, url, retries=3):
    """Capture a screenshot of ``url`` via the ToolCenter API.

    Retries on exceptions (exponential backoff) and on HTTP 429 rate limits.
    Concurrency is capped by the module-level ``semaphore``.

    Returns a dict: ``{'url', 'success': True, 'data': bytes}`` on success,
    or ``{'url', 'success': False, 'error': str}`` on failure. Never ``None``.
    """
    last_error = 'retries exhausted'
    async with semaphore:
        for attempt in range(retries):
            try:
                async with session.post(
                    'https://api.toolcenter.dev/v1/screenshot',
                    json={'url': url, 'width': 1280, 'height': 800, 'format': 'png'},
                    headers={'Authorization': f'Bearer {API_KEY}'},
                    timeout=aiohttp.ClientTimeout(total=30)
                ) as response:
                    if response.status == 200:
                        data = await response.read()
                        return {'url': url, 'success': True, 'data': data}
                    elif response.status == 429:
                        # Rate limited — wait and retry
                        last_error = 'HTTP 429'
                        await asyncio.sleep(2 ** (attempt + 1))
                        continue
                    else:
                        return {'url': url, 'success': False, 'error': f'HTTP {response.status}'}
            except Exception as e:
                if attempt == retries - 1:
                    return {'url': url, 'success': False, 'error': str(e)}
                await asyncio.sleep(2 ** attempt)
    # BUG FIX: the original fell off the loop and implicitly returned None
    # when the final attempt was rate-limited (429 -> continue), which would
    # crash callers indexing result['success']. Report an explicit failure.
    return {'url': url, 'success': False, 'error': last_error}
async def process_urls(urls):
    """Screenshot every URL concurrently, saving successes under screenshots/.

    Prints progress every 100 completions and a final summary; returns the
    list of per-URL result dicts (completion order, not input order).
    """
    os.makedirs('screenshots', exist_ok=True)
    completed = 0
    results = []
    async with aiohttp.ClientSession() as session:
        tasks = [take_screenshot(session, url) for url in urls]
        for coro in asyncio.as_completed(tasks):
            result = await coro
            completed += 1
            if result['success']:
                # BUG FIX: the original wrote every image to the literal path
                # 'screenshots/(unknown)' — the computed filename was never
                # interpolated, so each write clobbered the previous one.
                filename = url_to_filename(result['url'])
                with open(f'screenshots/{filename}', 'wb') as f:
                    f.write(result['data'])
            if completed % 100 == 0:
                print(f'Progress: {completed}/{len(urls)}')
            results.append(result)
    succeeded = sum(1 for r in results if r['success'])
    print(f'Done: {succeeded}/{len(urls)} succeeded')
    return results
def url_to_filename(url):
    """Derive a filesystem-safe .png filename from a URL's host and path."""
    parsed = urlparse(url)
    stem = (parsed.netloc + parsed.path).replace('/', '_')
    return stem[:100] + '.png'
# Run it: one URL per line in urls.txt. The context manager closes the file
# handle (the original leaked it), and blank lines are skipped so stray
# empty strings are never submitted to the API.
with open('urls.txt') as f:
    urls = [line.strip() for line in f if line.strip()]
asyncio.run(process_urls(urls))
Rate Limiting and Backoff
Respect API rate limits to avoid getting blocked:
/**
 * Sliding-window rate limiter: permits at most `maxRequests` calls per
 * `windowMs` milliseconds, delaying callers while the window is full.
 */
class RateLimiter {
  constructor(maxRequests, windowMs) {
    this.maxRequests = maxRequests;
    this.windowMs = windowMs;
    this.requests = []; // timestamps (ms) of requests inside the window
  }

  /** Block until a slot is free in the window, then claim it. */
  async waitForSlot() {
    // BUG FIX: the original slept once and then claimed a slot without
    // re-checking; with many concurrent waiters the window could be
    // exceeded. Loop until the window genuinely has room.
    for (;;) {
      const now = Date.now();
      // Drop timestamps that have aged out of the window.
      this.requests = this.requests.filter(t => t > now - this.windowMs);
      if (this.requests.length < this.maxRequests) {
        this.requests.push(now);
        return;
      }
      const oldestExpiry = this.requests[0] + this.windowMs;
      await sleep(Math.max(oldestExpiry - now, 1));
    }
  }
}
// Shared limiter: at most 100 screenshot requests per rolling minute.
const rateLimiter = new RateLimiter(100, 60000);

/** Take a screenshot only after the shared rate limiter grants a slot. */
async function rateLimitedScreenshot(url) {
  await rateLimiter.waitForSlot();
  return takeScreenshot(url);
}
Resumable Processing
For very large batches, save progress to resume after failures:
const fs = require('fs');
/**
 * Resumable batch runner: records per-URL outcomes in a JSON progress file
 * so an interrupted run can restart without redoing finished work.
 */
class BatchProcessor {
  constructor(progressFile = 'progress.json') {
    this.progressFile = progressFile;
    this.progress = this.loadProgress();
  }

  /** Load prior progress; start fresh if the file is missing or corrupt. */
  loadProgress() {
    try {
      return JSON.parse(fs.readFileSync(this.progressFile, 'utf-8'));
    } catch {
      return { completed: [], failed: [] };
    }
  }

  /** Persist the current progress snapshot to disk. */
  saveProgress() {
    fs.writeFileSync(this.progressFile, JSON.stringify(this.progress, null, 2));
  }

  /**
   * Screenshot each URL not already completed, checkpointing every 50 results.
   * @param {string[]} urls - Full batch; already-completed URLs are skipped.
   */
  async process(urls) {
    // PERF FIX: the original used Array.includes inside filter — O(n^2)
    // for large batches. A Set gives O(1) membership checks.
    const done = new Set(this.progress.completed);
    const remaining = urls.filter(url => !done.has(url));
    console.log(`${remaining.length} URLs remaining (${this.progress.completed.length} already done)`);
    for (const url of remaining) {
      const result = await takeScreenshot(url);
      if (result.success) {
        this.progress.completed.push(url);
      } else {
        this.progress.failed.push({ url, error: result.error });
      }
      // Checkpoint every 50 URLs so a crash loses at most 50 results.
      if ((this.progress.completed.length + this.progress.failed.length) % 50 === 0) {
        this.saveProgress();
      }
    }
    this.saveProgress();
  }
}
Storing Results Efficiently
Upload to S3
const { S3Client, PutObjectCommand } = require('@aws-sdk/client-s3');
const s3 = new S3Client({ region: 'us-east-1' });
/** Store a screenshot buffer in S3 under the screenshots/ prefix. */
async function uploadToS3(key, data) {
  const command = new PutObjectCommand({
    Bucket: 'my-screenshots-bucket',
    Key: `screenshots/${key}`,
    Body: data,
    ContentType: 'image/png',
  });
  await s3.send(command);
}
Compress Before Storage
const sharp = require('sharp');
/** Downscale to fit within 1280x800 and re-encode as WebP at quality 80. */
async function compressScreenshot(pngBuffer) {
  const pipeline = sharp(pngBuffer)
    .resize(1280, 800, { fit: 'inside' })
    .webp({ quality: 80 });
  return pipeline.toBuffer();
}
Monitoring and Alerting
Track your batch processing metrics:
/**
 * Lightweight counters for a batch run: success/failure totals, bytes
 * downloaded, and throughput derived from wall-clock time.
 */
class BatchMetrics {
  constructor() {
    this.startTime = Date.now();
    this.succeeded = 0;
    this.failed = 0;
    this.totalBytes = 0;
  }

  /** Record one screenshot result (shape: { success, data? }). */
  record(result) {
    if (result.success) {
      this.succeeded++;
      this.totalBytes += result.data.length;
    } else {
      this.failed++;
    }
  }

  /** Human-readable snapshot of counts, success rate, and throughput. */
  summary() {
    const elapsed = (Date.now() - this.startTime) / 1000;
    const total = this.succeeded + this.failed;
    // BUG FIX: guard the divisions — the original reported "NaN%" for an
    // empty run and "Infinity" throughput when elapsed rounded to zero.
    const successRate = total === 0 ? 0 : (this.succeeded / total) * 100;
    const perMinute = elapsed === 0 ? 0 : (total / elapsed) * 60;
    return {
      total,
      succeeded: this.succeeded,
      failed: this.failed,
      successRate: `${successRate.toFixed(1)}%`,
      elapsed: `${elapsed.toFixed(0)}s`,
      rate: `${perMinute.toFixed(0)} screenshots/min`,
      totalSize: `${(this.totalBytes / 1024 / 1024).toFixed(1)} MB`,
    };
  }
}
Performance Tips
- Tune concurrency — Start with 10 parallel requests and increase until you hit rate limits
- Use WebP format — 30-50% smaller than PNG with minimal quality loss
- Skip full-page — Viewport-only screenshots are faster than full-page
- Batch by domain — Group URLs by domain to benefit from connection reuse
- Use regional endpoints — Choose an API region closest to your target sites
Conclusion
Processing screenshots at scale requires controlled concurrency, robust error handling, and efficient storage. The ToolCenter API handles the rendering complexity — your job is to orchestrate requests efficiently. With the patterns in this guide, you can process tens of thousands of screenshots reliably, whether it’s a one-time batch or a recurring pipeline.