When You Need Screenshots at Scale
Some use cases demand thousands or tens of thousands of screenshots: monitoring a large portfolio of websites, generating thumbnails for a directory, creating visual archives, or running visual regression tests across hundreds of pages.
Processing this volume requires more than a simple for-loop. You need concurrency control, error handling, rate limiting, and progress tracking.
The Naive Approach (Don’t Do This)
// ❌ Sequential — painfully slow
// Each request blocks until the previous one finishes — zero parallelism.
for (const url of urls) {
const screenshot = await takeScreenshot(url);
saveScreenshot(screenshot);
}
// 10,000 URLs × 3 seconds each = 8+ hours
The Right Approach: Controlled Concurrency
Node.js with p-limit
const axios = require('axios');
// NOTE(review): p-limit v4+ is ESM-only and cannot be require()'d; confirm
// the installed version (<= 3.x) supports CommonJS, or switch to import().
const pLimit = require('p-limit');
const fs = require('fs');
// API key is read from the environment; undefined if the variable is unset.
const API_KEY = process.env.DEVTOOLBOX_API_KEY;
const CONCURRENCY = 10; // Parallel requests
// Single shared limiter: at most CONCURRENCY screenshot calls in flight.
const limit = pLimit(CONCURRENCY);
/**
 * Take a screenshot of `url` via the ToolCenter API, retrying failed
 * attempts with exponential backoff (2s, 4s, 8s, ...).
 *
 * @param {string} url - Page to capture.
 * @param {number} [retries=3] - Maximum number of attempts.
 * @returns {Promise<{url: string, success: boolean, data?: Buffer, error?: string}>}
 *   Never rejects: failures are reported via `success: false`.
 */
async function takeScreenshot(url, retries = 3) {
  let lastError = 'no attempts made';
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      const response = await axios.post(
        'https://api.toolcenter.dev/v1/screenshot',
        { url, width: 1280, height: 800, format: 'png' },
        {
          headers: { 'Authorization': `Bearer ${API_KEY}` },
          responseType: 'arraybuffer', // binary image payload
          timeout: 30000,              // 30s cap per request
        }
      );
      return { url, success: true, data: response.data };
    } catch (error) {
      lastError = error.message;
      if (attempt === retries) {
        return { url, success: false, error: lastError };
      }
      // Exponential backoff before the next attempt.
      await sleep(Math.pow(2, attempt) * 1000);
    }
  }
  // BUG FIX: the original returned `undefined` here when retries <= 0,
  // crashing callers that read `result.success`. Report an explicit failure.
  return { url, success: false, error: lastError };
}
/** Resolve after `ms` milliseconds; used to space out retry attempts. */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Screenshot every URL with bounded concurrency, saving successes under
 * ./screenshots/ and logging progress every 100 completions.
 *
 * @param {string[]} urls - Pages to capture.
 * @returns {Promise<Array<{url: string, success: boolean}>>} per-URL results.
 */
async function processUrls(urls) {
  // Ensure the output directory exists before any worker writes to it.
  fs.mkdirSync('./screenshots', { recursive: true });
  let completed = 0;
  const results = await Promise.all(
    urls.map(url =>
      limit(async () => {
        const result = await takeScreenshot(url);
        completed++;
        if (completed % 100 === 0) {
          console.log(`Progress: ${completed}/${urls.length} (${((completed / urls.length) * 100).toFixed(1)}%)`);
        }
        if (result.success) {
          // BUG FIX: the original wrote every image to the literal path
          // "./screenshots/$(unknown)" — the computed filename was never
          // interpolated, so each write clobbered the previous one.
          const filename = urlToFilename(url);
          fs.writeFileSync(`./screenshots/${filename}`, result.data);
        }
        return result;
      })
    )
  );
  const succeeded = results.filter(r => r.success).length;
  const failed = results.length - succeeded;
  console.log(`\nComplete: ${succeeded} succeeded, ${failed} failed`);
  return results;
}
/** Turn a URL into a filesystem-safe PNG filename (stem capped at 100 chars). */
function urlToFilename(url) {
  const withoutScheme = url.replace(/https?:\/\//, '');
  const sanitized = withoutScheme.replace(/[^a-zA-Z0-9]/g, '_');
  return `${sanitized.slice(0, 100)}.png`;
}
Python with asyncio
import asyncio
import aiohttp
import os
from urllib.parse import urlparse
API_KEY = os.environ['DEVTOOLBOX_API_KEY']  # raises KeyError if unset
CONCURRENCY = 10  # max in-flight screenshot requests
# Module-level semaphore shared by every take_screenshot call.
# NOTE(review): constructing asyncio primitives outside a running loop is fine
# on Python 3.10+, but warns/binds a loop on older versions — confirm target.
semaphore = asyncio.Semaphore(CONCURRENCY)
async def take_screenshot(session, url, retries=3):
    """Capture a screenshot of ``url`` via the ToolCenter API.

    Retries on exceptions (exponential backoff) and on HTTP 429 rate limits.
    Concurrency is capped by the module-level ``semaphore``.

    Returns a dict: ``{'url', 'success': True, 'data': bytes}`` on success,
    or ``{'url', 'success': False, 'error': str}`` on failure. Never ``None``.
    """
    last_error = 'retries exhausted'
    async with semaphore:
        for attempt in range(retries):
            try:
                async with session.post(
                    'https://api.toolcenter.dev/v1/screenshot',
                    json={'url': url, 'width': 1280, 'height': 800, 'format': 'png'},
                    headers={'Authorization': f'Bearer {API_KEY}'},
                    timeout=aiohttp.ClientTimeout(total=30)
                ) as response:
                    if response.status == 200:
                        data = await response.read()
                        return {'url': url, 'success': True, 'data': data}
                    elif response.status == 429:
                        # Rate limited — wait and retry
                        last_error = 'HTTP 429'
                        await asyncio.sleep(2 ** (attempt + 1))
                        continue
                    else:
                        return {'url': url, 'success': False, 'error': f'HTTP {response.status}'}
            except Exception as e:
                if attempt == retries - 1:
                    return {'url': url, 'success': False, 'error': str(e)}
                await asyncio.sleep(2 ** attempt)
    # BUG FIX: the original fell off the loop and implicitly returned None
    # when the final attempt was rate-limited (429 -> continue), which would
    # crash callers indexing result['success']. Report an explicit failure.
    return {'url': url, 'success': False, 'error': last_error}
async def process_urls(urls):
    """Screenshot every URL concurrently, saving successes under screenshots/.

    Prints progress every 100 completions and a final summary; returns the
    list of per-URL result dicts (completion order, not input order).
    """
    os.makedirs('screenshots', exist_ok=True)
    completed = 0
    results = []
    async with aiohttp.ClientSession() as session:
        tasks = [take_screenshot(session, url) for url in urls]
        for coro in asyncio.as_completed(tasks):
            result = await coro
            completed += 1
            if result['success']:
                # BUG FIX: the original wrote every image to the literal path
                # 'screenshots/(unknown)' — the computed filename was never
                # interpolated, so each write clobbered the previous one.
                filename = url_to_filename(result['url'])
                with open(f'screenshots/{filename}', 'wb') as f:
                    f.write(result['data'])
            if completed % 100 == 0:
                print(f'Progress: {completed}/{len(urls)}')
            results.append(result)
    succeeded = sum(1 for r in results if r['success'])
    print(f'Done: {succeeded}/{len(urls)} succeeded')
    return results
def url_to_filename(url):
    """Derive a filesystem-safe .png filename from a URL's host and path."""
    parsed = urlparse(url)
    stem = (parsed.netloc + parsed.path).replace('/', '_')
    return stem[:100] + '.png'
# Run it: one URL per line in urls.txt. The context manager closes the file
# handle (the original leaked it), and blank lines are skipped so stray
# empty strings are never submitted to the API.
with open('urls.txt') as f:
    urls = [line.strip() for line in f if line.strip()]
asyncio.run(process_urls(urls))
Rate Limiting and Backoff
Respect API rate limits to avoid getting blocked:
/**
 * Sliding-window rate limiter: permits at most `maxRequests` calls per
 * `windowMs` milliseconds, delaying callers while the window is full.
 */
class RateLimiter {
  constructor(maxRequests, windowMs) {
    this.maxRequests = maxRequests;
    this.windowMs = windowMs;
    this.requests = []; // timestamps (ms) of requests inside the window
  }

  /** Block until a slot is free in the window, then claim it. */
  async waitForSlot() {
    // BUG FIX: the original slept once and then claimed a slot without
    // re-checking; with many concurrent waiters the window could be
    // exceeded. Loop until the window genuinely has room.
    for (;;) {
      const now = Date.now();
      // Drop timestamps that have aged out of the window.
      this.requests = this.requests.filter(t => t > now - this.windowMs);
      if (this.requests.length < this.maxRequests) {
        this.requests.push(now);
        return;
      }
      const oldestExpiry = this.requests[0] + this.windowMs;
      await sleep(Math.max(oldestExpiry - now, 1));
    }
  }
}
// Shared limiter: at most 100 screenshot requests per rolling minute.
const rateLimiter = new RateLimiter(100, 60000);

/** Take a screenshot only after the shared rate limiter grants a slot. */
async function rateLimitedScreenshot(url) {
  await rateLimiter.waitForSlot();
  return takeScreenshot(url);
}
Resumable Processing
For very large batches, save progress to resume after failures:
const fs = require('fs');
/**
 * Resumable batch runner: records per-URL outcomes in a JSON progress file
 * so an interrupted run can restart without redoing finished work.
 */
class BatchProcessor {
  constructor(progressFile = 'progress.json') {
    this.progressFile = progressFile;
    this.progress = this.loadProgress();
  }

  /** Load prior progress; start fresh if the file is missing or corrupt. */
  loadProgress() {
    try {
      return JSON.parse(fs.readFileSync(this.progressFile, 'utf-8'));
    } catch {
      return { completed: [], failed: [] };
    }
  }

  /** Persist the current progress snapshot to disk. */
  saveProgress() {
    fs.writeFileSync(this.progressFile, JSON.stringify(this.progress, null, 2));
  }

  /**
   * Screenshot each URL not already completed, checkpointing every 50 results.
   * @param {string[]} urls - Full batch; already-completed URLs are skipped.
   */
  async process(urls) {
    // PERF FIX: the original used Array.includes inside filter — O(n^2)
    // for large batches. A Set gives O(1) membership checks.
    const done = new Set(this.progress.completed);
    const remaining = urls.filter(url => !done.has(url));
    console.log(`${remaining.length} URLs remaining (${this.progress.completed.length} already done)`);
    for (const url of remaining) {
      const result = await takeScreenshot(url);
      if (result.success) {
        this.progress.completed.push(url);
      } else {
        this.progress.failed.push({ url, error: result.error });
      }
      // Checkpoint every 50 URLs so a crash loses at most 50 results.
      if ((this.progress.completed.length + this.progress.failed.length) % 50 === 0) {
        this.saveProgress();
      }
    }
    this.saveProgress();
  }
}
Storing Results Efficiently
Upload to S3
const { S3Client, PutObjectCommand } = require('@aws-sdk/client-s3');
const s3 = new S3Client({ region: 'us-east-1' });
/** Store a screenshot buffer in S3 under the screenshots/ prefix. */
async function uploadToS3(key, data) {
  const command = new PutObjectCommand({
    Bucket: 'my-screenshots-bucket',
    Key: `screenshots/${key}`,
    Body: data,
    ContentType: 'image/png',
  });
  await s3.send(command);
}
Compress Before Storage
const sharp = require('sharp');
/** Downscale to fit within 1280x800 and re-encode as WebP at quality 80. */
async function compressScreenshot(pngBuffer) {
  const pipeline = sharp(pngBuffer)
    .resize(1280, 800, { fit: 'inside' })
    .webp({ quality: 80 });
  return pipeline.toBuffer();
}
Monitoring and Alerting
Track your batch processing metrics:
/**
 * Lightweight counters for a batch run: success/failure totals, bytes
 * downloaded, and throughput derived from wall-clock time.
 */
class BatchMetrics {
  constructor() {
    this.startTime = Date.now();
    this.succeeded = 0;
    this.failed = 0;
    this.totalBytes = 0;
  }

  /** Record one screenshot result (shape: { success, data? }). */
  record(result) {
    if (result.success) {
      this.succeeded++;
      this.totalBytes += result.data.length;
    } else {
      this.failed++;
    }
  }

  /** Human-readable snapshot of counts, success rate, and throughput. */
  summary() {
    const elapsed = (Date.now() - this.startTime) / 1000;
    const total = this.succeeded + this.failed;
    // BUG FIX: guard the divisions — the original reported "NaN%" for an
    // empty run and "Infinity" throughput when elapsed rounded to zero.
    const successRate = total === 0 ? 0 : (this.succeeded / total) * 100;
    const perMinute = elapsed === 0 ? 0 : (total / elapsed) * 60;
    return {
      total,
      succeeded: this.succeeded,
      failed: this.failed,
      successRate: `${successRate.toFixed(1)}%`,
      elapsed: `${elapsed.toFixed(0)}s`,
      rate: `${perMinute.toFixed(0)} screenshots/min`,
      totalSize: `${(this.totalBytes / 1024 / 1024).toFixed(1)} MB`,
    };
  }
}
Performance Tips
- Tune concurrency — Start with 10 parallel requests and increase until you hit rate limits
- Use WebP format — 30-50% smaller than PNG with minimal quality loss
- Skip full-page — Viewport-only screenshots are faster than full-page
- Batch by domain — Group URLs by domain to benefit from connection reuse
- Use regional endpoints — Choose an API region closest to your target sites
Conclusion
Processing screenshots at scale requires controlled concurrency, robust error handling, and efficient storage. The ToolCenter API handles the rendering complexity — your job is to orchestrate requests efficiently. With the patterns in this guide, you can process tens of thousands of screenshots reliably, whether it’s a one-time batch or a recurring pipeline.