// sniff_puppeteer_save.js
//
// Opens a visible Chrome window through Puppeteer, navigates to `targetUrl`,
// and saves every matching network response (plus a `.meta.json` sidecar)
// into the `data` directory next to this script.
//
// Setup: npm install puppeteer
// Run:   node sniff_puppeteer_save.js

'use strict';

const fs = require('fs').promises;
const path = require('path');
const puppeteer = require('puppeteer');

//// CONFIG ////

// Page to open; loading it should trigger the requests you want to capture.
const targetUrl = 'https://sushiscan.net/la-gardienne-des-concubines-volume-1/';

// Only responses whose URL starts with this prefix are saved.
// Set to '' to disable prefix filtering.
const targetPrefix = 'https://c.sushiscan.net/wp-content/';

// Extensions to allow (set to [] to disable extension filtering).
// Include the leading dot, lowercase, e.g. ['.webp', '.jpg'].
const extensions = ['.webp'];

// Where to save files.
const DATA_DIR = path.join(__dirname, 'data');

// Chrome binary to launch; override with the CHROME_PATH environment variable.
const CHROME_PATH =
  process.env.CHROME_PATH ||
  'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe';

// Fallback extensions, tried in order, for path segments that have none.
// Minimal mapping for common types; extend as needed.
const CONTENT_TYPE_EXTENSIONS = [
  ['image/webp', '.webp'],
  ['image/png', '.png'],
  ['image/jpeg', '.jpg'],
  ['text/html', '.html'],
  ['application/json', '.json'],
];

// Full paths already handed out by uniqueFilename() during this run.
const reservedPaths = new Set();

//// Helpers ////

/**
 * Tells whether the path component of `url` ends with one of `exts`
 * (case-insensitive; query string and fragment are ignored).
 *
 * @param {string} url - Absolute URL to test.
 * @param {string[]} exts - Allowed extensions including the leading dot.
 * @returns {boolean} `true` when `exts` is empty/missing or one extension
 *   matches; `false` when nothing matches or `url` cannot be parsed.
 */
function hasExtension(url, exts) {
  if (!exts || exts.length === 0) return true;
  try {
    const pathname = new URL(url).pathname.toLowerCase();
    return exts.some((ext) => pathname.endsWith(ext.toLowerCase()));
  } catch (err) {
    // Not a parseable absolute URL: treat as "no match".
    return false;
  }
}

/**
 * Returns the last non-empty path segment of `url`.
 *
 * @param {string} url - URL (or URL-like string) to inspect.
 * @returns {string} The last segment, or 'index' when the path has none.
 */
function lastPathSegment(url) {
  let segments;
  try {
    // Dropping empty segments makes a trailing slash yield the segment before it.
    segments = new URL(url).pathname.split('/').filter(Boolean);
  } catch (err) {
    // Fallback for strings `URL` rejects: strip query/fragment by hand.
    const withoutQuery = url.split('?')[0].split('#')[0];
    segments = withoutQuery.split('/').filter(Boolean);
  }
  return segments.length > 0 ? segments[segments.length - 1] : 'index';
}

/**
 * Turns an arbitrary name into a conservative file name: every character
 * other than ASCII letters, digits, dot, dash and underscore becomes '_'.
 *
 * @param {string} name - Raw name, typically a URL path segment.
 * @returns {string} A non-empty name that does not start with a dot.
 */
function sanitizeFilename(name) {
  const maxLen = 200;
  let safe = name.replace(/[^a-zA-Z0-9.\-_]/g, '_');

  if (safe.length > maxLen) {
    // Trim the stem rather than the tail so the extension survives truncation.
    const ext = path.extname(safe);
    safe =
      ext.length < maxLen
        ? safe.slice(0, maxLen - ext.length) + ext
        : safe.slice(0, maxLen);
  }

  // Avoid names starting with a dot (hidden files on Unix-like systems).
  if (safe.startsWith('.')) safe = `file${safe}`;
  return safe || 'index';
}

/**
 * Picks a file name inside `dir` that neither exists on disk nor has been
 * handed out earlier in this run, trying `base`, then `name-1.ext`,
 * `name-2.ext`, and so on.
 *
 * Response handlers run concurrently, so each candidate is reserved
 * synchronously before the asynchronous disk probe; two callers asking for
 * the same `base` can therefore never receive the same name.
 *
 * @param {string} dir - Target directory.
 * @param {string} base - Desired, already sanitized, file name.
 * @returns {Promise<string>} The chosen file name (without directory).
 * @throws {Error} Any `fs.access` failure other than ENOENT.
 */
async function uniqueFilename(dir, base) {
  const ext = path.extname(base);
  const stem = path.basename(base, ext);

  for (let i = 0; ; i += 1) {
    const candidate = i === 0 ? base : `${stem}-${i}${ext}`;
    const full = path.join(dir, candidate);
    if (reservedPaths.has(full)) continue;

    // Reserve before awaiting so a concurrent caller skips this candidate.
    reservedPaths.add(full);
    try {
      await fs.access(full);
      // Already on disk -> try the next suffix (the reservation is harmless).
    } catch (err) {
      if (err.code !== 'ENOENT') throw err;
      return candidate;
    }
  }
}

/**
 * Creates DATA_DIR (and any missing parents) if it does not exist yet.
 *
 * @returns {Promise<void>}
 */
async function ensureDataDir() {
  await fs.mkdir(DATA_DIR, { recursive: true });
}

/**
 * 'response' listener: stores the body of a matching response in DATA_DIR
 * together with a `<file>.meta.json` sidecar holding the URL, status,
 * headers, timestamp and file name. Failures are logged, never thrown, so
 * one bad response cannot produce an unhandled rejection.
 *
 * @param {object} response - Puppeteer HTTPResponse passed to the listener.
 * @returns {Promise<void>}
 */
async function saveResponse(response) {
  const url = response.url();
  try {
    if (targetPrefix && !url.startsWith(targetPrefix)) return;
    if (!hasExtension(url, extensions)) return;

    const headers = response.headers();
    let filename = sanitizeFilename(lastPathSegment(url));

    // If the path segment has no extension, derive one from the content type.
    if (!path.extname(filename)) {
      const contentType = headers['content-type'] || '';
      const match = CONTENT_TYPE_EXTENSIONS.find(([type]) =>
        contentType.includes(type)
      );
      if (match) filename += match[1];
    }

    // Read the body first (works for binary and text) so a response whose
    // body cannot be read does not consume a file name.
    let buffer;
    try {
      buffer = await response.buffer();
    } catch (err) {
      console.error('Could not read response body for', url, err.message);
      return;
    }

    filename = await uniqueFilename(DATA_DIR, filename);
    const outPath = path.join(DATA_DIR, filename);

    await fs.writeFile(outPath, buffer);

    const meta = {
      url,
      status: response.status(),
      headers,
      savedAt: new Date().toISOString(),
      filename,
    };
    await fs.writeFile(`${outPath}.meta.json`, JSON.stringify(meta, null, 2));

    console.log(`Saved: ${outPath} (bytes: ${buffer.length})`);
  } catch (err) {
    console.error('Error handling response', url, err);
  }
}

//// Main ////

/**
 * Launches Chrome, attaches the response listener and opens `targetUrl`.
 * The browser is left open afterwards so further browsing keeps being
 * captured until the process is stopped.
 *
 * @returns {Promise<void>}
 */
async function main() {
  await ensureDataDir();

  const browser = await puppeteer.launch({
    headless: false, // headless: true won't have a visible window
    executablePath: CHROME_PATH,
    defaultViewport: null, // disables the default small viewport
    args: [
      '--start-maximized',
      '--disable-blink-features=AutomationControlled', // hides navigator.webdriver
    ],
    ignoreDefaultArgs: ['--enable-automation'], // remove automation flag
  });

  const page = await browser.newPage();
  page.on('response', saveResponse);

  // For debugging, requests can be logged as well:
  //   page.on('request', (req) => console.log('REQ', req.method(), req.url()));

  // Navigate to the page that triggers the resources to capture.
  await page.goto(targetUrl, { waitUntil: 'networkidle2', timeout: 300_000 });

  console.log('Listening for responses. Press Ctrl+C to stop.');
}

main().catch((err) => {
  // Report launch/navigation failures instead of leaving an unhandled rejection.
  console.error('Fatal error', err);
  process.exitCode = 1;
});