{"id":130,"date":"2020-12-19T17:15:37","date_gmt":"2020-12-19T08:15:37","guid":{"rendered":"http:\/\/localhost:8000\/?p=130"},"modified":"2021-04-16T09:16:14","modified_gmt":"2021-04-16T00:16:14","slug":"puppeteer-file-download","status":"publish","type":"post","link":"http:\/\/localhost:8000\/2020\/12\/puppeteer-file-download.html","title":{"rendered":"puppeteer\u3067\u30d5\u30a1\u30a4\u30eb\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9"},"content":{"rendered":"

\u3053\u306e\u8a18\u4e8b\u306f Node.js Advent Calendar 2020<\/a> \u306e\uff12\u65e5\u76ee\u306e\u8a18\u4e8b\u3067\u3059\u3002<\/strong><\/p>\n

puppeteer<\/a> \u3067\u30d5\u30a1\u30a4\u30eb\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b\u65b9\u6cd5\u306f\u3053\u3061\u3089\u306e issue<\/a> \u304c\u307e\u3060open\u306a\u3053\u3068\u304b\u3089\u3082\u5206\u304b\u308b\u3088\u3046\u306b\u3001\u3059\u3093\u306a\u308a\u5b9f\u73fe\u3067\u304d\u308b\u516c\u5f0f\u306e\u65b9\u6cd5\u306f\u306a\u3055\u305d\u3046\u3067\u3059\u3002
\n\u3068\u306f\u3044\u3063\u3066\u3082\u3001\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3092\u3057\u3066\u3044\u3066\u3001\u30d5\u30a1\u30a4\u30eb\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u305f\u3044\u30b1\u30fc\u30b9\u306f\u3042\u308b\u308f\u3051\u3067\u3001\u5b9f\u969b\u306b\u8a66\u3057\u3066\u307f\u3088\u3046\u3068\u601d\u3044\u307e\u3059\u3002<\/p>\n

\u524d\u6e96\u5099<\/h2>\n

\u3068\u308a\u3042\u3048\u305a\u3001puppeteer\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002puppeteer\u4ee5\u5916\u306b\u5fc5\u8981\u306a\u30e9\u30a4\u30d6\u30e9\u30ea\u306f\u3001\u500b\u5225\u306b\u8ffd\u8a18\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n

npm install puppeteer --save<\/code><\/pre>\n

\u5b9f\u884c\u65b9\u6cd5<\/h2>\n

\u30b5\u30f3\u30d7\u30eb\u30bd\u30fc\u30b9\u3092\u9069\u5f53\u306a\u540d\u524d\u306e\u30d5\u30a1\u30a4\u30eb\uff08\u305f\u3068\u3048\u3070sample.js\uff09\u306b\u30b3\u30d4\u30da\u3057\u3066node\u30b3\u30de\u30f3\u30c9\u3067\u5b9f\u884c\u3059\u308b\u3060\u3051\u3067\u3001\u5b9f\u969b\u306b\u52d5\u304b\u3059\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n

node sample.js<\/code><\/pre>\n

\u30d5\u30a1\u30a4\u30eb\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b<\/h2>\n

\u65b9\u6cd51\u3010\u25b3\u3011: \u5358\u7d14\u306b\u30d6\u30e9\u30a6\u30b6\u306e\u30d5\u30a1\u30a4\u30eb\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u51e6\u7406\u3092\u5b9f\u884c<\/h3>\n

Chromium\u306ePage.setDownloadBehavior<\/a>\u30e1\u30bd\u30c3\u30c9\u3092\u5229\u7528\u3057\u3066\u3001\u30d6\u30e9\u30a6\u30b6\u306e\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u51e6\u7406\u3092\u8a31\u53ef\u3059\u308b\u65b9\u6cd5\u3067\u3059\u3002
\n\u6982\u306d\u3046\u307e\u304f\u52d5\u304d\u307e\u3059\u304c\u3001\u30d6\u30e9\u30a6\u30b6\u306e\u30d3\u30e5\u30fc\u30a2\u30fc\u3067\u8868\u793a\u3059\u308b\u30b1\u30fc\u30b9\u306f\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3067\u304d\u307e\u305b\u3093\u3002<\/u><\/p>\n

const puppeteer = require('puppeteer');\n\n(async () => {\n    async function sleep(msec) {\n        setTimeout(() => { }, msec);\n    }\n    async function download(pageUrl, selector) {\n        const browser = await puppeteer.launch();\n        const page = await browser.newPage();\n        await page.goto(pageUrl);\n        client = await page.target().createCDPSession();\n        client.send('Page.setDownloadBehavior', {\n            behavior: 'allow', \/\/ \u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3092\u8a31\u53ef\n            downloadPath: 'downloads', \/\/ \u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u5148\u306e\u30d5\u30a9\u30eb\u30c0\u3092\u6307\u5b9a\n        });\n        await page.click(selector);\n        await sleep(5000); \/\/ \u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u5b8c\u4e86\u3092\u5f85\u3064\n        await browser.close();\n    }\n    await download('https:\/\/www.soumu.go.jp\/toukei_toukatsu\/index\/seido\/9-5.htm', 'a[href*=".csv"]'); \/\/ OK\n    await download('https:\/\/www.soumu.go.jp\/toukei_toukatsu\/index\/seido\/9-5.htm', 'a[href*=".pdf"]'); \/\/ OK\n    await download('https:\/\/pdf-xml-download-test.vercel.app\/', '#link-pdf'); \/\/ OK\n    await download('https:\/\/pdf-xml-download-test.vercel.app\/', '#link-xml'); \/\/ NG\n})();<\/code><\/pre>\n

\u65b9\u6cd52\u3010\u25cb\u3011: \u5f37\u5236\u7684\u306b\u30d6\u30e9\u30a6\u30b6\u306e\u30d5\u30a1\u30a4\u30eb\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u51e6\u7406\u3092\u5b9f\u884c<\/h3>\n

\u3053\u3061\u3089<\/a>\u306b\u8a18\u8f09\u3055\u308c\u3066\u3044\u308b\u65b9\u6cd5\u3092\u53c2\u8003\u306b\u3001\u65b9\u6cd51\u3092\u6539\u826f\u3057\u3066\u3044\u307e\u3059\u3002
\n
Fetch<\/a>\u3092\u5229\u7528\u3057\u3066\u3001xml\u3084pdf\u306e\u5834\u5408\u306fResponseHeader\u306bcontent-disposition: attachment<\/code>\u3092\u8ffd\u52a0\u3059\u308b\u3053\u3068\u3067\u3001\u7121\u7406\u3084\u308a\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3055\u305b\u308b\u65b9\u6cd5\u3067\u3059\u3002
\n\u3053\u306e\u65b9\u6cd5\u306f\u3044\u304f\u3064\u304b\u306e\u30b5\u30a4\u30c8\u3067\u8a66\u3057\u3066\u307f\u305f\u3068\u3053\u308d\u554f\u984c\u306a\u304f\u52d5\u3044\u3066\u304f\u308c\u307e\u3057\u305f\u304c\u3001\uff12\u70b9\u307b\u3069\u554f\u984c\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n