From 7dc4106064cecb952a7d0cb1cd88b757809b6ab5 Mon Sep 17 00:00:00 2001 From: Robin van der Linde Date: Thu, 15 Feb 2024 21:20:52 +0100 Subject: [PATCH] Add some handling for pages --- scripts/downloadCommons.ts | 59 +++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/scripts/downloadCommons.ts b/scripts/downloadCommons.ts index 78cb8e217c..22391f7f94 100644 --- a/scripts/downloadCommons.ts +++ b/scripts/downloadCommons.ts @@ -69,6 +69,30 @@ interface CategoryQueryAPIResponse { } } +interface ImagesQueryAPIResponse { + continue: { + imcontinue: string + continue: string + } + query: { + normalized?: { + from: string + to: string + }[] + pages: { + [key: string]: { + pageid: number + ns: number + title: string + images?: { + ns: number + title: string + }[] + } + } + } +} + interface TemplateQueryAPIResponse { batchcomplete: string query: { @@ -102,7 +126,7 @@ async function main(args: string[]) { if (args.length < 2) { console.log("Usage: downloadCommons.ts .. 
") console.log( - "Example: npx vite-node downloadCommons.ts -- assets/svg https://commons.wikimedia.org/wiki/File:Example.jpg" + "Example: npx vite-node scripts/downloadCommons.ts -- assets/svg https://commons.wikimedia.org/wiki/File:Example.jpg" ) process.exit(1) } @@ -128,8 +152,24 @@ async function main(args: string[]) { for (const member of apiDetails.query.categorymembers) { await downloadImage(member.title, outputFolder, baseUrl) } - } else { + } else if (url.includes("File:")) { await downloadImage(commonsFileName, outputFolder, baseUrl) + } else { + // Probably a page url, try to get all images from the page + const apiUrl = `${baseUrl}/w/api.php?action=query&format=json&prop=images&titles=${commonsFileName}&imlimit=250` + const response = await fetch(apiUrl) + const apiDetails: ImagesQueryAPIResponse = await response.json() + const page = apiDetails.query.pages[Object.keys(apiDetails.query.pages)[0]] + if (page.images) { + for (const image of page.images) { + await downloadImage(image.title, outputFolder, baseUrl) + } + } else { + console.log( + "\x1b[31m%s\x1b[0m", + `URL ${url} doesn't seem to contain any images! 
Skipping...` + ) + } } } else { console.log( @@ -154,6 +194,12 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st const apiDetails: ImageQueryAPIResponse = await response.json() const missingPage = apiDetails.query.pages["-1"] + // Check if the local file already exists, if it does, skip it + if (existsSync(`${outputFolder}/${filename}`)) { + console.log(`\x1b[33m%s\x1b[0m`, `${filename} already exists, skipping...`) + return + } + // Check if the file exists, locally or externally if (missingPage !== undefined) { // Image does not exist locally, check if it exists externally @@ -271,8 +317,8 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st // Save the license information const licenseInfo: SmallLicense = { path: cleanFileName, - license: licenseMapping[license] || license, - authors: [author], + license: licenseMapping[license] || license.replace("CC BY", "CC-BY"), + authors: [removeLinks(author)], sources: [wikiUrl], } @@ -293,4 +339,9 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st } } +function removeLinks(text: string): string { + // Remove <a> tags + return text.replace(/<a.*?>(.*?)<\/a>/g, "$1") +} + main(process.argv.slice(2))