forked from MapComplete/MapComplete
Add some handling for pages
This commit is contained in:
parent
e882436300
commit
7dc4106064
1 changed file with 55 additions and 4 deletions
|
@ -69,6 +69,30 @@ interface CategoryQueryAPIResponse {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Response shape of a MediaWiki `action=query&prop=images` request, used to
 * list the images embedded in a wiki page.
 */
interface ImagesQueryAPIResponse {
    /**
     * Continuation tokens for fetching the next batch of results.
     * The API only sends this object while more results remain, so it is
     * absent from a complete (or final) response.
     */
    continue?: {
        imcontinue: string
        continue: string
    }
    query: {
        /** Title normalizations applied by the API; absent when none were applied. */
        normalized?: {
            from: string
            to: string
        }[]
        /** Requested pages, keyed by a string identifier (presumably the page ID — verify against the API). */
        pages: {
            [key: string]: {
                pageid: number
                ns: number
                title: string
                /** Images used on the page; absent when the page has none. */
                images?: {
                    ns: number
                    title: string
                }[]
            }
        }
    }
}
|
||||||
|
|
||||||
interface TemplateQueryAPIResponse {
|
interface TemplateQueryAPIResponse {
|
||||||
batchcomplete: string
|
batchcomplete: string
|
||||||
query: {
|
query: {
|
||||||
|
@ -102,7 +126,7 @@ async function main(args: string[]) {
|
||||||
if (args.length < 2) {
|
if (args.length < 2) {
|
||||||
console.log("Usage: downloadCommons.ts <output folder> <url> <?url> <?url> .. ")
|
console.log("Usage: downloadCommons.ts <output folder> <url> <?url> <?url> .. ")
|
||||||
console.log(
|
console.log(
|
||||||
"Example: npx vite-node downloadCommons.ts -- assets/svg https://commons.wikimedia.org/wiki/File:Example.jpg"
|
"Example: npx vite-node scripts/downloadCommons.ts -- assets/svg https://commons.wikimedia.org/wiki/File:Example.jpg"
|
||||||
)
|
)
|
||||||
process.exit(1)
|
process.exit(1)
|
||||||
}
|
}
|
||||||
|
@ -128,8 +152,24 @@ async function main(args: string[]) {
|
||||||
for (const member of apiDetails.query.categorymembers) {
|
for (const member of apiDetails.query.categorymembers) {
|
||||||
await downloadImage(member.title, outputFolder, baseUrl)
|
await downloadImage(member.title, outputFolder, baseUrl)
|
||||||
}
|
}
|
||||||
} else {
|
} else if (url.includes("File:")) {
|
||||||
await downloadImage(commonsFileName, outputFolder, baseUrl)
|
await downloadImage(commonsFileName, outputFolder, baseUrl)
|
||||||
|
} else {
|
||||||
|
// Probably a page url, try to get all images from the page
|
||||||
|
const apiUrl = `${baseUrl}/w/api.php?action=query&format=json&prop=images&titles=${commonsFileName}&imlimit=250`
|
||||||
|
const response = await fetch(apiUrl)
|
||||||
|
const apiDetails: ImagesQueryAPIResponse = await response.json()
|
||||||
|
const page = apiDetails.query.pages[Object.keys(apiDetails.query.pages)[0]]
|
||||||
|
if (page.images) {
|
||||||
|
for (const image of page.images) {
|
||||||
|
await downloadImage(image.title, outputFolder, baseUrl)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log(
|
||||||
|
"\x1b[31m%s\x1b[0m",
|
||||||
|
`URL ${url} doesn't seem to contain any images! Skipping...`
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
console.log(
|
console.log(
|
||||||
|
@ -154,6 +194,12 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st
|
||||||
const apiDetails: ImageQueryAPIResponse = await response.json()
|
const apiDetails: ImageQueryAPIResponse = await response.json()
|
||||||
const missingPage = apiDetails.query.pages["-1"]
|
const missingPage = apiDetails.query.pages["-1"]
|
||||||
|
|
||||||
|
// Check if the local file already exists, if it does, skip it
|
||||||
|
if (existsSync(`${outputFolder}/${filename}`)) {
|
||||||
|
console.log(`\x1b[33m%s\x1b[0m`, `${filename} already exists, skipping...`)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Check if the file exists, locally or externally
|
// Check if the file exists, locally or externally
|
||||||
if (missingPage !== undefined) {
|
if (missingPage !== undefined) {
|
||||||
// Image does not exist locally, check if it exists externally
|
// Image does not exist locally, check if it exists externally
|
||||||
|
@ -271,8 +317,8 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st
|
||||||
// Save the license information
|
// Save the license information
|
||||||
const licenseInfo: SmallLicense = {
|
const licenseInfo: SmallLicense = {
|
||||||
path: cleanFileName,
|
path: cleanFileName,
|
||||||
license: licenseMapping[license] || license,
|
license: licenseMapping[license] || license.replace("CC BY", "CC-BY"),
|
||||||
authors: [author],
|
authors: [removeLinks(author)],
|
||||||
sources: [wikiUrl],
|
sources: [wikiUrl],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -293,4 +339,9 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Unwraps HTML anchor elements in `text`, keeping only the content they enclose.
 *
 * Only genuine `<a>` elements are touched: other tags whose name merely starts
 * with "a" (e.g. `<abbr>`, `<address>`) are left intact. Matching is
 * case-insensitive, and both the attributes and the link text may span
 * several lines.
 *
 * @param text - String that may contain `<a ...>...</a>` markup
 * @returns `text` with every anchor element replaced by its inner content
 */
function removeLinks(text: string): string {
    // `(?:\s[^>]*)?` requires the tag name to end right after "a" (either `<a>` or
    // `<a` followed by whitespace and attributes); `[\s\S]` lets the link text
    // include line breaks, which `.` would not match.
    return text.replace(/<a(?:\s[^>]*)?>([\s\S]*?)<\/a\s*>/gi, "$1")
}
|
||||||
|
|
||||||
main(process.argv.slice(2))
|
main(process.argv.slice(2))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue