forked from MapComplete/MapComplete
Slightly rework commons download script
This commit is contained in:
parent
24013c65e8
commit
c6e70da598
1 changed file with 148 additions and 81 deletions
|
|
@ -101,13 +101,17 @@ const templateMapping = {
|
||||||
async function main(args: string[]) {
|
async function main(args: string[]) {
|
||||||
if (args.length < 2) {
|
if (args.length < 2) {
|
||||||
console.log("Usage: downloadCommons.ts <output folder> <url> <?url> <?url> .. ")
|
console.log("Usage: downloadCommons.ts <output folder> <url> <?url> <?url> .. ")
|
||||||
return
|
process.exit(1)
|
||||||
}
|
}
|
||||||
const [outputFolder, ...urls] = args
|
const [outputFolder, ...urls] = args
|
||||||
|
|
||||||
for (const url of urls) {
|
for (const url of urls) {
|
||||||
// Download details from the API
|
// Download details from the API
|
||||||
const commonsFileName = url.split("/").pop().split("?").shift()
|
const commonsFileNamePath = url.split("/").pop()
|
||||||
|
if (commonsFileNamePath !== undefined) {
|
||||||
|
const commonsFileName = commonsFileNamePath.split("?").shift()
|
||||||
|
|
||||||
|
if (commonsFileName !== undefined) {
|
||||||
console.log(`Processing ${commonsFileName}...`)
|
console.log(`Processing ${commonsFileName}...`)
|
||||||
|
|
||||||
const baseUrl = url.split("/").slice(0, 3).join("/")
|
const baseUrl = url.split("/").slice(0, 3).join("/")
|
||||||
|
|
@ -124,6 +128,20 @@ async function main(args: string[]) {
|
||||||
} else {
|
} else {
|
||||||
await downloadImage(commonsFileName, outputFolder, baseUrl)
|
await downloadImage(commonsFileName, outputFolder, baseUrl)
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
console.log(
|
||||||
|
"\x1b[31m%s\x1b[0m",
|
||||||
|
`URL ${url} doesn't seem to contain a filename or category! Skipping...`
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log(
|
||||||
|
"\x1b[31m%s\x1b[0m",
|
||||||
|
`URL ${url} doesn't seem to be a valid URL! Skipping...`
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -131,25 +149,63 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st
|
||||||
const apiUrl = `${baseUrl}/w/api.php?action=query&format=json&prop=imageinfo&iiprop=url|extmetadata|user&iimetadataversion=latest&titles=${filename}`
|
const apiUrl = `${baseUrl}/w/api.php?action=query&format=json&prop=imageinfo&iiprop=url|extmetadata|user&iimetadataversion=latest&titles=${filename}`
|
||||||
const response = await fetch(apiUrl)
|
const response = await fetch(apiUrl)
|
||||||
const apiDetails: ImageQueryAPIResponse = await response.json()
|
const apiDetails: ImageQueryAPIResponse = await response.json()
|
||||||
|
const missingPage = apiDetails.query.pages["-1"]
|
||||||
|
|
||||||
// Check if the file exists, locally or externally
|
// Check if the file exists, locally or externally
|
||||||
if (apiDetails.query.pages["-1"]) {
|
if (missingPage !== undefined) {
|
||||||
// Image does not exist locally, check if it exists externally
|
// Image does not exist locally, check if it exists externally
|
||||||
if (apiDetails.query.pages["-1"].imagerepository !== "local" && apiDetails.query.pages["-1"].imagerepository !== "") {
|
if (
|
||||||
const externalUrl = apiDetails.query.pages["-1"].imageinfo[0].descriptionurl
|
apiDetails.query.pages["-1"].imagerepository !== "local" &&
|
||||||
|
apiDetails.query.pages["-1"].imagerepository !== ""
|
||||||
|
) {
|
||||||
|
// Check if we actually have image info
|
||||||
|
if (missingPage.imageinfo?.length !== undefined && missingPage.imageinfo.length > 0) {
|
||||||
|
const externalUrl = missingPage.imageinfo[0].descriptionurl
|
||||||
const externalBase = externalUrl.split("/").slice(0, 3).join("/")
|
const externalBase = externalUrl.split("/").slice(0, 3).join("/")
|
||||||
const externalFilename = externalUrl.split("/").pop().split("?").shift()
|
|
||||||
console.log(`\x1b[33m%s\x1b[0m`, `${filename} is external, re-running with ${externalUrl}...`)
|
const externalFilenamePath = externalUrl.split("/").pop()
|
||||||
|
if (externalFilenamePath !== undefined) {
|
||||||
|
const externalFilename = externalFilenamePath.split("?").shift()
|
||||||
|
console.log(
|
||||||
|
`\x1b[33m%s\x1b[0m`,
|
||||||
|
`${filename} is external, re-running with ${externalUrl}...`
|
||||||
|
)
|
||||||
|
if (externalFilename !== undefined) {
|
||||||
await downloadImage(externalFilename, outputFolder, externalBase)
|
await downloadImage(externalFilename, outputFolder, externalBase)
|
||||||
return
|
return
|
||||||
|
} else {
|
||||||
|
// Edge case
|
||||||
|
console.log(
|
||||||
|
`\x1b[33m%s\x1b[0m`,
|
||||||
|
`External URL ${externalUrl} doesn't seem to contain a filename or category! Skipping...`
|
||||||
|
)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Edge case
|
||||||
|
console.log(
|
||||||
|
`\x1b[33m%s\x1b[0m`,
|
||||||
|
`External URL ${externalUrl} doesn't seem to be a valid URL! Skipping...`
|
||||||
|
)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log(
|
||||||
|
`\x1b[33m%s\x1b[0m`,
|
||||||
|
`${filename} does not have image info!, skipping...`
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
console.log(`\x1b[33m%s\x1b[0m`, `${filename} does not exist!, skipping...`)
|
console.log(`\x1b[33m%s\x1b[0m`, `${filename} does not exist!, skipping...`)
|
||||||
} else {
|
} else {
|
||||||
// Harvest useful information
|
// Harvest useful information
|
||||||
const wikiPage = apiDetails.query.pages[Object.keys(apiDetails.query.pages)[0]]
|
const wikiPage = apiDetails.query.pages[Object.keys(apiDetails.query.pages)[0]]
|
||||||
|
|
||||||
|
// Check if we actually have image info
|
||||||
|
if (wikiPage.imageinfo?.length !== undefined && wikiPage.imageinfo.length > 0) {
|
||||||
const wikiUrl = wikiPage.imageinfo[0].descriptionurl
|
const wikiUrl = wikiPage.imageinfo[0].descriptionurl
|
||||||
const fileUrl = wikiPage.imageinfo[0].url
|
const fileUrl = wikiPage.imageinfo[0].url
|
||||||
const author = wikiPage.imageinfo[0].extmetadata?.Artist?.value || wikiPage.imageinfo[0].user
|
const author =
|
||||||
|
wikiPage.imageinfo[0].extmetadata?.Artist?.value || wikiPage.imageinfo[0].user
|
||||||
let license = wikiPage.imageinfo[0].extmetadata?.LicenseShortName?.value || null
|
let license = wikiPage.imageinfo[0].extmetadata?.LicenseShortName?.value || null
|
||||||
|
|
||||||
// Check if the output folder exists
|
// Check if the output folder exists
|
||||||
|
|
@ -166,17 +222,22 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st
|
||||||
|
|
||||||
// Check if the license is present
|
// Check if the license is present
|
||||||
if (!license) {
|
if (!license) {
|
||||||
console.log(`${filename} does not have a license, falling back to checking template...`)
|
console.log(
|
||||||
|
`${filename} does not have a license, falling back to checking template...`
|
||||||
|
)
|
||||||
const templateUrl = `${baseUrl}/w/api.php?action=query&format=json&prop=templates&titles=${filename}&tllimit=500`
|
const templateUrl = `${baseUrl}/w/api.php?action=query&format=json&prop=templates&titles=${filename}&tllimit=500`
|
||||||
const templateResponse = await fetch(templateUrl)
|
const templateResponse = await fetch(templateUrl)
|
||||||
const templateDetails: TemplateQueryAPIResponse = await templateResponse.json()
|
const templateDetails: TemplateQueryAPIResponse = await templateResponse.json()
|
||||||
|
|
||||||
// Loop through all templates and check if one of them is a license
|
// Loop through all templates and check if one of them is a license
|
||||||
const wikiPage = templateDetails.query.pages[Object.keys(templateDetails.query.pages)[0]]
|
const wikiPage =
|
||||||
|
templateDetails.query.pages[Object.keys(templateDetails.query.pages)[0]]
|
||||||
if (wikiPage.templates) {
|
if (wikiPage.templates) {
|
||||||
for (const template of wikiPage.templates) {
|
for (const template of wikiPage.templates) {
|
||||||
if (templateMapping[template.title]) {
|
if (templateMapping[template.title]) {
|
||||||
console.log(`Found license ${templateMapping[template.title]} for ${filename}`)
|
console.log(
|
||||||
|
`Found license ${templateMapping[template.title]} for ${filename}`
|
||||||
|
)
|
||||||
license = templateMapping[template.title]
|
license = templateMapping[template.title]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -185,7 +246,10 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st
|
||||||
// If no license was found, skip the file
|
// If no license was found, skip the file
|
||||||
if (!license) {
|
if (!license) {
|
||||||
// Log in yellow
|
// Log in yellow
|
||||||
console.log(`\x1b[33m%s\x1b[0m`, `No license found for ${filename}, skipping...`)
|
console.log(
|
||||||
|
`\x1b[33m%s\x1b[0m`,
|
||||||
|
`No license found for ${filename}, skipping...`
|
||||||
|
)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -220,6 +284,9 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st
|
||||||
licenseData.push(licenseInfo)
|
licenseData.push(licenseInfo)
|
||||||
writeFileSync(licensePath, JSON.stringify(licenseData, null, 2))
|
writeFileSync(licensePath, JSON.stringify(licenseData, null, 2))
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
console.log(`\x1b[33m%s\x1b[0m`, `${filename} does not have image info!, skipping...`)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue