Add support for other wikis, external files and more
- templates
- coloured logging
This commit is contained in:
parent
4b5f01c74f
commit
e0447c3428
1 changed file with 131 additions and 62 deletions
|
@ -28,26 +28,27 @@ interface ImageQueryAPIResponse {
|
||||||
ns: number
|
ns: number
|
||||||
title: string
|
title: string
|
||||||
imagerepository: string
|
imagerepository: string
|
||||||
imageinfo: {
|
imageinfo?: {
|
||||||
|
user: string
|
||||||
url: string
|
url: string
|
||||||
descriptionurl: string
|
descriptionurl: string
|
||||||
descriptionshorturl: string
|
descriptionshorturl: string
|
||||||
extmetadata: {
|
extmetadata?: {
|
||||||
DateTime: ExtMetadataProp
|
DateTime: ExtMetadataProp
|
||||||
ObjectName: ExtMetadataProp
|
ObjectName: ExtMetadataProp
|
||||||
CommonsMetadataExtension: ExtMetadataProp
|
CommonsMetadataExtension?: ExtMetadataProp
|
||||||
Categories: ExtMetadataProp
|
Categories?: ExtMetadataProp
|
||||||
Assessments: ExtMetadataProp
|
Assessments?: ExtMetadataProp
|
||||||
ImageDescription: ExtMetadataProp
|
ImageDescription?: ExtMetadataProp
|
||||||
DateTimeOriginal: ExtMetadataProp
|
DateTimeOriginal?: ExtMetadataProp
|
||||||
Credit: ExtMetadataProp
|
Credit?: ExtMetadataProp
|
||||||
Artist: ExtMetadataProp
|
Artist?: ExtMetadataProp
|
||||||
LicenseShortName: ExtMetadataProp
|
LicenseShortName?: ExtMetadataProp
|
||||||
UsageTerms: ExtMetadataProp
|
UsageTerms?: ExtMetadataProp
|
||||||
AttributionRequired: ExtMetadataProp
|
AttributionRequired?: ExtMetadataProp
|
||||||
Copyrighted: ExtMetadataProp
|
Copyrighted?: ExtMetadataProp
|
||||||
Restrictions: ExtMetadataProp
|
Restrictions?: ExtMetadataProp
|
||||||
License: ExtMetadataProp
|
License?: ExtMetadataProp
|
||||||
}
|
}
|
||||||
}[]
|
}[]
|
||||||
}
|
}
|
||||||
|
@ -68,9 +69,35 @@ interface CategoryQueryAPIResponse {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface TemplateQueryAPIResponse {
|
||||||
|
batchcomplete: string
|
||||||
|
query: {
|
||||||
|
normalized?: {
|
||||||
|
from: string
|
||||||
|
to: string
|
||||||
|
}[]
|
||||||
|
pages: {
|
||||||
|
[key: string]: {
|
||||||
|
pageid: number
|
||||||
|
ns: number
|
||||||
|
title: string
|
||||||
|
templates?: {
|
||||||
|
ns: number
|
||||||
|
title: string
|
||||||
|
}[]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Map license names of Wikimedia Commons to different names
|
// Map license names of Wikimedia Commons to different names
|
||||||
const licenseMapping = {}
|
const licenseMapping = {}
|
||||||
|
|
||||||
|
// Map template names to license names
|
||||||
|
const templateMapping = {
|
||||||
|
"Template:PD": "Public Domain",
|
||||||
|
}
|
||||||
|
|
||||||
async function main(args: string[]) {
|
async function main(args: string[]) {
|
||||||
if (args.length < 2) {
|
if (args.length < 2) {
|
||||||
console.log("Usage: downloadCommons.ts <output folder> <url> <?url> <?url> .. ")
|
console.log("Usage: downloadCommons.ts <output folder> <url> <?url> <?url> .. ")
|
||||||
|
@ -83,74 +110,116 @@ async function main(args: string[]) {
|
||||||
const commonsFileName = url.split("/").pop().split("?").shift()
|
const commonsFileName = url.split("/").pop().split("?").shift()
|
||||||
console.log(`Processing ${commonsFileName}...`)
|
console.log(`Processing ${commonsFileName}...`)
|
||||||
|
|
||||||
|
const baseUrl = url.split("/").slice(0, 3).join("/")
|
||||||
|
|
||||||
// Check if it is a file or a category
|
// Check if it is a file or a category
|
||||||
if (url.includes("Category:")) {
|
if (url.includes("Category:")) {
|
||||||
// Download all files in the category
|
// Download all files in the category
|
||||||
const apiUrl = `https://commons.wikimedia.org/w/api.php?action=query&format=json&list=categorymembers&cmtitle=${commonsFileName}&cmlimit=250&cmtype=file`
|
const apiUrl = `${baseUrl}/w/api.php?action=query&format=json&list=categorymembers&cmtitle=${commonsFileName}&cmlimit=250&cmtype=file`
|
||||||
const response = await fetch(apiUrl)
|
const response = await fetch(apiUrl)
|
||||||
const apiDetails: CategoryQueryAPIResponse = await response.json()
|
const apiDetails: CategoryQueryAPIResponse = await response.json()
|
||||||
for (const member of apiDetails.query.categorymembers) {
|
for (const member of apiDetails.query.categorymembers) {
|
||||||
await downloadImage(member.title, outputFolder)
|
await downloadImage(member.title, outputFolder, baseUrl)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
await downloadImage(commonsFileName, outputFolder)
|
await downloadImage(commonsFileName, outputFolder, baseUrl)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function downloadImage(filename: string, outputFolder: string) {
|
async function downloadImage(filename: string, outputFolder: string, baseUrl: string) {
|
||||||
const apiUrl = `https://commons.wikimedia.org/w/api.php?action=query&format=json&prop=imageinfo&iiprop=url|extmetadata&titles=${filename}`
|
const apiUrl = `${baseUrl}/w/api.php?action=query&format=json&prop=imageinfo&iiprop=url|extmetadata|user&iimetadataversion=latest&titles=${filename}`
|
||||||
const response = await fetch(apiUrl)
|
const response = await fetch(apiUrl)
|
||||||
const apiDetails: ImageQueryAPIResponse = await response.json()
|
const apiDetails: ImageQueryAPIResponse = await response.json()
|
||||||
|
|
||||||
// Harvest useful information
|
// Check if the file exists, locally or externally
|
||||||
const wikiPage = apiDetails.query.pages[Object.keys(apiDetails.query.pages)[0]]
|
if (apiDetails.query.pages["-1"]) {
|
||||||
const wikiUrl = wikiPage.imageinfo[0].descriptionurl
|
// Image does not exist locally, check if it exists externally
|
||||||
const fileUrl = wikiPage.imageinfo[0].url
|
if (apiDetails.query.pages["-1"].imagerepository !== "local" && apiDetails.query.pages["-1"].imagerepository !== "") {
|
||||||
const author = wikiPage.imageinfo[0].extmetadata.Artist.value
|
const externalUrl = apiDetails.query.pages["-1"].imageinfo[0].descriptionurl
|
||||||
const license = wikiPage.imageinfo[0].extmetadata.LicenseShortName.value
|
const externalBase = externalUrl.split("/").slice(0, 3).join("/")
|
||||||
|
const externalFilename = externalUrl.split("/").pop().split("?").shift()
|
||||||
|
console.log(`\x1b[33m%s\x1b[0m`, `${filename} is external, re-running with ${externalUrl}...`)
|
||||||
|
await downloadImage(externalFilename, outputFolder, externalBase)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
console.log(`\x1b[33m%s\x1b[0m`, `${filename} does not exist!, skipping...`)
|
||||||
|
} else {
|
||||||
|
// Harvest useful information
|
||||||
|
const wikiPage = apiDetails.query.pages[Object.keys(apiDetails.query.pages)[0]]
|
||||||
|
const wikiUrl = wikiPage.imageinfo[0].descriptionurl
|
||||||
|
const fileUrl = wikiPage.imageinfo[0].url
|
||||||
|
const author = wikiPage.imageinfo[0].extmetadata?.Artist?.value || wikiPage.imageinfo[0].user
|
||||||
|
let license = wikiPage.imageinfo[0].extmetadata?.LicenseShortName?.value || null
|
||||||
|
|
||||||
// Check if the output folder exists
|
// Check if the output folder exists
|
||||||
if (!existsSync(outputFolder)) {
|
if (!existsSync(outputFolder)) {
|
||||||
const parts = outputFolder.split("/")
|
const parts = outputFolder.split("/")
|
||||||
for (var i = 0; i < parts.length; i++) {
|
for (var i = 0; i < parts.length; i++) {
|
||||||
const part = parts.slice(0, i + 1).join("/")
|
const part = parts.slice(0, i + 1).join("/")
|
||||||
if (!existsSync(part)) {
|
if (!existsSync(part)) {
|
||||||
console.log(`Creating folder ${part}`)
|
console.log(`Creating folder ${part}`)
|
||||||
mkdirSync(part)
|
mkdirSync(part)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Download the file and save it
|
// Check if the license is present
|
||||||
const cleanFileName = unescape(filename).replace("File:", "")
|
if (!license) {
|
||||||
console.log(
|
console.log(`${filename} does not have a license, falling back to checking template...`)
|
||||||
`Downloading ${cleanFileName} from ${fileUrl} and saving it to ${outputFolder}/${cleanFileName}...`
|
const templateUrl = `${baseUrl}/w/api.php?action=query&format=json&prop=templates&titles=${filename}&tllimit=500`
|
||||||
)
|
const templateResponse = await fetch(templateUrl)
|
||||||
const fileResponse = await fetch(fileUrl)
|
const templateDetails: TemplateQueryAPIResponse = await templateResponse.json()
|
||||||
const fileBuffer = await fileResponse.arrayBuffer()
|
|
||||||
const file = Buffer.from(fileBuffer)
|
|
||||||
const filePath = `${outputFolder}/${cleanFileName}`
|
|
||||||
writeFileSync(filePath, file)
|
|
||||||
|
|
||||||
// Save the license information
|
// Loop through all templates and check if one of them is a license
|
||||||
const licenseInfo: SmallLicense = {
|
const wikiPage = templateDetails.query.pages[Object.keys(templateDetails.query.pages)[0]]
|
||||||
path: cleanFileName,
|
if (wikiPage.templates) {
|
||||||
license: licenseMapping[license] || license,
|
for (const template of wikiPage.templates) {
|
||||||
authors: [author],
|
if (templateMapping[template.title]) {
|
||||||
sources: [wikiUrl],
|
console.log(`Found license ${templateMapping[template.title]} for ${filename}`)
|
||||||
}
|
license = templateMapping[template.title]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const licensePath = `${outputFolder}/license_info.json`
|
// If no license was found, skip the file
|
||||||
if (!existsSync(licensePath)) {
|
if (!license) {
|
||||||
// Create the file if it doesn't exist
|
// Log in yellow
|
||||||
writeFileSync(licensePath, JSON.stringify([licenseInfo], null, 2))
|
console.log(`\x1b[33m%s\x1b[0m`, `No license found for ${filename}, skipping...`)
|
||||||
} else {
|
return
|
||||||
// Append to the file if it does exist
|
}
|
||||||
const licenseFile = await readFileSync(licensePath, "utf8")
|
}
|
||||||
const licenseData = JSON.parse(licenseFile)
|
|
||||||
licenseData.push(licenseInfo)
|
// Download the file and save it
|
||||||
writeFileSync(licensePath, JSON.stringify(licenseData, null, 2))
|
const cleanFileName = unescape(filename).replace("File:", "")
|
||||||
|
console.log(
|
||||||
|
`Downloading ${cleanFileName} from ${fileUrl} and saving it to ${outputFolder}/${cleanFileName}...`
|
||||||
|
)
|
||||||
|
const fileResponse = await fetch(fileUrl)
|
||||||
|
const fileBuffer = await fileResponse.arrayBuffer()
|
||||||
|
const file = Buffer.from(fileBuffer)
|
||||||
|
const filePath = `${outputFolder}/${cleanFileName}`
|
||||||
|
writeFileSync(filePath, file)
|
||||||
|
|
||||||
|
// Save the license information
|
||||||
|
const licenseInfo: SmallLicense = {
|
||||||
|
path: cleanFileName,
|
||||||
|
license: licenseMapping[license] || license,
|
||||||
|
authors: [author],
|
||||||
|
sources: [wikiUrl],
|
||||||
|
}
|
||||||
|
|
||||||
|
const licensePath = `${outputFolder}/license_info.json`
|
||||||
|
if (!existsSync(licensePath)) {
|
||||||
|
// Create the file if it doesn't exist
|
||||||
|
writeFileSync(licensePath, JSON.stringify([licenseInfo], null, 2))
|
||||||
|
} else {
|
||||||
|
// Append to the file if it does exist
|
||||||
|
const licenseFile = await readFileSync(licensePath, "utf8")
|
||||||
|
const licenseData = JSON.parse(licenseFile)
|
||||||
|
licenseData.push(licenseInfo)
|
||||||
|
writeFileSync(licensePath, JSON.stringify(licenseData, null, 2))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue