Fix: canonicalize wikimedia links, see #2367, fix attribution

This commit is contained in:
Pieter Vander Vennet 2025-04-28 00:53:23 +02:00
parent af2636bfaa
commit 518a426805

View file

@ -25,12 +25,36 @@ export class WikimediaImageProvider extends ImageProvider {
super() super()
} }
private static ExtractFileName(url: string) { /**
* Replaces every run of one or more whitespace characters with a single underscore.
* Will remove a "File:"-prefix
*
* WikimediaImageProvider.makeCanonical("Some File.jpg") // => "Some_File.jpg"
*
* // Double spaces
* WikimediaImageProvider.makeCanonical("Some  File.jpg") // => "Some_File.jpg"
* WikimediaImageProvider.makeCanonical("Some+File.jpg") // => "Some+File.jpg"
*
* // Remove File: prefix
* WikimediaImageProvider.makeCanonical("File:Some File.jpg") // => "Some_File.jpg"
*/
private static makeCanonical(filename: string): string {
    // MediaWiki treats spaces and underscores in titles as equivalent and collapses
    // runs of them; normalise both to a single space first so that trimming and the
    // prefix check below cannot be defeated by stray whitespace or underscores.
    let name = filename.replace(/[\s_]+/g, " ").trim()
    // Namespace names are case-insensitive on MediaWiki ("file:" equals "File:").
    if (/^file:/i.test(name)) {
        // Trim again: "File: Foo.jpg" would otherwise leave a leading space behind.
        name = name.substring("File:".length).trim()
    }
    // Emit the URL-style form, which uses underscores between words.
    return name.replace(/ /g, "_")
}
/**
*
* WikimediaImageProvider.extractFileName("https://commons.wikimedia.org/wiki/File:Somefile.jpg") // => "Somefile.jpg"
* WikimediaImageProvider.extractFileName("https://commons.wikimedia.org/wiki/File:S%C3%A8vres%20-%20square_madame_de_Pompadour_-_bo%C3%AEte_%C3%A0_livres.jpg?uselang=en") // => "Sèvres_-_square_madame_de_Pompadour_-_boîte_à_livres.jpg"
*/
private static extractFileName(url: string) {
if (!url.startsWith("http")) { if (!url.startsWith("http")) {
return url return url
} }
const path = new URL(url).pathname const path = decodeURIComponent(new URL(url).pathname)
return path.substring(path.lastIndexOf("/") + 1) return WikimediaImageProvider.makeCanonical(path.substring(path.lastIndexOf("/") + 1))
} }
private static PrepareUrl(value: string, useHd = false): string { private static PrepareUrl(value: string, useHd = false): string {
@ -98,6 +122,15 @@ export class WikimediaImageProvider extends ImageProvider {
return this.UrlForImage("File:" + value) return this.UrlForImage("File:" + value)
} }
/**
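* Builds the image entries for a tag that refers to a Wikimedia Commons file (see the example below).
*
* @param key the tag key, e.g. "wikimedia_commons"
* @param value the tag value, e.g. a "File:…" name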
*
* const result = await WikimediaImageProvider.singleton.ExtractUrls("wikimedia_commons", "File:Sèvres_-_square_madame_de_Pompadour_-_boîte_à_livres.jpg")
* result[0].url_hd // => "https://commons.wikimedia.org/wiki/Special:FilePath/File%3AS%C3%A8vres_-_square_madame_de_Pompadour_-_bo%C3%AEte_%C3%A0_livres.jpg"
*/
public async ExtractUrls(key: string, value: string): undefined | Promise<ProvidedImage[]> { public async ExtractUrls(key: string, value: string): undefined | Promise<ProvidedImage[]> {
const hasCommonsPrefix = WikimediaImageProvider.startsWithCommonsPrefix(value) const hasCommonsPrefix = WikimediaImageProvider.startsWithCommonsPrefix(value)
if (key !== undefined && key !== this.commons_key && !hasCommonsPrefix) { if (key !== undefined && key !== this.commons_key && !hasCommonsPrefix) {
@ -123,7 +156,8 @@ export class WikimediaImageProvider extends ImageProvider {
} }
public async DownloadAttribution(img: { url: string }): Promise<LicenseInfo> { public async DownloadAttribution(img: { url: string }): Promise<LicenseInfo> {
const filename = WikimediaImageProvider.ExtractFileName(img.url) const filename = "File:" + WikimediaImageProvider.extractFileName(img.url)
console.log("Downloading attribution for", filename, img.url)
if (filename === "") { if (filename === "") {
return undefined return undefined
} }
@ -145,23 +179,19 @@ export class WikimediaImageProvider extends ImageProvider {
pageInfo = pages.at(-1) pageInfo = pages.at(-1)
} }
if (pageInfo === undefined) { if (pageInfo === undefined) {
console.warn("No attribution found for wikimedia image:", filename)
return undefined return undefined
} }
const license = (pageInfo.imageinfo ?? [])[0]?.extmetadata const license = (pageInfo.imageinfo ?? [])[0]?.extmetadata
if (license === undefined) { if (license === undefined) {
console.warn( console.warn(
"The file", "The file", filename, "has no usable metedata or license attached... Please fix the license info file yourself!"
filename,
"has no usable metedata or license attached... Please fix the license info file yourself!"
) )
return undefined return undefined
} }
let title = pageInfo.title let title = WikimediaImageProvider.makeCanonical(pageInfo.title)
if (title.startsWith("File:")) {
title = title.substr("File:".length)
}
if (title.endsWith(".jpg") || title.endsWith(".png")) { if (title.endsWith(".jpg") || title.endsWith(".png")) {
title = title.substring(0, title.length - 4) title = title.substring(0, title.length - 4)
} }
@ -180,9 +210,7 @@ export class WikimediaImageProvider extends ImageProvider {
} }
private UrlForImage(image: string): ProvidedImage { private UrlForImage(image: string): ProvidedImage {
if (!image.startsWith("File:")) { image = "File:" + WikimediaImageProvider.makeCanonical(image)
image = "File:" + image
}
return { return {
url: WikimediaImageProvider.PrepareUrl(image), url: WikimediaImageProvider.PrepareUrl(image),
url_hd: WikimediaImageProvider.PrepareUrl(image, true), url_hd: WikimediaImageProvider.PrepareUrl(image, true),