Fix: canonicalize wikimedia links, see #2367, fix attribution

This commit is contained in:
Pieter Vander Vennet 2025-04-28 00:53:23 +02:00
parent af2636bfaa
commit 518a426805

View file

@ -25,12 +25,36 @@ export class WikimediaImageProvider extends ImageProvider {
super()
}
private static ExtractFileName(url: string) {
/**
 * Normalizes a wikimedia file name:
 * - surrounding whitespace is stripped
 * - a leading "File:"-prefix is removed
 * - every run of whitespace is replaced by a single underscore
 *
 * WikimediaImageProvider.makeCanonical("Some File.jpg") // => "Some_File.jpg"
 *
 * // Double spaces
 * WikimediaImageProvider.makeCanonical("Some  File.jpg") // => "Some_File.jpg"
 * WikimediaImageProvider.makeCanonical("Some+File.jpg") // => "Some+File.jpg"
 *
 * // Remove File: prefix
 * WikimediaImageProvider.makeCanonical("File:Some File.jpg") // => "Some_File.jpg"
 *
 * // Whitespace around the prefix does not prevent the prefix from being removed
 * WikimediaImageProvider.makeCanonical("  File: Some File.jpg ") // => "Some_File.jpg"
 *
 * @param filename the file name, with or without "File:"-prefix
 * @returns the file name without prefix, with underscores instead of whitespace
 */
private static makeCanonical(filename: string): string {
    // Trim *before* testing for the prefix: otherwise " File:x.jpg" keeps its prefix,
    // and callers which prepend "File:" again end up with "File:File:x.jpg"
    let name = filename.trim()
    if (name.startsWith("File:")) {
        name = name.substring("File:".length)
    }
    // Trim again, as there might have been whitespace between the prefix and the actual name
    return name.trim().replace(/\s+/g, "_")
}
/**
 * Determines the canonical file name (without "File:"-prefix) for a wikimedia URL or file name.
 *
 * For a URL, the last segment of the (percent-decoded) path is used; the query string is ignored.
 * Anything that does not start with "http" is treated as a plain file name.
 *
 * WikimediaImageProvider.extractFileName("https://commons.wikimedia.org/wiki/File:Somefile.jpg") // => "Somefile.jpg"
 * WikimediaImageProvider.extractFileName("https://commons.wikimedia.org/wiki/File:S%C3%A8vres%20-%20square_madame_de_Pompadour_-_bo%C3%AEte_%C3%A0_livres.jpg?uselang=en") // => "Sèvres_-_square_madame_de_Pompadour_-_boîte_à_livres.jpg"
 *
 * // Plain file names are canonicalized too
 * WikimediaImageProvider.extractFileName("File:Some File.jpg") // => "Some_File.jpg"
 *
 * @param url a URL pointing to a wikimedia file page, or a bare file name
 * @returns the canonical file name, see makeCanonical
 */
private static extractFileName(url: string): string {
    if (!url.startsWith("http")) {
        // Not a URL: canonicalize anyway, so that a caller prepending "File:" never yields "File:File:..."
        return WikimediaImageProvider.makeCanonical(url)
    }
    const rawPath = new URL(url).pathname
    let path: string
    try {
        path = decodeURIComponent(rawPath)
    } catch (e) {
        // Malformed percent-escape (URIError): fall back to the undecoded path instead of crashing
        path = rawPath
    }
    return WikimediaImageProvider.makeCanonical(path.substring(path.lastIndexOf("/") + 1))
}
private static PrepareUrl(value: string, useHd = false): string {
@ -98,6 +122,15 @@ export class WikimediaImageProvider extends ImageProvider {
return this.UrlForImage("File:" + value)
}
/**
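* Resolves a key/value pair into the wikimedia images it refers to.
*
* @param key the key under which the value was found, e.g. "wikimedia_commons" as in the example below
* @param value the value to resolve, e.g. a "File:..." name as in the example below
*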
* const result = await WikimediaImageProvider.singleton.ExtractUrls("wikimedia_commons", "File:Sèvres_-_square_madame_de_Pompadour_-_boîte_à_livres.jpg")
* result[0].url_hd // => "https://commons.wikimedia.org/wiki/Special:FilePath/File%3AS%C3%A8vres_-_square_madame_de_Pompadour_-_bo%C3%AEte_%C3%A0_livres.jpg"
*/
public async ExtractUrls(key: string, value: string): undefined | Promise<ProvidedImage[]> {
const hasCommonsPrefix = WikimediaImageProvider.startsWithCommonsPrefix(value)
if (key !== undefined && key !== this.commons_key && !hasCommonsPrefix) {
@ -123,7 +156,8 @@ export class WikimediaImageProvider extends ImageProvider {
}
public async DownloadAttribution(img: { url: string }): Promise<LicenseInfo> {
const filename = WikimediaImageProvider.ExtractFileName(img.url)
const filename = "File:" + WikimediaImageProvider.extractFileName(img.url)
console.log("Downloading attribution for", filename, img.url)
if (filename === "") {
return undefined
}
@ -145,23 +179,19 @@ export class WikimediaImageProvider extends ImageProvider {
pageInfo = pages.at(-1)
}
if (pageInfo === undefined) {
console.warn("No attribution found for wikimedia image:", filename)
return undefined
}
const license = (pageInfo.imageinfo ?? [])[0]?.extmetadata
if (license === undefined) {
console.warn(
"The file",
filename,
"has no usable metedata or license attached... Please fix the license info file yourself!"
"The file", filename, "has no usable metedata or license attached... Please fix the license info file yourself!"
)
return undefined
}
let title = pageInfo.title
if (title.startsWith("File:")) {
title = title.substr("File:".length)
}
let title = WikimediaImageProvider.makeCanonical(pageInfo.title)
if (title.endsWith(".jpg") || title.endsWith(".png")) {
title = title.substring(0, title.length - 4)
}
@ -180,9 +210,7 @@ export class WikimediaImageProvider extends ImageProvider {
}
private UrlForImage(image: string): ProvidedImage {
if (!image.startsWith("File:")) {
image = "File:" + image
}
image = "File:" + WikimediaImageProvider.makeCanonical(image)
return {
url: WikimediaImageProvider.PrepareUrl(image),
url_hd: WikimediaImageProvider.PrepareUrl(image, true),