Refactoring of image detection, fix loading wikimedia images

This commit is contained in:
Pieter Vander Vennet 2021-09-29 23:56:59 +02:00
parent 4da6070b28
commit a6e8714ae0
21 changed files with 468 additions and 528 deletions

View file

@ -1,9 +1,59 @@
import {Mapillary} from "./Mapillary";
import {Wikimedia} from "./Wikimedia";
import {WikimediaImageProvider} from "./WikimediaImageProvider";
import {Imgur} from "./Imgur";
import GenericImageProvider from "./GenericImageProvider";
import {UIEventSource} from "../UIEventSource";
import ImageProvider, {ProvidedImage} from "./ImageProvider";
import {WikidataImageProvider} from "./WikidataImageProvider";
import {Utils} from "../../Utils";
/**
* A generic 'from the interwebz' image picker, without attribution
*/
export default class AllImageProviders {
public static ImageAttributionSource = [Imgur.singleton, Mapillary.singleton, Wikimedia.singleton]
public static ImageAttributionSource: ImageProvider[] = [
Imgur.singleton,
Mapillary.singleton,
WikidataImageProvider.singleton,
WikimediaImageProvider.singleton,
new GenericImageProvider(Imgur.defaultValuePrefix)]
private static _cache: Map<string, UIEventSource<ProvidedImage[]>> = new Map<string, UIEventSource<ProvidedImage[]>>()
/**
 * Gives a (cached) event source containing all images that the registered providers find for a feature.
 *
 * Every provider in `AllImageProviders.ImageAttributionSource` is asked for its relevant urls;
 * whenever one of them reports something, the combined list is rebuilt with duplicates (same `url`) removed,
 * keeping the first occurrence.
 *
 * Note: the result is cached on `tags.data.id` only; `imagePrefix` and `loadSpecialSource` are not used by this method.
 *
 * @param tags the tags of the feature; `tags.data.id` is used as cache key
 * @param imagePrefix currently unused
 * @param loadSpecialSource currently unused
 * @returns the combined image source, or `undefined` if the feature has no id
 */
public static LoadImagesFor(tags: UIEventSource<any>, imagePrefix: string, loadSpecialSource: boolean): UIEventSource<ProvidedImage[]> {
    const featureId = tags.data.id
    if (featureId === undefined) {
        return undefined;
    }

    const previouslyCreated = this._cache.get(tags.data.id)
    if (previouslyCreated !== undefined) {
        return previouslyCreated
    }

    const merged = new UIEventSource([])
    this._cache.set(featureId, merged)

    const perProvider = []
    for (const imageProvider of AllImageProviders.ImageAttributionSource) {
        const providerImages = imageProvider.GetRelevantUrls(tags)
        perProvider.push(providerImages)
        providerImages.addCallbackAndRunD(_ => {
            // Rebuild the full list from every provider registered so far
            const everything: ProvidedImage[] = [].concat(...perProvider.map(src => src.data))
            const firstByUrl = new Map<string, ProvidedImage>()
            for (const candidate of everything) {
                if (!firstByUrl.has(candidate.url)) {
                    firstByUrl.set(candidate.url, candidate)
                }
            }
            merged.setData(Array.from(firstByUrl.values()))
        })
    }
    return merged;
}
}

View file

@ -0,0 +1,36 @@
import ImageProvider, {ProvidedImage} from "./ImageProvider";
/**
 * Fallback provider: treats the raw tag value as the image url, unless the value
 * starts with one of the blacklisted prefixes handed to the constructor.
 * It offers neither attribution nor a source icon.
 */
export default class GenericImageProvider extends ImageProvider {
    public defaultKeyPrefixes: string[] = ["image"];

    /** Values starting with one of these prefixes are ignored by this provider */
    private readonly _valuePrefixBlacklist: string[];

    public constructor(valuePrefixBlacklist: string[]) {
        super();
        this._valuePrefixBlacklist = valuePrefixBlacklist;
    }

    /** No icon is available for a generic image */
    SourceIcon(backlinkSource?: string) {
        return undefined;
    }

    /**
     * Wraps the value as a single image, or gives an empty list if the value is blacklisted.
     */
    async ExtractUrls(key: string, value: string): Promise<Promise<ProvidedImage>[]> {
        const isBlacklisted = this._valuePrefixBlacklist.some(forbidden => value.startsWith(forbidden))
        if (isBlacklisted) {
            return []
        }
        const image = {
            key: key,
            url: value,
            provider: this
        }
        return [Promise.resolve(image)]
    }

    /** No attribution can be determined for a generic image */
    protected DownloadAttribution(url: string) {
        return undefined
    }
}

View file

@ -1,33 +0,0 @@
import {UIEventSource} from "../UIEventSource";
import {LicenseInfo} from "./Wikimedia";
import BaseUIElement from "../../UI/BaseUIElement";
/**
 * Base class for a source of images which can look up license information for an image url.
 * Lookups are cached per url.
 */
export default abstract class ImageAttributionSource {

    /** Already requested attributions, keyed by url */
    private _cache = new Map<string, UIEventSource<LicenseInfo>>()

    /**
     * Gives the event source which will contain the license info of the given url.
     * The download is started only the first time a url is requested; the source holds `undefined` until it finishes.
     */
    GetAttributionFor(url: string): UIEventSource<LicenseInfo> {
        const known = this._cache.get(url);
        if (known === undefined) {
            const pending = new UIEventSource(undefined)
            this._cache.set(url, pending)
            this.DownloadAttribution(url)
                .then(license => pending.setData(license))
                .catch(e => console.error("Could not download license information for ", url, " due to", e))
            return pending;
        }
        return known;
    }

    /*Converts a value to a URL. Can return null if not applicable*/
    public PrepareUrl(value: string): string | UIEventSource<string> {
        return value;
    }

    public abstract SourceIcon(backlinkSource?: string): BaseUIElement;

    protected abstract DownloadAttribution(url: string): Promise<LicenseInfo>;
}

View file

@ -0,0 +1,64 @@
import {UIEventSource} from "../UIEventSource";
import BaseUIElement from "../../UI/BaseUIElement";
import {LicenseInfo} from "./LicenseInfo";
/**
 * A single image as handed out by an ImageProvider
 */
export interface ProvidedImage {
    /** The url of the image itself */
    url: string;
    /** The key that was passed to `ExtractUrls`; some providers leave this undefined */
    key: string;
    /** The provider which produced this image */
    provider: ImageProvider;
}
/**
 * Base class of everything that can turn the tags of a feature into images
 * and that can look up the attribution of such an image.
 */
export default abstract class ImageProvider {

    /** Tag keys starting with one of these prefixes are inspected when no explicit prefixes are given */
    protected abstract readonly defaultKeyPrefixes: string[]

    /** Already requested attributions, keyed by url */
    private _cache = new Map<string, UIEventSource<LicenseInfo>>()

    /**
     * Gives the event source which will contain the license info for the given url.
     * The attribution is requested only once per url; later calls get the cached source.
     */
    GetAttributionFor(url: string): UIEventSource<LicenseInfo> {
        const cached = this._cache.get(url);
        if (cached !== undefined) {
            return cached;
        }
        const src = UIEventSource.FromPromise(this.DownloadAttribution(url))
        this._cache.set(url, src)
        return src;
    }

    public abstract SourceIcon(backlinkSource?: string): BaseUIElement;

    protected abstract DownloadAttribution(url: string): Promise<LicenseInfo>;

    /**
     * Given a properties object, maps it onto _all_ the available pictures for this imageProvider.
     *
     * Every tag whose key starts with one of the prefixes is handed to `ExtractUrls`; each value is only handled once.
     * Images are appended to the returned source as soon as they resolve.
     * A failing extraction or a failing single image is logged and skipped, so that it neither
     * results in an unhandled promise rejection nor blocks the other images.
     *
     * @param allTags the (changing) tags of the feature
     * @param options.prefixes overrides `defaultKeyPrefixes` if given
     */
    public GetRelevantUrls(allTags: UIEventSource<any>, options?: {
        prefixes?: string[]
    }): UIEventSource<ProvidedImage[]> {
        const prefixes = options?.prefixes ?? this.defaultKeyPrefixes
        const relevantUrls = new UIEventSource<{ url: string; key: string; provider: ImageProvider }[]>([])
        const seenValues = new Set<string>()
        allTags.addCallbackAndRunD(tags => {
            for (const key in tags) {
                if (!prefixes.some(prefix => key.startsWith(prefix))) {
                    continue
                }
                const value = tags[key]
                if (seenValues.has(value)) {
                    continue
                }
                seenValues.add(value)
                this.ExtractUrls(key, value)
                    .then(promises => {
                        for (const promise of promises) {
                            promise
                                .then(providedImage => {
                                    if (providedImage === undefined || providedImage === null) {
                                        // Nothing usable was found; consumers read '.url' of every entry
                                        return
                                    }
                                    relevantUrls.data.push(providedImage)
                                    relevantUrls.ping()
                                })
                                .catch(e => console.error("Could not load an image for", key, "=", value, "due to", e))
                        }
                    })
                    .catch(e => console.error("Could not extract images from", key, "=", value, "due to", e))
            }
        })
        return relevantUrls
    }

    /**
     * Converts a single tag into zero or more images.
     * The outer promise resolves when the number of images is known, the inner promises when the individual image is ready.
     */
    public abstract ExtractUrls(key: string, value: string): Promise<Promise<ProvidedImage>[]>;
}

View file

@ -1,12 +1,14 @@
// @ts-ignore
import $ from "jquery"
import {LicenseInfo} from "./Wikimedia";
import ImageAttributionSource from "./ImageAttributionSource";
import ImageProvider, {ProvidedImage} from "./ImageProvider";
import BaseUIElement from "../../UI/BaseUIElement";
import {Utils} from "../../Utils";
import Constants from "../../Models/Constants";
import {LicenseInfo} from "./LicenseInfo";
export class Imgur extends ImageAttributionSource {
export class Imgur extends ImageProvider {
public static readonly defaultValuePrefix = ["https://i.imgur.com"]
public readonly defaultKeyPrefixes: string[] = ["image"];
public static readonly singleton = new Imgur();
@ -87,7 +89,7 @@ export class Imgur extends ImageAttributionSource {
return undefined;
}
protected async DownloadAttribution(url: string): Promise<LicenseInfo> {
protected DownloadAttribution: (url: string) => Promise<LicenseInfo> = async (url: string) => {
const hash = url.substr("https://i.imgur.com/".length).split(".jpg")[0];
const apiUrl = 'https://api.imgur.com/3/image/' + hash;
@ -110,5 +112,16 @@ export class Imgur extends ImageAttributionSource {
return licenseInfo
}
/**
 * Gives the value back as a single image if it starts with one of the imgur prefixes, and nothing otherwise.
 */
public async ExtractUrls(key: string, value: string): Promise<Promise<ProvidedImage>[]> {
    const isImgurUrl = Imgur.defaultValuePrefix.some(imgurPrefix => value.startsWith(imgurPrefix))
    if (!isImgurUrl) {
        return []
    }
    const image: ProvidedImage = {
        url: value,
        key: key,
        provider: this
    }
    return [Promise.resolve(image)]
}
}

View file

@ -0,0 +1,10 @@
/**
 * Plain container for the license and attribution details of a single image.
 * Text fields start out as the empty string, the flags as `false`.
 */
export class LicenseInfo {
    /** Creator of the image */
    artist = "";
    /** Name of the license */
    license = "";
    /** Abbreviated name of the license */
    licenseShortName = "";
    /** Usage terms of the image */
    usageTerms = "";
    /** Whether attribution is required */
    attributionRequired = false;
    /** Whether the image is copyrighted */
    copyrighted = false;
    /** Credit line */
    credit = "";
    /** Description of the image */
    description = "";
}

View file

@ -1,19 +1,19 @@
import {LicenseInfo} from "./Wikimedia";
import ImageAttributionSource from "./ImageAttributionSource";
import ImageProvider, {ProvidedImage} from "./ImageProvider";
import BaseUIElement from "../../UI/BaseUIElement";
import {UIEventSource} from "../UIEventSource";
import Svg from "../../Svg";
import {Utils} from "../../Utils";
import {LicenseInfo} from "./LicenseInfo";
import Constants from "../../Models/Constants";
export class Mapillary extends ImageAttributionSource {
export class Mapillary extends ImageProvider {
defaultKeyPrefixes = ["mapillary"]
public static readonly singleton = new Mapillary();
private static readonly v4_cached_urls = new Map<string, UIEventSource<string>>();
private static readonly client_token_v3 = 'TXhLaWthQ1d4RUg0czVxaTVoRjFJZzowNDczNjUzNmIyNTQyYzI2'
private static readonly client_token_v4 = "MLY|4441509239301885|b40ad2d3ea105435bd40c7e76993ae85"
private constructor() {
super();
}
@ -56,29 +56,34 @@ export class Mapillary extends ImageAttributionSource {
return Svg.mapillary_svg();
}
PrepareUrl(value: string): string | UIEventSource<string> {
const keyV = Mapillary.ExtractKeyFromURL(value)
if (!keyV.isApiv4) {
return `https://images.mapillary.com/${keyV.key}/thumb-640.jpg?client_id=${Mapillary.client_token_v3}`
} else {
const key = keyV.key;
if (Mapillary.v4_cached_urls.has(key)) {
return Mapillary.v4_cached_urls.get(key)
}
const metadataUrl = 'https://graph.mapillary.com/' + key + '?fields=thumb_1024_url&&access_token=' + Mapillary.client_token_v4;
const source = new UIEventSource<string>(undefined)
Mapillary.v4_cached_urls.set(key, source)
Utils.downloadJson(metadataUrl).then(
json => {
console.warn("Got response on mapillary image", json, json["thumb_1024_url"])
return source.setData(json["thumb_1024_url"]);
}
)
return source
}
/**
 * A mapillary tag always maps onto exactly one image, which is resolved asynchronously.
 */
async ExtractUrls(key: string, value: string): Promise<Promise<ProvidedImage>[]> {
    const pendingImage = this.PrepareUrlAsync(key, value)
    return [pendingImage]
}
/**
 * Resolves a mapillary tag value into the actual image.
 *
 * Keys of the old API are mapped directly onto a thumbnail url; for keys of API v4
 * the thumbnail url is looked up via the graph API first.
 *
 * @param key the tag key the value was found under; passed through unchanged as `ProvidedImage.key`
 * @param value the tag value, from which the mapillary image key is extracted
 */
private async PrepareUrlAsync(key: string, value: string): Promise<ProvidedImage> {
    const keyV = Mapillary.ExtractKeyFromURL(value)
    if (!keyV.isApiv4) {
        const url = `https://images.mapillary.com/${keyV.key}/thumb-640.jpg?client_id=${Constants.mapillary_client_token_v3}`
        return {
            url: url,
            provider: this,
            key: key
        }
    } else {
        // Must not be named 'key': that would shadow the tag key which has to be returned below
        const mapillaryKey = keyV.key;
        const metadataUrl = 'https://graph.mapillary.com/' + mapillaryKey + '?fields=thumb_1024_url&&access_token=' + Constants.mapillary_client_token_v4;
        // NOTE(review): this cache entry is registered but never filled or read in the visible code
        const source = new UIEventSource<string>(undefined)
        Mapillary.v4_cached_urls.set(mapillaryKey, source)
        const response = await Utils.downloadJson(metadataUrl)
        const url = <string> response["thumb_1024_url"];
        return {
            url: url,
            provider: this,
            key: key
        }
    }
}
protected async DownloadAttribution(url: string): Promise<LicenseInfo> {
const keyV = Mapillary.ExtractKeyFromURL(url)

View file

@ -0,0 +1,51 @@
import {Utils} from "../../Utils";
import ImageProvider, {ProvidedImage} from "./ImageProvider";
import BaseUIElement from "../../UI/BaseUIElement";
import Svg from "../../Svg";
import {WikimediaImageProvider} from "./WikimediaImageProvider";
/**
 * Looks up a wikidata entity and hands out the images linked to it:
 * the file of the 'image' claim (P18) and whatever the linked Wikimedia Commons page yields.
 * The actual images are produced by (and attributed to) the WikimediaImageProvider.
 */
export class WikidataImageProvider extends ImageProvider {

    public SourceIcon(backlinkSource?: string): BaseUIElement {
        // The icon is a value to hand back, not an error to throw
        return Svg.wikidata_svg();
    }

    public static readonly singleton = new WikidataImageProvider()
    public readonly defaultKeyPrefixes = ["wikidata"]

    private constructor() {
        super()
    }

    protected DownloadAttribution(url: string): Promise<any> {
        throw new Error("Method not implemented; shouldn't be needed!");
    }

    /**
     * Downloads the wikidata entity and collects the images it refers to.
     *
     * @param key the tag key, passed on to the WikimediaImageProvider
     * @param value the entity id, with or without leading 'Q', or the full wikidata url
     * @returns the pending images; empty if the entity cannot be found or has no images
     */
    public async ExtractUrls(key: string, value: string): Promise<Promise<ProvidedImage>[]> {
        const wikidataUrl = "https://www.wikidata.org/wiki/"
        if (value.startsWith(wikidataUrl)) {
            value = value.substring(wikidataUrl.length)
        }
        if (!value.startsWith("Q")) {
            value = "Q" + value
        }
        const url = "https://www.wikidata.org/wiki/Special:EntityData/" + value + ".json";
        const response = await Utils.downloadJson(url)
        const entity = response?.entities?.[value];
        if (entity === undefined || entity === null) {
            // The response does not contain the entity under the requested id
            return []
        }
        // The title of the linked Wikimedia Commons page, if any
        const commonsTitle: string = entity.sitelinks?.commonswiki?.title;
        // P18 is the 'image' property; its value is a file name on Wikimedia Commons
        const image = entity.claims?.P18?.[0]?.mainsnak?.datavalue?.value;

        const allImages = []
        if (image !== undefined) {
            // We found a 'File://'
            const promises = await WikimediaImageProvider.singleton.ExtractUrls(key, image)
            allImages.push(...promises)
        }
        // Only categories and files are usable; any other title would be misread as a file name
        if (commonsTitle !== undefined && (commonsTitle.startsWith("Category:") || commonsTitle.startsWith("File:"))) {
            const promises = await WikimediaImageProvider.singleton.ExtractUrls(key, commonsTitle)
            allImages.push(...promises)
        }
        return allImages
    }
}

View file

@ -1,185 +0,0 @@
import ImageAttributionSource from "./ImageAttributionSource";
import BaseUIElement from "../../UI/BaseUIElement";
import Svg from "../../Svg";
import Link from "../../UI/Base/Link";
import {Utils} from "../../Utils";
/**
 * This module provides endpoints for wikipedia/wikimedia and others:
 * building image urls, listing the members of a commons category, reading a wikidata entity
 * and downloading the license information of an image.
 */
export class Wikimedia extends ImageAttributionSource {
public static readonly singleton = new Wikimedia();
private constructor() {
super();
}
/**
 * Builds the 'Special:FilePath' url for the given file name.
 * The file name is URI-encoded; width and height are appended as query parameters.
 */
static ImageNameToUrl(filename: string, width: number = 500, height: number = 200): string {
filename = encodeURIComponent(filename);
return "https://commons.wikimedia.org/wiki/Special:FilePath/" + filename + "?width=" + width + "&height=" + height;
}
/**
 * Downloads the members of a wikimedia commons category and passes their titles to the callback.
 * If the response has a continuation, the next page is loaded recursively until `alreadyLoaded` exceeds 10.
 * Nothing happens (and the callback is never invoked) if the category name is undefined, null or empty.
 *
 * @param categoryName name of the category; 'Category:' is prepended if missing
 * @param handleCategory invoked with the collected titles
 * @param alreadyLoaded recursion counter, increased by 10 for every followed continuation
 * @param continueParameter extra query parameter (name and value) used to request the next page
 */
static GetCategoryFiles(categoryName: string, handleCategory: ((ImagesInCategory: ImagesInCategory) => void),
alreadyLoaded = 0,
continueParameter: { k: string, param: string } = undefined) {
if (categoryName === undefined || categoryName === null || categoryName === "") {
return;
}
// @ts-ignore
if (!categoryName.startsWith("Category:")) {
categoryName = "Category:" + categoryName;
}
let url = "https://commons.wikimedia.org/w/api.php?" +
"action=query&list=categorymembers&format=json&" +
"&origin=*" +
"&cmtitle=" + encodeURIComponent(categoryName);
if (continueParameter !== undefined) {
url = url + "&" + continueParameter.k + "=" + continueParameter.param;
}
// In a static method, 'this' is the class itself; kept for use within the callback below
const self = this;
console.log("Loading a wikimedia category: ", url)
Utils.downloadJson(url).then((response) => {
let imageOverview = new ImagesInCategory();
let members = response.query?.categorymembers;
if (members === undefined) {
members = [];
}
for (const member of members) {
imageOverview.images.push(member.title);
}
console.log("Got images! ", imageOverview)
if (response.continue === undefined) {
// No continuation: this was the last page
handleCategory(imageOverview);
return;
}
if (alreadyLoaded > 10) {
console.log(`Recursive wikimedia category load stopped for ${categoryName} - got already enough images now (${alreadyLoaded})`)
handleCategory(imageOverview)
return;
}
// Load the next page; the titles of this page are appended to those of the following pages
self.GetCategoryFiles(categoryName,
(recursiveImages) => {
recursiveImages.images.push(...imageOverview.images);
handleCategory(recursiveImages);
},
alreadyLoaded + 10,
{k: "cmcontinue", param: response.continue.cmcontinue})
});
}
/**
 * Downloads the wikidata entity 'Q<id>' and passes the title of its commons sitelink
 * and the value of its first P18 claim (prefixed with 'File:') to the callback.
 */
static GetWikiData(id: number, handleWikidata: ((Wikidata) => void)) {
const url = "https://www.wikidata.org/wiki/Special:EntityData/Q" + id + ".json";
Utils.downloadJson(url).then(response => {
const entity = response.entities["Q" + id];
const commons = entity.sitelinks.commonswiki;
const wd = new Wikidata();
wd.commonsWiki = commons?.title;
// P18 is the 'image' property of wikidata; only the first claim is used
const image = entity.claims.P18?.[0]?.mainsnak?.datavalue?.value;
if (image) {
wd.image = "File:" + image;
}
handleWikidata(wd);
});
}
/**
 * Gives the last path segment of a url; values not starting with 'http' are returned unchanged.
 */
private static ExtractFileName(url: string) {
if (!url.startsWith("http")) {
return url;
}
const path = new URL(url).pathname
return path.substring(path.lastIndexOf("/") + 1);
}
/**
 * Gives the wikimedia commons icon; if a backlink is given, the icon is wrapped in a link to that commons page.
 */
SourceIcon(backlink: string): BaseUIElement {
const img = Svg.wikimedia_commons_white_svg()
.SetStyle("width:2em;height: 2em");
if (backlink === undefined) {
return img
}
return new Link(Svg.wikimedia_commons_white_img,
`https://commons.wikimedia.org/wiki/${backlink}`, true)
}
/**
 * Values which already are a commons wiki url are kept as is;
 * everything else is treated as a file name and converted into a 500x400 'Special:FilePath' url.
 */
PrepareUrl(value: string): string {
if (value.toLowerCase().startsWith("https://commons.wikimedia.org/wiki/")) {
return value;
}
return Wikimedia.ImageNameToUrl(value, 500, 400)
.replace(/'/g, '%27');
}
/**
 * Downloads the 'extmetadata' of the given file and copies it into a LicenseInfo.
 * Gives undefined if the file name is empty or if no metadata is found.
 */
protected async DownloadAttribution(filename: string): Promise<LicenseInfo> {
filename = Wikimedia.ExtractFileName(filename)
if (filename === "") {
return undefined;
}
const url = "https://en.wikipedia.org/w/" +
"api.php?action=query&prop=imageinfo&iiprop=extmetadata&" +
"titles=" + filename +
"&format=json&origin=*";
const data = await Utils.downloadJson(url)
const licenseInfo = new LicenseInfo();
// The metadata is read from the page entry with key -1 in the response
const license = (data.query.pages[-1].imageinfo ?? [])[0]?.extmetadata;
if (license === undefined) {
console.error("This file has no usable metedata or license attached... Please fix the license info file yourself!")
return undefined;
}
licenseInfo.artist = license.Artist?.value;
licenseInfo.license = license.License?.value;
licenseInfo.copyrighted = license.Copyrighted?.value;
licenseInfo.attributionRequired = license.AttributionRequired?.value;
licenseInfo.usageTerms = license.UsageTerms?.value;
licenseInfo.licenseShortName = license.LicenseShortName?.value;
licenseInfo.credit = license.Credit?.value;
licenseInfo.description = license.ImageDescription?.value;
return licenseInfo;
}
}
/**
 * The parts of a wikidata entity which are relevant to find images
 */
export class Wikidata {
    /** Title of the linked Wikimedia Commons page */
    public commonsWiki: string;
    /** File name of the image of the entity */
    public image: string;
}
/**
 * The result of listing a category: every instance starts with its own, empty list
 */
export class ImagesInCategory {
    /** Filenames of relevant images */
    public images: Array<string> = [];
}
/**
 * License and attribution details of one image.
 * All texts default to the empty string, both flags default to `false`.
 */
export class LicenseInfo {
    // Who made the image and under which license it is published
    artist = "";
    license = "";
    licenseShortName = "";
    usageTerms = "";
    // Flags
    attributionRequired = false;
    copyrighted = false;
    // Free-form texts
    credit = "";
    description = "";
}

View file

@ -0,0 +1,163 @@
import ImageProvider, {ProvidedImage} from "./ImageProvider";
import BaseUIElement from "../../UI/BaseUIElement";
import Svg from "../../Svg";
import Link from "../../UI/Base/Link";
import {Utils} from "../../Utils";
import {LicenseInfo} from "./LicenseInfo";
/**
 * This module provides endpoints for wikimedia and others:
 * it converts 'wikimedia_commons'-values (file names, categories, commons urls) into images
 * and downloads the license information of such an image.
 */
export class WikimediaImageProvider extends ImageProvider {

    public readonly defaultKeyPrefixes = ["wikimedia_commons"]
    public static readonly singleton = new WikimediaImageProvider();

    private constructor() {
        super();
    }

    /**
     * Recursively walks a wikimedia commons category in order to search for (image) files
     * Returns (a promise of) a list of page titles; these can be titles of non-file members as well
     * @param categoryName The name of the wikimedia category; 'Category:' is prepended if missing
     * @param maxLoad: no further pages are requested once this many titles are loaded
     * @param continueParameter: if the page indicates that more pages should be loaded, this uses a token to continue. Provided by wikimedia
     */
    private static async GetImagesInCategory(categoryName: string,
                                             maxLoad = 10,
                                             continueParameter: string = undefined): Promise<string[]> {
        if (categoryName === undefined || categoryName === null || categoryName === "") {
            return [];
        }
        if (!categoryName.startsWith("Category:")) {
            categoryName = "Category:" + categoryName;
        }

        let url = "https://commons.wikimedia.org/w/api.php?" +
            "action=query&list=categorymembers&format=json&" +
            "&origin=*" +
            "&cmtitle=" + encodeURIComponent(categoryName);
        if (continueParameter !== undefined) {
            // The token is opaque and can contain reserved characters, thus it must be encoded
            url = `${url}&cmcontinue=${encodeURIComponent(continueParameter)}`;
        }
        console.log("Loading a wikimedia category: ", url)
        const response = await Utils.downloadJson(url)
        const members = response.query?.categorymembers ?? [];
        const imageOverview: string[] = members.map(member => member.title);

        if (response.continue === undefined) {
            // We are done crawling through the category - no continuation in sight
            return imageOverview;
        }
        if (maxLoad - imageOverview.length <= 0) {
            console.log(`Recursive wikimedia category load stopped for ${categoryName}`)
            return imageOverview;
        }

        // We do have a continue token - let's load the next page
        const recursive = await this.GetImagesInCategory(categoryName, maxLoad - imageOverview.length, response.continue.cmcontinue)
        imageOverview.push(...recursive)
        return imageOverview
    }

    /**
     * Gives the last path segment of a url; values not starting with 'http' are returned unchanged.
     */
    private static ExtractFileName(url: string) {
        if (!url.startsWith("http")) {
            return url;
        }
        const path = new URL(url).pathname
        return path.substring(path.lastIndexOf("/") + 1);
    }

    /**
     * Gives the wikimedia commons icon; if a backlink is given, the icon is wrapped in a link to that commons page.
     */
    SourceIcon(backlink: string): BaseUIElement {
        const img = Svg.wikimedia_commons_white_svg()
            .SetStyle("width:2em;height: 2em");
        if (backlink === undefined) {
            return img
        }
        return new Link(Svg.wikimedia_commons_white_img,
            `https://commons.wikimedia.org/wiki/${backlink}`, true)
    }

    /**
     * Values which already are a commons wiki url are kept as is;
     * everything else is treated as a page title and converted into a 500x400 'Special:FilePath' url.
     */
    private PrepareUrl(value: string): string {
        if (value.toLowerCase().startsWith("https://commons.wikimedia.org/wiki/")) {
            return value;
        }
        return (`https://commons.wikimedia.org/wiki/Special:FilePath/${encodeURIComponent(value)}?width=500&height=400`)
    }

    /**
     * Downloads the 'extmetadata' of the given file and copies it into a LicenseInfo.
     * Gives undefined if the file name is empty or if the response has no usable metadata.
     */
    protected async DownloadAttribution(filename: string): Promise<LicenseInfo> {
        filename = WikimediaImageProvider.ExtractFileName(filename)
        if (filename === "") {
            return undefined;
        }

        const url = "https://en.wikipedia.org/w/" +
            "api.php?action=query&prop=imageinfo&iiprop=extmetadata&" +
            "titles=" + filename +
            "&format=json&origin=*";
        const data = await Utils.downloadJson(url)
        const licenseInfo = new LicenseInfo();
        // Guarded on every level: a response without the expected page entry counts as 'no license found'
        const license = (data?.query?.pages?.[-1]?.imageinfo ?? [])[0]?.extmetadata;
        if (license === undefined) {
            console.error("This file has no usable metedata or license attached... Please fix the license info file yourself!")
            return undefined;
        }

        licenseInfo.artist = license.Artist?.value;
        licenseInfo.license = license.License?.value;
        licenseInfo.copyrighted = license.Copyrighted?.value;
        licenseInfo.attributionRequired = license.AttributionRequired?.value;
        licenseInfo.usageTerms = license.UsageTerms?.value;
        licenseInfo.licenseShortName = license.LicenseShortName?.value;
        licenseInfo.credit = license.Credit?.value;
        licenseInfo.description = license.ImageDescription?.value;
        return licenseInfo;
    }

    /**
     * Wraps a file name ('File:' is prepended if missing) into a ProvidedImage without key.
     */
    private async UrlForImage(image: string): Promise<ProvidedImage> {
        if (!image.startsWith("File:")) {
            image = "File:" + image
        }
        return {url: this.PrepareUrl(image), key: undefined, provider: this}
    }

    /**
     * Converts a single value into images:
     * - an 'upload.wikimedia.org'-url is used directly
     * - a category (possibly as commons url) yields the files which are a member of that category
     * - a file (possibly as commons url) yields that file
     * - any other url yields nothing
     * - everything else is assumed to be a file name
     * A value which is not a string yields nothing.
     */
    public async ExtractUrls(key: string, value: string): Promise<Promise<ProvidedImage>[]> {
        if (typeof value !== "string") {
            return []
        }
        const commonsPrefix = "https://commons.wikimedia.org/wiki/"
        if (value.startsWith(commonsPrefix)) {
            value = value.substring(commonsPrefix.length)
        } else if (value.startsWith("https://upload.wikimedia.org")) {
            const result: ProvidedImage = {
                key: undefined,
                url: value,
                provider: this
            }
            return [Promise.resolve(result)]
        }
        if (value.startsWith("Category:")) {
            const titles = await WikimediaImageProvider.GetImagesInCategory(value)
            // A category also lists subcategories and ordinary pages, which are not images
            return titles
                .filter(title => title.startsWith("File:"))
                .map(image => this.UrlForImage(image))
        }
        if (value.startsWith("File:")) {
            return [this.UrlForImage(value)]
        }
        if (value.startsWith("http")) {
            // PRobably an error
            return []
        }
        // We do a last effort and assume this is a file
        return [this.UrlForImage("File:" + value)]
    }
}