Merge develop
This commit is contained in:
commit
59356545ed
68 changed files with 1670 additions and 67 deletions
|
@ -69,6 +69,30 @@ interface CategoryQueryAPIResponse {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Shape of the JSON returned by a MediaWiki `action=query&prop=images` request,
 * i.e. the list of files embedded on one or more wiki pages.
 */
interface ImagesQueryAPIResponse {
    /**
     * Continuation tokens. MediaWiki only includes this object when more results
     * are available than fit in a single response, so it is absent on the last
     * (or only) batch and must be treated as optional.
     */
    continue?: {
        imcontinue: string
        continue: string
    }
    query: {
        /** Present only when the API had to normalise one of the requested titles. */
        normalized?: {
            from: string
            to: string
        }[]
        /** Requested pages; the visible caller only reads the first entry of this object. */
        pages: {
            [key: string]: {
                pageid: number
                ns: number
                title: string
                /** Files used on the page; omitted when the page embeds no files. */
                images?: {
                    ns: number
                    title: string
                }[]
            }
        }
    }
}
|
||||
|
||||
interface TemplateQueryAPIResponse {
|
||||
batchcomplete: string
|
||||
query: {
|
||||
|
@ -96,13 +120,14 @@ const licenseMapping = {}
|
|||
/**
 * Maps wiki template titles to license names.
 *
 * Typed as a string-keyed record so it can be looked up with an arbitrary
 * template title; titles that are not listed here yield `undefined` at runtime.
 */
const templateMapping: Record<string, string> = {
    "Template:PD": "Public Domain",
    "Template:CC0": "CC0 1.0",
}
|
||||
|
||||
async function main(args: string[]) {
|
||||
if (args.length < 2) {
|
||||
console.log("Usage: downloadCommons.ts <output folder> <url> <?url> <?url> .. ")
|
||||
console.log(
|
||||
"Example: npx vite-node downloadCommons.ts -- assets/svg https://commons.wikimedia.org/wiki/File:Example.jpg"
|
||||
"Example: npx vite-node scripts/downloadCommons.ts -- assets/svg https://commons.wikimedia.org/wiki/File:Example.jpg"
|
||||
)
|
||||
process.exit(1)
|
||||
}
|
||||
|
@ -128,8 +153,24 @@ async function main(args: string[]) {
|
|||
for (const member of apiDetails.query.categorymembers) {
|
||||
await downloadImage(member.title, outputFolder, baseUrl)
|
||||
}
|
||||
} else {
|
||||
} else if (url.includes("File:")) {
|
||||
await downloadImage(commonsFileName, outputFolder, baseUrl)
|
||||
} else {
|
||||
// Probably a page url, try to get all images from the page
|
||||
const apiUrl = `${baseUrl}/w/api.php?action=query&format=json&prop=images&titles=${commonsFileName}&imlimit=250`
|
||||
const response = await fetch(apiUrl)
|
||||
const apiDetails: ImagesQueryAPIResponse = await response.json()
|
||||
const page = apiDetails.query.pages[Object.keys(apiDetails.query.pages)[0]]
|
||||
if (page.images) {
|
||||
for (const image of page.images) {
|
||||
await downloadImage(image.title, outputFolder, baseUrl)
|
||||
}
|
||||
} else {
|
||||
console.log(
|
||||
"\x1b[31m%s\x1b[0m",
|
||||
`URL ${url} doesn't seem to contain any images! Skipping...`
|
||||
)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
console.log(
|
||||
|
@ -154,6 +195,12 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st
|
|||
const apiDetails: ImageQueryAPIResponse = await response.json()
|
||||
const missingPage = apiDetails.query.pages["-1"]
|
||||
|
||||
// Check if the local file already exists, if it does, skip it
|
||||
if (existsSync(`${outputFolder}/${filename}`)) {
|
||||
console.log(`\x1b[33m%s\x1b[0m`, `${filename} already exists, skipping...`)
|
||||
return
|
||||
}
|
||||
|
||||
// Check if the file exists, locally or externally
|
||||
if (missingPage !== undefined) {
|
||||
// Image does not exist locally, check if it exists externally
|
||||
|
@ -271,8 +318,8 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st
|
|||
// Save the license information
|
||||
const licenseInfo: SmallLicense = {
|
||||
path: cleanFileName,
|
||||
license: licenseMapping[license] || license,
|
||||
authors: [author],
|
||||
license: licenseMapping[license] || license.replace("CC BY", "CC-BY"),
|
||||
authors: [removeLinks(author)],
|
||||
sources: [wikiUrl],
|
||||
}
|
||||
|
||||
|
@ -293,4 +340,9 @@ async function downloadImage(filename: string, outputFolder: string, baseUrl: st
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Strips HTML anchor tags from `text`, keeping the text between them.
 *
 * Only real `<a ...>` elements are unwrapped: the word boundary after `a`
 * prevents other tags that merely start with "a" (`<abbr>`, `<article>`, ...)
 * from being mistaken for a link. Matching is case-insensitive and the link
 * text may span several lines.
 *
 * @param text HTML fragment, e.g. an author field taken from a wiki page
 * @returns the fragment with every `<a>...</a>` replaced by its inner content
 */
function removeLinks(text: string): string {
    // `[^>]*` stays inside the opening tag; `[\s\S]*?` lets the link text contain newlines
    return text.replace(/<a\b[^>]*>([\s\S]*?)<\/a\s*>/gi, "$1")
}
|
||||
|
||||
main(process.argv.slice(2))
|
||||
|
|
|
@ -5,7 +5,7 @@ import ScriptUtils from "./ScriptUtils"
|
|||
import Script from "./Script"
|
||||
|
||||
const knownLanguages = ["en", "nl", "de", "fr", "es", "gl", "ca"]
|
||||
|
||||
const ignoreTerms = ["searchTerms"]
|
||||
class TranslationPart {
|
||||
contents: Map<string, TranslationPart | string> = new Map<string, TranslationPart | string>()
|
||||
|
||||
|
@ -49,6 +49,7 @@ class TranslationPart {
|
|||
if (!translations.hasOwnProperty(translationsKey)) {
|
||||
continue
|
||||
}
|
||||
|
||||
const v = translations[translationsKey]
|
||||
if (typeof v != "string") {
|
||||
console.error(
|
||||
|
@ -106,6 +107,9 @@ class TranslationPart {
|
|||
if (!object.hasOwnProperty(key)) {
|
||||
continue
|
||||
}
|
||||
if (ignoreTerms.indexOf(key) >= 0) {
|
||||
continue
|
||||
}
|
||||
|
||||
if (dontTranslateKeys?.indexOf(key) >= 0) {
|
||||
continue
|
||||
|
|
|
@ -9,7 +9,7 @@ export class Server {
|
|||
handle: {
|
||||
mustMatch: string | RegExp
|
||||
mimetype: string
|
||||
handle: (path: string) => Promise<string>
|
||||
handle: (path: string, queryParams: URLSearchParams) => Promise<string>
|
||||
}[]
|
||||
) {
|
||||
handle.push({
|
||||
|
@ -89,7 +89,7 @@ export class Server {
|
|||
}
|
||||
|
||||
try {
|
||||
const result = await handler.handle(path)
|
||||
const result = await handler.handle(path, url.searchParams)
|
||||
res.writeHead(200, { "Content-Type": handler.mimetype })
|
||||
res.write(result)
|
||||
res.end()
|
||||
|
|
43
scripts/serverLdScrape.ts
Normal file
43
scripts/serverLdScrape.ts
Normal file
|
@ -0,0 +1,43 @@
|
|||
import Script from "../scripts/Script"
|
||||
import { Server } from "../scripts/server"
|
||||
import { Utils } from "../src/Utils"
|
||||
import parse from "node-html-parser"
|
||||
/**
 * Script which starts a small HTTP server. A request matching `extractgraph`
 * with a `url` query parameter makes the server download that page and answer
 * with a JSON array containing every parseable
 * `<script type="application/ld+json">` snippet embedded in it.
 */
class ServerLdScrape extends Script {
    constructor() {
        super("Starts a server which fetches a webpage and returns embedded LD+JSON")
    }

    /**
     * @param args optional first argument: the port to listen on (defaults to 2346)
     */
    async main(args: string[]): Promise<void> {
        const port = Number(args[0] ?? 2346)
        new Server(port, {}, [
            {
                mustMatch: "extractgraph",
                mimetype: "application/ld+json",
                async handle(_path: string, searchParams: URLSearchParams): Promise<string> {
                    const url = searchParams.get("url")
                    // `get` returns null when the parameter is absent; fail with a clear
                    // message instead of handing null to the downloader.
                    if (!url) {
                        throw new Error("Missing required query parameter 'url'")
                    }
                    // NOTE(review): the target URL is fully caller-controlled, so this endpoint
                    // fetches arbitrary addresses on behalf of the client. Consider restricting
                    // allowed protocols/hosts if the server is reachable by untrusted clients.
                    const dloaded = await Utils.download(url, {
                        "User-Agent":
                            "MapComplete/openstreetmap scraper; pietervdvn@posteo.net; https://github.com/pietervdvn/MapComplete",
                    })
                    const parsed = parse(dloaded)
                    const scripts = Array.from(parsed.getElementsByTagName("script"))
                    const snippets: unknown[] = []
                    for (const script of scripts) {
                        const tp = script.attributes["type"]
                        if (tp !== "application/ld+json") {
                            continue
                        }
                        try {
                            snippets.push(JSON.parse(script.textContent))
                        } catch (e) {
                            // A malformed snippet is logged and skipped so the remaining ones are still returned
                            console.error(e)
                        }
                    }

                    return JSON.stringify(snippets)
                },
            },
        ])
    }
}
|
||||
|
||||
new ServerLdScrape().run()
|
Loading…
Add table
Add a link
Reference in a new issue