Improve URL validation for blocked and discouraged sites

This commit is contained in:
Pieter Vander Vennet 2024-08-24 01:50:34 +02:00
parent 0d22af629c
commit d28acfdb20
2 changed files with 26 additions and 7 deletions

View file

@ -847,9 +847,10 @@
}, },
"tooLong": "The text is too long, at most 255 characters are allowed. You have {count} characters now.", "tooLong": "The text is too long, at most 255 characters are allowed. You have {count} characters now.",
"url": { "url": {
"aggregator": "{host} is a third-party aggregator website. If possible, search the official website.", "aggregator": "{host} is a third-party website. If possible, search the official website.",
"description": "link to a website", "description": "link to a website",
"feedback": "This is not a valid web address" "feedback": "This is not a valid web address",
"spamSite": "{host} is considered a low-quality website. Using this website is not allowed."
}, },
"wikidata": { "wikidata": {
"description": "A Wikidata identifier", "description": "A Wikidata identifier",

View file

@ -5,14 +5,19 @@ import Translations from "../../i18n/Translations"
export default class UrlValidator extends Validator { export default class UrlValidator extends Validator {
private readonly _forceHttps: boolean private readonly _forceHttps: boolean
private static readonly aggregatorWebsites = new Set<string>([ private static readonly spamWebsites = new Set<string>([
"booking.com", "booking.com",
"hotel-details-guide.com", "hotel-details-guide.com",
"tripingguide.com", "tripingguide.com",
"tripadvisor.com", "tripadvisor.com",
"tripadvisor.co.uk", "tripadvisor.co.uk",
"tripadvisor.com.au", "tripadvisor.com.au",
"katestravelexperience.eu" "katestravelexperience.eu",
"hoteldetails.eu"
])
private static readonly discouragedWebsites = new Set<string>([
"facebook.com"
]) ])
constructor(name?: string, explanation?: string, forceHttps?: boolean) { constructor(name?: string, explanation?: string, forceHttps?: boolean) {
@ -89,15 +94,27 @@ export default class UrlValidator extends Validator {
* *
*/ */
getFeedback(s: string, getCountry?: () => string): Translation | undefined { getFeedback(s: string, getCountry?: () => string): Translation | undefined {
if (
!s.startsWith("http://") &&
!s.startsWith("https://") &&
!s.startsWith("http:")
) {
s = "https://" + s
}
try{ try{
const url = new URL(s) const url = new URL(s)
let host = url.host.toLowerCase() let host = url.host.toLowerCase()
if (host.startsWith("www.")) { if (host.startsWith("www.")) {
host = host.slice(4) host = host.slice(4)
} }
if (UrlValidator.aggregatorWebsites.has(host)) { if (UrlValidator.spamWebsites.has(host)) {
return Translations.t.validation.url.spamSite.Subs({ host })
}
if (UrlValidator.discouragedWebsites.has(host)) {
return Translations.t.validation.url.aggregator.Subs({ host }) return Translations.t.validation.url.aggregator.Subs({ host })
} }
}catch (e) { }catch (e) {
// pass // pass
} }
@ -111,6 +128,7 @@ export default class UrlValidator extends Validator {
} }
isValid(str: string): boolean { isValid(str: string): boolean {
try { try {
if ( if (
!str.startsWith("http://") && !str.startsWith("http://") &&
@ -120,16 +138,16 @@ export default class UrlValidator extends Validator {
str = "https://" + str str = "https://" + str
} }
const url = new URL(str) const url = new URL(str)
const dotIndex = url.host.indexOf(".")
let host = url.host.toLowerCase() let host = url.host.toLowerCase()
if (host.startsWith("www.")) { if (host.startsWith("www.")) {
host = host.slice(4) host = host.slice(4)
} }
if (UrlValidator.aggregatorWebsites.has(host)) { if (UrlValidator.spamWebsites.has(host)) {
return false return false
} }
const dotIndex = url.host.indexOf(".")
return dotIndex > 0 && url.host[url.host.length - 1] !== "." return dotIndex > 0 && url.host[url.host.length - 1] !== "."
} catch (e) { } catch (e) {
return false return false