2023-03-29 17:56:42 +02:00
import { Validator } from "../Validator"
2024-08-21 12:05:20 +02:00
import { Translation } from "../../i18n/Translation"
import Translations from "../../i18n/Translations"
2023-03-29 17:21:20 +02:00
/**
 * Validator for web addresses.
 *
 * - `reformat` prefixes a scheme when none was typed and strips well-known tracking query parameters
 * - `isValid` rejects unparsable addresses, hosts without a dot (or ending in one) and known spam hosts
 * - `getFeedback` explains why a spam or discouraged host should not be used
 */
export default class UrlValidator extends Validator {
    /** Hosts (lowercase, without a leading "www.") which are always rejected */
    private static readonly spamWebsites = new Set<string>([
        "booking.com",
        "hotel-details-guide.com",
        "tripingguide.com",
        "tripadvisor.com",
        "tripadvisor.co.uk",
        "tripadvisor.com.au",
        "katestravelexperience.eu",
        "hoteldetails.eu",
    ])

    /** Hosts (lowercase, without a leading "www.") which trigger feedback in `getFeedback` but are not rejected by `isValid` */
    private static readonly discouragedWebsites = new Set<string>(["facebook.com"])

    /**
     * Names of query parameters which `reformat` removes.
     * Stored in lowercase; they are compared case-insensitively against the parameters of the URL.
     */
    private static readonly trackingParams = new Set<string>(
        [
            "fbclid", // Facebook click identifier
            "gclid",
            "cmpid",
            "agid",
            "utm",
            "utm_source",
            "utm_medium",
            "campaignid",
            "campaign",
            "AdGroupId",
            "AdGroup",
            "TargetId",
            "msclkid",
            "pk_source",
            "pk_medium",
            "pk_campaign",
            "pk_content",
            "pk_kwd",
        ].map((param) => param.toLowerCase())
    )

    /** If set, `reformat` rewrites the protocol of every URL to `https:` */
    private readonly _forceHttps: boolean

    /**
     * @param name identifier handed to the parent Validator; defaults to "url"
     * @param explanation description handed to the parent Validator; a default text is used when omitted
     * @param forceHttps if true, `reformat` always emits an `https:` URL; defaults to false
     */
    constructor(name?: string, explanation?: string, forceHttps?: boolean) {
        super(
            name ?? "url",
            explanation ??
                "The validatedTextField will format URLs to always be valid and have a https://-header (even though the 'https'-part will be hidden from the user. Furthermore, some tracking parameters will be removed",
            "url"
        )
        this._forceHttps = forceHttps ?? false
    }

    /**
     * Prefixes `https://` unless the string already starts with an `http:` or `https:` scheme.
     * The scheme is matched case-insensitively, so `HTTPS://...` is not prefixed a second time.
     */
    private static withScheme(str: string): string {
        return /^https?:/i.test(str) ? str : "https://" + str
    }

    /**
     * The lowercased hostname of the URL without a leading "www.".
     * Uses `hostname` instead of `host` so that an explicit port does not end up in the result.
     */
    private static bareHost(url: URL): string {
        const hostname = url.hostname.toLowerCase()
        return hostname.startsWith("www.") ? hostname.slice(4) : hostname
    }

    /**
     * Normalises a typed web address: adds a scheme if needed, optionally forces https and removes tracking parameters.
     * The case of the path is kept as typed, as URLs are case sensitive (see #763).
     * Returns `undefined` (after logging the error) if the string cannot be parsed as a URL.
     *
     * new UrlValidator().reformat("https://example.com/page?fbclid=123456&utm_source=mastodon") // => "https://example.com/page"
     * new UrlValidator().reformat("https://example.com/page?AdGroupId=42&id=7") // => "https://example.com/page?id=7"
     * new UrlValidator().reformat("HTTPS://example.com/Page") // => "https://example.com/Page"
     */
    reformat(str: string): string {
        try {
            const url = new URL(UrlValidator.withScheme(str))
            if (this._forceHttps) {
                url.protocol = "https:"
            }

            // Start from the lowercase names and add every key of this URL which matches one of them
            // in another casing: URLSearchParams.delete is case-sensitive.
            // The keys are collected first so that the parameter list is not modified while iterating it.
            const toRemove = new Set<string>(UrlValidator.trackingParams)
            url.searchParams.forEach((_value, key) => {
                if (UrlValidator.trackingParams.has(key.toLowerCase())) {
                    toRemove.add(key)
                }
            })
            toRemove.forEach((key) => url.searchParams.delete(key))

            const cleaned = url.toString()
            if (cleaned.endsWith("/") && !str.endsWith("/")) {
                // Do not add a trailing '/' if it wasn't typed originally
                return cleaned.slice(0, -1)
            }
            return cleaned
        } catch (e) {
            console.error(e)
            return undefined
        }
    }

    /**
     * Gives feedback for a typed web address: a dedicated message for spam or discouraged hosts,
     * otherwise whatever the inherited `getFeedback` returns for the address (with scheme prefixed).
     *
     * const v = new UrlValidator()
     * v.getFeedback("example.").textFor("en") // => "This is not a valid web address"
     * v.getFeedback("https://booking.com/some-hotel.html").textFor("en") // => Translations.t.validation.url.spamSite.Subs({host: "booking.com"}).textFor("en")
     */
    getFeedback(s: string, getCountry?: () => string): Translation | undefined {
        const address = UrlValidator.withScheme(s)
        try {
            const host = UrlValidator.bareHost(new URL(address))
            if (UrlValidator.spamWebsites.has(host)) {
                return Translations.t.validation.url.spamSite.Subs({ host })
            }
            if (UrlValidator.discouragedWebsites.has(host)) {
                return Translations.t.validation.url.aggregator.Subs({ host })
            }
        } catch (e) {
            // Ignored on purpose: an unparsable address is left for the inherited getFeedback to judge
        }
        return super.getFeedback(address, getCountry) || undefined
    }

    /**
     * An address is valid if it parses as a URL, its hostname contains a dot (not as first or last character)
     * and the host is not a known spam website.
     *
     * const v = new UrlValidator()
     * v.isValid("https://booking.com/some-hotel.html") // => false
     * v.isValid("HTTPS://example.com") // => true
     */
    isValid(str: string): boolean {
        try {
            const url = new URL(UrlValidator.withScheme(str))
            if (UrlValidator.spamWebsites.has(UrlValidator.bareHost(url))) {
                return false
            }
            // 'hostname' (not 'host'), so that a port number cannot hide a trailing dot
            const hostname = url.hostname
            return hostname.indexOf(".") > 0 && !hostname.endsWith(".")
        } catch (e) {
            return false
        }
    }
}