@@ -5,22 +5,25 @@ import {
5
5
htmlEntitiesRegex ,
6
6
invalidProtocolRegex ,
7
7
relativeFirstCharacters ,
8
- urlSchemeRegex ,
9
8
whitespaceEscapeCharsRegex ,
9
+ urlSchemeRegex ,
10
10
} from "./constants" ;
11
11
12
12
/**
 * Reports whether the first character of `url` appears in
 * `relativeFirstCharacters`.
 *
 * @param url - The URL string to inspect.
 * @returns `true` when `url[0]` is found in `relativeFirstCharacters`,
 *   `false` otherwise.
 */
function isRelativeUrlWithoutProtocol(url: string): boolean {
  const leadingChar = url[0];
  const position = relativeFirstCharacters.indexOf(leadingChar);
  // indexOf yields -1 when nothing matches, and a non-negative index otherwise.
  return position !== -1;
}
15
15
16
- // adapted from https://stackoverflow.com/a/29824550/2601552
17
16
/**
 * Removes every match of `ctrlCharactersRegex` from `str`, then replaces each
 * match of `htmlEntitiesRegex` with the character that `String.fromCharCode`
 * produces for the regex's first capture group.
 *
 * @param str - The text to decode.
 * @returns The text with those matches stripped and entity matches replaced.
 */
function decodeHtmlCharacters(str: string) {
  const stripped = str.replace(ctrlCharactersRegex, "");
  // The first callback argument (the whole match) is unused; only the capture
  // group is converted. String.fromCharCode coerces it to a number itself.
  const toCharacter = (_entity, code) => String.fromCharCode(code);
  return stripped.replace(htmlEntitiesRegex, toCharacter);
}
23
22
23
/**
 * Reports whether `url` can be parsed as an absolute URL by the WHATWG `URL`
 * parser.
 *
 * Uses the `URL` constructor rather than the static `URL.canParse`, which is
 * a comparatively recent addition and is missing from older runtimes; the
 * constructor throws exactly when `canParse` would return `false`.
 *
 * @param url - The candidate URL string (no base URL is applied).
 * @returns `true` if `new URL(url)` succeeds, `false` if it throws.
 */
function isValidUrl(url: string): boolean {
  try {
    // Constructed only for its parse check; the instance itself is not needed.
    new URL(url);
    return true;
  } catch {
    return false;
  }
}
26
+
24
27
function decodeURI ( uri : string ) : string {
25
28
try {
26
29
return decodeURIComponent ( uri ) ;
@@ -36,8 +39,9 @@ export function sanitizeUrl(url?: string): string {
36
39
if ( ! url ) {
37
40
return BLANK_URL ;
38
41
}
42
+
39
43
let charsToDecode ;
40
- let decodedUrl = decodeURI ( url ) ;
44
+ let decodedUrl = decodeURI ( url . trim ( ) ) ;
41
45
42
46
do {
43
47
decodedUrl = decodeHtmlCharacters ( decodedUrl )
@@ -54,7 +58,9 @@ export function sanitizeUrl(url?: string): string {
54
58
decodedUrl . match ( htmlCtrlEntityRegex ) ||
55
59
decodedUrl . match ( whitespaceEscapeCharsRegex ) ;
56
60
} while ( charsToDecode && charsToDecode . length > 0 ) ;
61
+
57
62
const sanitizedUrl = decodedUrl ;
63
+
58
64
if ( ! sanitizedUrl ) {
59
65
return BLANK_URL ;
60
66
}
@@ -63,17 +69,39 @@ export function sanitizeUrl(url?: string): string {
63
69
return sanitizedUrl ;
64
70
}
65
71
66
- const urlSchemeParseResults = sanitizedUrl . match ( urlSchemeRegex ) ;
72
+ // Remove any leading whitespace before checking the URL scheme
73
+ const trimmedUrl = sanitizedUrl . trimStart ( ) ;
74
+ const urlSchemeParseResults = trimmedUrl . match ( urlSchemeRegex ) ;
67
75
68
76
if ( ! urlSchemeParseResults ) {
69
77
return sanitizedUrl ;
70
78
}
71
79
72
- const urlScheme = urlSchemeParseResults [ 0 ] ;
80
+ const urlScheme = urlSchemeParseResults [ 0 ] . toLowerCase ( ) . trim ( ) ;
73
81
74
82
if ( invalidProtocolRegex . test ( urlScheme ) ) {
75
83
return BLANK_URL ;
76
84
}
77
85
78
- return sanitizedUrl ;
86
+ const backSanitized = trimmedUrl . replace ( / \\ / g, "/" ) ;
87
+
88
+ // Handle special cases for mailto: and custom deep-link protocols
89
+ if ( urlScheme === "mailto:" || urlScheme . includes ( "://" ) ) {
90
+ return backSanitized ;
91
+ }
92
+
93
+ // For http and https URLs, perform additional validation
94
+ if ( urlScheme === "http:" || urlScheme === "https:" ) {
95
+ if ( ! isValidUrl ( backSanitized ) ) {
96
+ return BLANK_URL ;
97
+ }
98
+
99
+ const url = new URL ( backSanitized ) ;
100
+ url . protocol = url . protocol . toLowerCase ( ) ;
101
+ url . hostname = url . hostname . toLowerCase ( ) ;
102
+
103
+ return url . toString ( ) ;
104
+ }
105
+
106
+ return backSanitized ;
79
107
}
0 commit comments