@@ -612,6 +612,54 @@ def test_urlsplit_attributes(self):
612
612
with self .assertRaisesRegex (ValueError , "out of range" ):
613
613
p .port
614
614
615
+ def test_urlsplit_remove_unsafe_bytes (self ):
616
+ # Remove ASCII tabs and newlines from input, for http common case scenario.
617
+ url = "h\n ttp://www.python\n .org\t /java\n script:\t alert('msg\r \n ')/?query\n =\t something#frag\n ment"
618
+ p = urllib .parse .urlsplit (url )
619
+ self .assertEqual (p .scheme , "http" )
620
+ self .assertEqual (p .netloc , "www.python.org" )
621
+ self .assertEqual (p .path , "/javascript:alert('msg')/" )
622
+ self .assertEqual (p .query , "query=something" )
623
+ self .assertEqual (p .fragment , "fragment" )
624
+ self .assertEqual (p .username , None )
625
+ self .assertEqual (p .password , None )
626
+ self .assertEqual (p .hostname , "www.python.org" )
627
+ self .assertEqual (p .port , None )
628
+ self .assertEqual (p .geturl (), "http://www.python.org/javascript:alert('msg')/?query=something#fragment" )
629
+
630
+ # Remove ASCII tabs and newlines from input as bytes, for http common case scenario.
631
+ url = b"h\n ttp://www.python\n .org\t /java\n script:\t alert('msg\r \n ')/?query\n =\t something#frag\n ment"
632
+ p = urllib .parse .urlsplit (url )
633
+ self .assertEqual (p .scheme , b"http" )
634
+ self .assertEqual (p .netloc , b"www.python.org" )
635
+ self .assertEqual (p .path , b"/javascript:alert('msg')/" )
636
+ self .assertEqual (p .query , b"query=something" )
637
+ self .assertEqual (p .fragment , b"fragment" )
638
+ self .assertEqual (p .username , None )
639
+ self .assertEqual (p .password , None )
640
+ self .assertEqual (p .hostname , b"www.python.org" )
641
+ self .assertEqual (p .port , None )
642
+ self .assertEqual (p .geturl (), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment" )
643
+
644
+ # any scheme
645
+ url = "x-new-scheme\t ://www.python\n .org\t /java\n script:\t alert('msg\r \n ')/?query\n =\t something#frag\n ment"
646
+ p = urllib .parse .urlsplit (url )
647
+ self .assertEqual (p .geturl (), "x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment" )
648
+
649
+ # Remove ASCII tabs and newlines from input as bytes, any scheme.
650
+ url = b"x-new-scheme\t ://www.python\n .org\t /java\n script:\t alert('msg\r \n ')/?query\n =\t something#frag\n ment"
651
+ p = urllib .parse .urlsplit (url )
652
+ self .assertEqual (p .geturl (), b"x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment" )
653
+
654
+ # Unsafe bytes is not returned from urlparse cache.
655
+ # scheme is stored after parsing, sending an scheme with unsafe bytes *will not* return an unsafe scheme
656
+ url = "https://www.python\n .org\t /java\n script:\t alert('msg\r \n ')/?query\n =\t something#frag\n ment"
657
+ scheme = "htt\n ps"
658
+ for _ in range (2 ):
659
+ p = urllib .parse .urlsplit (url , scheme = scheme )
660
+ self .assertEqual (p .scheme , "https" )
661
+ self .assertEqual (p .geturl (), "https://www.python.org/javascript:alert('msg')/?query=something#fragment" )
662
+
615
663
def test_attributes_bad_port (self ):
616
664
"""Check handling of invalid ports."""
617
665
for bytes in (False , True ):
0 commit comments