@@ -29,6 +29,9 @@ const {
2929 Symbol,
3030 SymbolIterator,
3131 SymbolToStringTag,
32+ TypedArrayPrototypeGetBuffer,
33+ TypedArrayPrototypeGetByteLength,
34+ TypedArrayPrototypeGetByteOffset,
3235 decodeURIComponent,
3336} = primordials ;
3437
@@ -81,13 +84,17 @@ const {
8184 CHAR_LOWERCASE_Z ,
8285 CHAR_PERCENT ,
8386 CHAR_PLUS ,
87+ CHAR_COLON ,
8488} = require ( 'internal/constants' ) ;
8589const path = require ( 'path' ) ;
90+ const { Buffer } = require ( 'buffer' ) ;
8691
8792const {
8893 validateFunction,
8994} = require ( 'internal/validators' ) ;
9095
96+ const { percentDecode } = require ( 'internal/data_url' ) ;
97+
9198const querystring = require ( 'querystring' ) ;
9299
93100const bindingUrl = internalBinding ( 'url' ) ;
@@ -1482,6 +1489,76 @@ function getPathFromURLWin32(url) {
14821489 return StringPrototypeSlice ( pathname , 1 ) ;
14831490}
14841491
/**
 * Converts a file: URL into a Windows file path expressed as raw bytes.
 *
 * The string-returning getPathFromURLWin32 variant scans the input for
 * percent-encoded backslash (\) and forward slash (/) characters in their
 * ASCII/UTF-8 form and forbids them. That check is not meaningful for
 * non-UTF-8 encodings: in Shift-JIS, for example, %5C is the Yen sign and not
 * a backslash, so for a URL such as file:///foo/%5c/bar there is no way to
 * know which one was meant. An encoding option would not help either, since
 * Buffer only knows a handful of text encodings and a single path may mix
 * several of them. This variant therefore performs no such scan and simply
 * percent-decodes the pathname into bytes. On Windows the path separator
 * *is* the ASCII backslash, so the Shift-JIS ambiguity remains a known,
 * unsolved issue; this function only does the best it can by treating the
 * URL as a sequence of bytes.
 * @param {URL} url URL object; only `hostname` and `pathname` are read.
 * @returns {Buffer} `\\host\path` bytes for a UNC path when a hostname is
 *   present, otherwise the decoded path starting at its drive letter.
 * @throws {ERR_INVALID_FILE_URL_PATH} When there is no hostname and the
 *   decoded path does not begin with `<separator><ASCII letter>:`.
 */
function getPathBufferFromURLWin32(url) {
  const { hostname, pathname } = url;

  // The result is a Windows path, so the literal forward slash separators
  // become backslashes. This happens before percent-decoding, which means
  // percent-encoded forward slashes are intentionally left out of the
  // replacement.
  const windowsPathname =
    SideEffectFreeRegExpPrototypeSymbolReplace(FORWARD_SLASH, pathname, '\\');

  // Percent-decode the UTF-8 bytes of the pathname. Invalid escapes such as
  // `%ZZ` are not an error; they are passed through literally.
  const decodedBytes = percentDecode(Buffer.from(windowsPathname, 'utf8'));

  // Expose the decoded bytes as a Buffer over the same underlying memory.
  const backingStore = TypedArrayPrototypeGetBuffer(decodedBytes);
  const byteOffset = TypedArrayPrototypeGetByteOffset(decodedBytes);
  const byteLength = TypedArrayPrototypeGetByteLength(decodedBytes);
  const pathBytes = Buffer.from(backingStore, byteOffset, byteLength);

  if (hostname !== '') {
    // A hostname means this is a UNC path. The hostname goes through
    // domainToUnicode in case it is an IDN in punycode form; only names
    // carrying an appropriate `xn--` prefix are affected. Percent encoding
    // in the hostname is not a concern here because the URL parser has
    // already dealt with it. Each piece is converted to bytes separately
    // and the pieces are then concatenated.
    return Buffer.concat([
      Buffer.from('\\\\', 'ascii'),
      Buffer.from(domainToUnicode(hostname), 'utf8'),
      pathBytes,
    ]);
  }

  // Without a hostname this must be a local path with a drive letter. Byte 0
  // is the leading separator, so only bytes 1 and 2 are inspected: byte 1 has
  // to be an ASCII letter and byte 2 an ASCII `:`. OR-ing with 0x20 folds
  // 'A'..'Z' onto 'a'..'z' so one range comparison covers both cases.
  const foldedDriveLetter = pathBytes[1] | 0x20;
  const driveSeparator = pathBytes[2];
  const hasDriveLetter = foldedDriveLetter >= CHAR_LOWERCASE_A &&
                         foldedDriveLetter <= CHAR_LOWERCASE_Z;

  if (!hasDriveLetter || driveSeparator !== CHAR_COLON) {
    throw new ERR_INVALID_FILE_URL_PATH('must be absolute');
  }

  // Drop the leading separator so the result starts at the drive letter.
  return pathBytes.subarray(1);
}
1561+
14851562function getPathFromURLPosix ( url ) {
14861563 if ( url . hostname !== '' ) {
14871564 throw new ERR_INVALID_FILE_URL_HOST ( platform ) ;
@@ -1500,6 +1577,28 @@ function getPathFromURLPosix(url) {
15001577 return decodeURIComponent ( pathname ) ;
15011578}
15021579
/**
 * Converts a file: URL into a POSIX file path expressed as raw bytes.
 *
 * The string-returning getPathFromURLPosix variant scans the input for
 * percent-encoded forward slash (/) characters in their ASCII/UTF-8 form and
 * forbids them. Such a scan can conflict with non-UTF-8 encodings, and an
 * encoding option would not help because Buffer only knows a handful of text
 * encodings while a single path may mix several of them. This variant
 * therefore performs no such scan and simply percent-decodes the pathname
 * into bytes, treating the URL as a sequence of bytes.
 * @param {URL} url URL object; only `hostname` and `pathname` are read.
 * @returns {Buffer} The percent-decoded bytes of the URL's pathname.
 * @throws {ERR_INVALID_FILE_URL_HOST} When the URL has a non-empty hostname.
 */
function getPathBufferFromURLPosix(url) {
  // A hostname is rejected here before the pathname is even looked at.
  if (url.hostname !== '') {
    throw new ERR_INVALID_FILE_URL_HOST(platform);
  }

  // Percent-decode the UTF-8 bytes of the pathname.
  const encodedBytes = Buffer.from(url.pathname, 'utf8');
  const decodedBytes = percentDecode(encodedBytes);

  // Expose the decoded bytes as a Buffer over the same underlying memory.
  const backingStore = TypedArrayPrototypeGetBuffer(decodedBytes);
  const byteOffset = TypedArrayPrototypeGetByteOffset(decodedBytes);
  const byteLength = TypedArrayPrototypeGetByteLength(decodedBytes);
  return Buffer.from(backingStore, byteOffset, byteLength);
}
1601+
15031602function fileURLToPath ( path , options = kEmptyObject ) {
15041603 const windows = options ?. windows ;
15051604 if ( typeof path === 'string' )
@@ -1511,6 +1610,24 @@ function fileURLToPath(path, options = kEmptyObject) {
15111610 return ( windows ?? isWindows ) ? getPathFromURLWin32 ( path ) : getPathFromURLPosix ( path ) ;
15121611}
15131612
/**
 * Buffer-returning counterpart of fileURLToPath.
 *
 * fileURLToPath does not handle percent encodings of non-UTF-8 bytes at all,
 * so producing a Buffer is necessary for URLs whose conversion to a string
 * would fail.
 * @param {string | URL} path A file: URL. A string is parsed with `new URL()`.
 * @param {{ windows?: boolean }} [options] When `windows` is null or
 *   undefined, the `isWindows` value of this module decides which conversion
 *   is used.
 * @returns {Buffer} The file path as bytes.
 * @throws {ERR_INVALID_ARG_TYPE} When `path` is neither a string nor
 *   accepted by `isURL`.
 * @throws {ERR_INVALID_URL_SCHEME} When the URL's protocol is not `file:`.
 */
function fileURLToPathBuffer(path, options = kEmptyObject) {
  const windows = options?.windows;

  let fileUrl = path;
  if (typeof fileUrl === 'string') {
    fileUrl = new URL(fileUrl);
  } else if (!isURL(fileUrl)) {
    throw new ERR_INVALID_ARG_TYPE('path', ['string', 'URL'], fileUrl);
  }

  if (fileUrl.protocol !== 'file:') {
    throw new ERR_INVALID_URL_SCHEME('file');
  }

  // An explicit `windows` option takes precedence over the detected platform.
  const toPathBuffer = (windows ?? isWindows) ?
    getPathBufferFromURLWin32 :
    getPathBufferFromURLPosix;
  return toPathBuffer(fileUrl);
}
1630+
15141631function pathToFileURL ( filepath , options = kEmptyObject ) {
15151632 const windows = options ?. windows ?? isWindows ;
15161633 const isUNC = windows && StringPrototypeStartsWith ( filepath , '\\\\' ) ;
@@ -1571,6 +1688,7 @@ function getURLOrigin(url) {
15711688
15721689module . exports = {
15731690 fileURLToPath,
1691+ fileURLToPathBuffer,
15741692 pathToFileURL,
15751693 toPathIfFileURL,
15761694 installObjectURLMethods,
0 commit comments