@@ -81,13 +81,17 @@ const {
8181 CHAR_LOWERCASE_Z ,
8282 CHAR_PERCENT ,
8383 CHAR_PLUS ,
84+ CHAR_COLON ,
8485} = require ( 'internal/constants' ) ;
8586const path = require ( 'path' ) ;
87+ const { Buffer } = require ( 'buffer' ) ;
8688
8789const {
8890 validateFunction,
8991} = require ( 'internal/validators' ) ;
9092
93+ const { percentDecode } = require ( 'internal/data_url' ) ;
94+
9195const querystring = require ( 'querystring' ) ;
9296
9397const bindingUrl = internalBinding ( 'url' ) ;
@@ -1482,6 +1486,76 @@ function getPathFromURLWin32(url) {
14821486 return StringPrototypeSlice ( pathname , 1 ) ;
14831487}
14841488
/**
 * Converts a `file:` URL into a Windows file path expressed as raw bytes.
 *
 * The string-returning getPathFromURLWin32 variant looks for encoded
 * backslash (\) and forward slash (/) characters and forbids them. That
 * check assumes ASCII/UTF-8, which does not hold for every encoding: in
 * Shift-JIS, for example, %5C is the Yen sign rather than a backslash, so
 * for a URL such as file:///foo/%5c/bar there is no way to know what was
 * intended. An encoding option would not help either, because Buffer only
 * knows a fixed set of text encodings and a single path may mix several.
 * This variant therefore performs no such scan and simply percent-decodes
 * the path into bytes. Since the Windows path separator *is* the ASCII
 * backslash, the Shift-JIS ambiguity remains a known, unsolved limitation.
 * @param {URL} url The parsed `file:` URL; `hostname` and `pathname` are read.
 * @returns {Buffer} A UNC path (`\\host\...`) when the URL has a hostname,
 *   otherwise a drive-letter path with the leading separator removed.
 * @throws {ERR_INVALID_FILE_URL_PATH} When the URL has no hostname and the
 *   decoded path does not begin with a drive letter followed by `:`.
 */
function getPathBufferFromURLWin32(url) {
  const { hostname, pathname } = url;

  // Literal forward slashes become Windows separators *before* decoding, so
  // percent-encoded forward slashes are intentionally left untouched here.
  const windowsPathname = SideEffectFreeRegExpPrototypeSymbolReplace(FORWARD_SLASH, pathname, '\\');

  // Percent-decode the UTF-8 bytes of the path. Per the original notes on
  // this code, malformed escapes such as `%ZZ` are passed through literally.
  // The result is re-wrapped as a Buffer view over the same memory.
  const decodedBytes = percentDecode(Buffer.from(windowsPathname, 'utf8'));
  const pathBytes = Buffer.from(decodedBytes.buffer,
                                decodedBytes.byteOffset,
                                decodedBytes.byteLength);

  if (hostname === '') {
    // Local path: byte 0 is the leading separator, byte 1 must be an ASCII
    // letter (OR-ing with 0x20 folds 'A'..'Z' onto 'a'..'z') and byte 2 must
    // be an ASCII colon.
    const driveLetter = pathBytes[1] | 0x20;
    const hasDriveLetter = driveLetter >= CHAR_LOWERCASE_A &&
                           driveLetter <= CHAR_LOWERCASE_Z;
    if (!hasDriveLetter || pathBytes[2] !== CHAR_COLON) {
      throw new ERR_INVALID_FILE_URL_PATH('must be absolute');
    }

    // Drop only the leading separator byte.
    return pathBytes.subarray(1);
  }

  // A hostname means a UNC path. The hostname goes through domainToUnicode
  // in case it is a punycode (`xn--`) IDN; percent-encoding in the hostname
  // is expected to have been handled by the URL parser already. Each piece
  // is turned into bytes and the pieces are concatenated.
  return Buffer.concat([
    Buffer.from('\\\\', 'ascii'),
    Buffer.from(domainToUnicode(hostname), 'utf8'),
    pathBytes,
  ]);
}
1558+
14851559function getPathFromURLPosix ( url ) {
14861560 if ( url . hostname !== '' ) {
14871561 throw new ERR_INVALID_FILE_URL_HOST ( platform ) ;
@@ -1500,6 +1574,26 @@ function getPathFromURLPosix(url) {
15001574 return decodeURIComponent ( pathname ) ;
15011575}
15021576
/**
 * Converts a `file:` URL into a POSIX file path expressed as raw bytes.
 *
 * Unlike the string-returning getPathFromURLPosix variant, no scan for
 * forward slash (/) characters is performed. Such a check assumes
 * ASCII/UTF-8 and may conflict with other encodings; an encoding option
 * would not help because Buffer only knows a fixed set of text encodings
 * and a single path may mix several. The path is therefore treated purely
 * as a sequence of bytes and percent-decoded literally.
 * @param {URL} url The parsed `file:` URL; `hostname` and `pathname` are read.
 * @returns {Buffer} The percent-decoded bytes of the URL's pathname.
 * @throws {ERR_INVALID_FILE_URL_HOST} When the URL carries a hostname.
 */
function getPathBufferFromURLPosix(url) {
  if (url.hostname !== '') {
    throw new ERR_INVALID_FILE_URL_HOST(platform);
  }

  const encodedBytes = Buffer.from(url.pathname, 'utf8');
  const decodedBytes = percentDecode(encodedBytes);

  // Re-wrap the decoded bytes as a Buffer view over the same memory.
  return Buffer.from(decodedBytes.buffer,
                     decodedBytes.byteOffset,
                     decodedBytes.byteLength);
}
1596+
15031597function fileURLToPath ( path , options = kEmptyObject ) {
15041598 const windows = options ?. windows ;
15051599 if ( typeof path === 'string' )
@@ -1511,6 +1605,24 @@ function fileURLToPath(path, options = kEmptyObject) {
15111605 return ( windows ?? isWindows ) ? getPathFromURLWin32 ( path ) : getPathFromURLPosix ( path ) ;
15121606}
15131607
/**
 * Buffer-returning counterpart of fileURLToPath. The string-returning
 * function cannot handle percent-encoded bytes that are not valid UTF-8,
 * so this variant exists for paths where the conversion to a string would
 * fail.
 * @param {string | URL} path A `file:` URL, as a string or a URL object.
 * @param {{ windows?: boolean }} [options] When `windows` is `null` or
 *   `undefined`, the module-level `isWindows` value decides which path
 *   rules apply.
 * @returns {Buffer} The file path as raw bytes.
 * @throws {ERR_INVALID_ARG_TYPE} When `path` is neither a string nor a URL.
 * @throws {ERR_INVALID_URL_SCHEME} When the URL's protocol is not `file:`.
 */
function fileURLToPathBuffer(path, options = kEmptyObject) {
  const useWindowsRules = options?.windows ?? isWindows;

  let fileUrl;
  if (typeof path === 'string') {
    fileUrl = new URL(path);
  } else if (isURL(path)) {
    fileUrl = path;
  } else {
    throw new ERR_INVALID_ARG_TYPE('path', ['string', 'URL'], path);
  }

  if (fileUrl.protocol !== 'file:') {
    throw new ERR_INVALID_URL_SCHEME('file');
  }

  const toPathBuffer = useWindowsRules ? getPathBufferFromURLWin32 : getPathBufferFromURLPosix;
  return toPathBuffer(fileUrl);
}
1625+
15141626function pathToFileURL ( filepath , options = kEmptyObject ) {
15151627 const windows = options ?. windows ?? isWindows ;
15161628 const isUNC = windows && StringPrototypeStartsWith ( filepath , '\\\\' ) ;
@@ -1571,6 +1683,7 @@ function getURLOrigin(url) {
15711683
15721684module . exports = {
15731685 fileURLToPath,
1686+ fileURLToPathBuffer,
15741687 pathToFileURL,
15751688 toPathIfFileURL,
15761689 installObjectURLMethods,
0 commit comments