|
6 | 6 | import threading
|
7 | 7 | from collections import deque
|
8 | 8 | from contextlib import ExitStack, suppress
|
| 9 | +from glob import has_magic |
9 | 10 | from typing import TYPE_CHECKING, Any, Callable, Optional, Union
|
10 | 11 |
|
11 |
| -from fsspec.spec import AbstractFileSystem |
| 12 | +from fsspec.spec import DEFAULT_CALLBACK, AbstractFileSystem |
12 | 13 | from funcy import wrap_with
|
13 | 14 |
|
14 | 15 | from dvc.log import logger
|
15 |
| -from dvc_objects.fs.base import FileSystem |
| 16 | +from dvc.utils.threadpool import ThreadPoolExecutor |
| 17 | +from dvc_objects.fs.base import AnyFSPath, FileSystem |
16 | 18 |
|
17 | 19 | from .data import DataFileSystem
|
18 | 20 |
|
19 | 21 | if TYPE_CHECKING:
|
20 | 22 | from dvc.repo import Repo
|
21 | 23 | from dvc.types import DictStrAny, StrPath
|
22 | 24 |
|
| 25 | + from .callbacks import Callback |
| 26 | + |
23 | 27 | logger = logger.getChild(__name__)
|
24 | 28 |
|
25 | 29 | RepoFactory = Union[Callable[..., "Repo"], type["Repo"]]
|
@@ -474,9 +478,110 @@ def _info( # noqa: C901
|
474 | 478 | info["name"] = path
|
475 | 479 | return info
|
476 | 480 |
|
| 481 | + def get( |
| 482 | + self, |
| 483 | + rpath, |
| 484 | + lpath, |
| 485 | + recursive=False, |
| 486 | + callback=DEFAULT_CALLBACK, |
| 487 | + maxdepth=None, |
| 488 | + batch_size=None, |
| 489 | + **kwargs, |
| 490 | + ): |
| 491 | + self._get( |
| 492 | + rpath, |
| 493 | + lpath, |
| 494 | + recursive=recursive, |
| 495 | + callback=callback, |
| 496 | + maxdepth=maxdepth, |
| 497 | + batch_size=batch_size, |
| 498 | + **kwargs, |
| 499 | + ) |
| 500 | + |
| 501 | + def _get( # noqa: C901 |
| 502 | + self, |
| 503 | + rpath, |
| 504 | + lpath, |
| 505 | + recursive=False, |
| 506 | + callback=DEFAULT_CALLBACK, |
| 507 | + maxdepth=None, |
| 508 | + batch_size=None, |
| 509 | + **kwargs, |
| 510 | + ) -> list[Union[tuple[str, str], tuple[str, str, dict]]]: |
| 511 | + if ( |
| 512 | + isinstance(rpath, list) |
| 513 | + or isinstance(lpath, list) |
| 514 | + or has_magic(rpath) |
| 515 | + or not self.exists(rpath) |
| 516 | + or not recursive |
| 517 | + ): |
| 518 | + super().get( |
| 519 | + rpath, |
| 520 | + lpath, |
| 521 | + recursive=recursive, |
| 522 | + callback=callback, |
| 523 | + maxdepth=maxdepth, |
| 524 | + **kwargs, |
| 525 | + ) |
| 526 | + return [] |
| 527 | + |
| 528 | + if os.path.isdir(lpath) or lpath.endswith(os.path.sep): |
| 529 | + lpath = self.join(lpath, os.path.basename(rpath)) |
| 530 | + |
| 531 | + if self.isfile(rpath): |
| 532 | + with callback.branched(rpath, lpath) as child: |
| 533 | + self.get_file(rpath, lpath, callback=child, **kwargs) |
| 534 | + return [(rpath, lpath)] |
| 535 | + |
| 536 | + _files = [] |
| 537 | + _dirs: list[str] = [] |
| 538 | + for root, dirs, files in self.walk(rpath, maxdepth=maxdepth, detail=True): |
| 539 | + if files: |
| 540 | + callback.set_size((callback.size or 0) + len(files)) |
| 541 | + |
| 542 | + parts = self.relparts(root, rpath) |
| 543 | + if parts in ((os.curdir,), ("",)): |
| 544 | + parts = () |
| 545 | + dest_root = os.path.join(lpath, *parts) |
| 546 | + if not maxdepth or len(parts) < maxdepth - 1: |
| 547 | + _dirs.extend(f"{dest_root}{os.path.sep}{d}" for d in dirs) |
| 548 | + |
| 549 | + key = self._get_key_from_relative(root) |
| 550 | + _, dvc_fs, _ = self._get_subrepo_info(key) |
| 551 | + |
| 552 | + for name, info in files.items(): |
| 553 | + src_path = f"{root}{self.sep}{name}" |
| 554 | + dest_path = f"{dest_root}{os.path.sep}{name}" |
| 555 | + _files.append((dvc_fs, src_path, dest_path, info)) |
| 556 | + |
| 557 | + os.makedirs(lpath, exist_ok=True) |
| 558 | + for d in _dirs: |
| 559 | + os.mkdir(d) |
| 560 | + |
| 561 | + def _get_file(arg): |
| 562 | + dvc_fs, src, dest, info = arg |
| 563 | + dvc_info = info.get("dvc_info") |
| 564 | + if dvc_info and dvc_fs: |
| 565 | + dvc_path = dvc_info["name"] |
| 566 | + dvc_fs.get_file( |
| 567 | + dvc_path, dest, callback=callback, info=dvc_info, **kwargs |
| 568 | + ) |
| 569 | + else: |
| 570 | + self.get_file(src, dest, callback=callback, **kwargs) |
| 571 | + return src, dest, info |
| 572 | + |
| 573 | + with ThreadPoolExecutor(max_workers=batch_size) as executor: |
| 574 | + return list(executor.imap_unordered(_get_file, _files)) |
| 575 | + |
477 | 576 | def get_file(self, rpath, lpath, **kwargs):
|
478 | 577 | key = self._get_key_from_relative(rpath)
|
479 | 578 | fs_path = self._from_key(key)
|
| 579 | + |
| 580 | + dirpath = os.path.dirname(lpath) |
| 581 | + if dirpath: |
| 582 | + # makedirs raises error if the string is empty |
| 583 | + os.makedirs(dirpath, exist_ok=True) |
| 584 | + |
480 | 585 | try:
|
481 | 586 | return self.repo.fs.get_file(fs_path, lpath, **kwargs)
|
482 | 587 | except FileNotFoundError:
|
@@ -553,6 +658,45 @@ def immutable(self):
|
553 | 658 | def getcwd(self):
|
554 | 659 | return self.fs.getcwd()
|
555 | 660 |
|
| 661 | + def _get( |
| 662 | + self, |
| 663 | + from_info: Union[AnyFSPath, list[AnyFSPath]], |
| 664 | + to_info: Union[AnyFSPath, list[AnyFSPath]], |
| 665 | + callback: "Callback" = DEFAULT_CALLBACK, |
| 666 | + recursive: bool = False, |
| 667 | + batch_size: Optional[int] = None, |
| 668 | + **kwargs, |
| 669 | + ) -> list[Union[tuple[str, str], tuple[str, str, dict]]]: |
| 670 | + # FileSystem.get is non-recursive by default if arguments are lists |
| 671 | + # otherwise, it's recursive. |
| 672 | + recursive = not (isinstance(from_info, list) and isinstance(to_info, list)) |
| 673 | + return self.fs._get( |
| 674 | + from_info, |
| 675 | + to_info, |
| 676 | + callback=callback, |
| 677 | + recursive=recursive, |
| 678 | + batch_size=batch_size, |
| 679 | + **kwargs, |
| 680 | + ) |
| 681 | + |
| 682 | + def get( |
| 683 | + self, |
| 684 | + from_info: Union[AnyFSPath, list[AnyFSPath]], |
| 685 | + to_info: Union[AnyFSPath, list[AnyFSPath]], |
| 686 | + callback: "Callback" = DEFAULT_CALLBACK, |
| 687 | + recursive: bool = False, |
| 688 | + batch_size: Optional[int] = None, |
| 689 | + **kwargs, |
| 690 | + ) -> None: |
| 691 | + self._get( |
| 692 | + from_info, |
| 693 | + to_info, |
| 694 | + callback=callback, |
| 695 | + batch_size=batch_size, |
| 696 | + recursive=recursive, |
| 697 | + **kwargs, |
| 698 | + ) |
| 699 | + |
556 | 700 | @property
|
557 | 701 | def fsid(self) -> str:
|
558 | 702 | return self.fs.fsid
|
|
0 commit comments