Skip to content

Commit

Permalink
Merge pull request #29 from jodal/scanner-improvements
Browse files Browse the repository at this point in the history
Scanner improvements
  • Loading branch information
jodal authored Jan 3, 2020
2 parents 152a0e1 + 1d7da69 commit 9f5872c
Show file tree
Hide file tree
Showing 8 changed files with 173 additions and 125 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@ Changelog
*********


v3.1.0 (UNRELEASED)
===================

- Improve handling of paths with arbitrary encodings in the scanner. (#20, PR: #29)
- Add ``.cue`` to the list of excluded file extensions. (PR: #29)


v3.0.0 (2019-12-22)
===================

Expand Down
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ The following configuration values are available:

- ``local/excluded_file_extensions``: File extensions to exclude when scanning
the media directory. Values should be separated by either comma or newline.
Each file extension should start with a dot, .e.g. ``.html``.

- ``local/directories``: List of top-level directory names and URIs
for browsing.
Expand Down
186 changes: 116 additions & 70 deletions mopidy_local/commands.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import logging
import os
import pathlib
import time

from mopidy import commands, exceptions
from mopidy.audio import scan, tags
from mopidy.internal import path

from mopidy_local import mtimes, storage, translator

Expand All @@ -26,17 +25,17 @@ class ClearCommand(commands.Command):
def run(self, args, config):
library = storage.LocalStorageProvider(config)

prompt = "\nAre you sure you want to clear the library? [y/N] "
prompt = "Are you sure you want to clear the library? [y/N] "

if input(prompt).lower() != "y":
print("Clearing library aborted.")
print("Clearing library aborted")
return 0

if library.clear():
print("Library successfully cleared.")
print("Library successfully cleared")
return 0

print("Unable to clear library.")
print("Unable to clear library")
return 1


Expand All @@ -62,104 +61,154 @@ def __init__(self):
)

def run(self, args, config):
media_dir = config["local"]["media_dir"]
scan_timeout = config["local"]["scan_timeout"]
flush_threshold = config["local"]["scan_flush_threshold"]
excluded_file_extensions = config["local"]["excluded_file_extensions"]
excluded_file_extensions = tuple(
file_ext.lower() for file_ext in excluded_file_extensions
media_dir = pathlib.Path(config["local"]["media_dir"]).resolve()
library = storage.LocalStorageProvider(config)

file_mtimes = self._find_files(
media_dir=media_dir,
follow_symlinks=config["local"]["scan_follow_symlinks"],
)

library = storage.LocalStorageProvider(config)
files_to_update, files_in_library = self._check_tracks_in_library(
media_dir=media_dir,
file_mtimes=file_mtimes,
library=library,
force_rescan=args.force,
)

file_mtimes, file_errors = mtimes.find_mtimes(
media_dir, follow=config["local"]["scan_follow_symlinks"]
files_to_update.update(
self._find_files_to_scan(
media_dir=media_dir,
file_mtimes=file_mtimes,
files_in_library=files_in_library,
excluded_file_exts=[
file_ext.lower()
for file_ext in config["local"]["excluded_file_extensions"]
],
)
)

logger.info("Found %d files in media_dir.", len(file_mtimes))
self._scan_metadata(
media_dir=media_dir,
file_mtimes=file_mtimes,
files=files_to_update,
library=library,
timeout=config["local"]["scan_timeout"],
flush_threshold=config["local"]["scan_flush_threshold"],
limit=args.limit,
)

library.close()
return 0

def _find_files(self, *, media_dir, follow_symlinks):
logger.info(f"Finding files in {media_dir.as_uri()} ...")
file_mtimes, file_errors = mtimes.find_mtimes(media_dir, follow=follow_symlinks)
logger.info(f"Found {len(file_mtimes)} files in {media_dir.as_uri()}")

if file_errors:
logger.warning(
"Encountered %d errors while scanning media_dir.", len(file_errors)
f"Encountered {len(file_errors)} errors "
f"while finding files in {media_dir.as_uri()}"
)
for name in file_errors:
logger.debug("Scan error %r for %r", file_errors[name], name)
for path in file_errors:
logger.warning(f"Error for {path.as_uri()}: {file_errors[path]}")

return file_mtimes

def _check_tracks_in_library(
self, *, media_dir, file_mtimes, library, force_rescan
):
num_tracks = library.load()
logger.info("Checking %d tracks from library.", num_tracks)
logger.info(f"Checking {num_tracks} tracks from library")

uris_to_update = set()
uris_to_remove = set()
uris_in_library = set()
files_to_update = set()
files_in_library = set()

for track in library.begin():
abspath = translator.local_uri_to_path(track.uri, media_dir)
mtime = file_mtimes.get(abspath)
absolute_path = translator.local_uri_to_path(track.uri, media_dir)
mtime = file_mtimes.get(absolute_path)
if mtime is None:
logger.debug("Missing file %s", track.uri)
logger.debug(f"Removing {track.uri}: File not found")
uris_to_remove.add(track.uri)
elif mtime > track.last_modified or args.force:
uris_to_update.add(track.uri)
uris_in_library.add(track.uri)

logger.info("Removing %d missing tracks.", len(uris_to_remove))
for uri in uris_to_remove:
library.remove(uri)

for abspath in file_mtimes:
relpath = os.path.relpath(abspath, media_dir)
uri = translator.path_to_local_track_uri(relpath)

if "/." in relpath or relpath.startswith("."):
logger.debug("Skipped %s: Hidden directory/file.", uri)
elif relpath.lower().endswith(excluded_file_extensions):
logger.debug("Skipped %s: File extension excluded.", uri)
elif uri not in uris_in_library:
uris_to_update.add(uri)

logger.info("Found %d tracks which need to be updated.", len(uris_to_update))
elif mtime > track.last_modified or force_rescan:
files_to_update.add(absolute_path)
files_in_library.add(absolute_path)

logger.info(f"Removing {len(uris_to_remove)} missing tracks")
for local_uri in uris_to_remove:
library.remove(local_uri)

return files_to_update, files_in_library

def _find_files_to_scan(
self, *, media_dir, file_mtimes, files_in_library, excluded_file_exts
):
files_to_update = set()

for absolute_path in file_mtimes:
relative_path = absolute_path.relative_to(media_dir)
file_uri = absolute_path.as_uri()

if any(p.startswith(".") for p in relative_path.parts):
logger.debug(f"Skipped {file_uri}: Hidden directory/file")
elif relative_path.suffix.lower() in excluded_file_exts:
logger.debug(f"Skipped {file_uri}: File extension excluded")
elif absolute_path not in files_in_library:
files_to_update.add(absolute_path)

logger.info(f"Found {len(files_to_update)} tracks which need to be updated")
return files_to_update

def _scan_metadata(
self, *, media_dir, file_mtimes, files, library, timeout, flush_threshold, limit
):
logger.info("Scanning...")

uris_to_update = sorted(uris_to_update, key=lambda v: v.lower())
uris_to_update = uris_to_update[: args.limit]
files = sorted(files)[:limit]

scanner = scan.Scanner(scan_timeout)
progress = _Progress(flush_threshold, len(uris_to_update))
scanner = scan.Scanner(timeout)
progress = _ScanProgress(batch_size=flush_threshold, total=len(files))

for uri in uris_to_update:
for absolute_path in files:
try:
relpath = translator.local_uri_to_path(uri, media_dir)
file_uri = path.path_to_uri(media_dir / relpath)
file_uri = absolute_path.as_uri()
result = scanner.scan(file_uri)

if not result.playable:
logger.warning("Failed %s: No audio found in file.", uri)
logger.warning(
f"Failed scanning {file_uri}: No audio found in file"
)
elif result.duration < MIN_DURATION_MS:
logger.warning(
"Failed %s: Track shorter than %dms", uri, MIN_DURATION_MS
f"Failed scanning {file_uri}: "
f"Track shorter than {MIN_DURATION_MS}ms"
)
else:
mtime = file_mtimes.get(media_dir / relpath)
local_uri = translator.path_to_local_track_uri(
absolute_path, media_dir
)
mtime = file_mtimes.get(absolute_path)
track = tags.convert_tags_to_track(result.tags).replace(
uri=uri, length=result.duration, last_modified=mtime
uri=local_uri, length=result.duration, last_modified=mtime
)
library.add(track, result.tags, result.duration)
logger.debug("Added %s", track.uri)
logger.debug(f"Added {track.uri}")
except exceptions.ScannerError as error:
logger.warning("Failed %s: %s", uri, error)
logger.warning(f"Failed scanning {file_uri}: {error}")

if progress.increment():
progress.log()
if library.flush():
logger.debug("Progress flushed.")
logger.debug("Progress flushed")

progress.log()
library.close()
logger.info("Done scanning.")
return 0
logger.info("Done scanning")


class _Progress:
def __init__(self, batch_size, total):
class _ScanProgress:
def __init__(self, *, batch_size, total):
self.count = 0
self.batch_size = batch_size
self.total = total
Expand All @@ -173,14 +222,11 @@ def log(self):
duration = time.time() - self.start
if self.count >= self.total or not self.count:
logger.info(
"Scanned %d of %d files in %ds.", self.count, self.total, duration
f"Scanned {self.count} of {self.total} files in {duration:.3f}s."
)
else:
remainder = duration / self.count * (self.total - self.count)
logger.info(
"Scanned %d of %d files in %ds, ~%ds left.",
self.count,
self.total,
duration,
remainder,
f"Scanned {self.count} of {self.total} files "
f"in {duration:.3f}s, ~{remainder:.0f}s left"
)
1 change: 1 addition & 0 deletions mopidy_local/ext.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ scan_timeout = 1000
scan_flush_threshold = 100
scan_follow_symlinks = false
excluded_file_extensions =
.cue
.directory
.html
.jpeg
Expand Down
2 changes: 1 addition & 1 deletion mopidy_local/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def begin(self):
return schema.tracks(self._connect())

def add(self, track, tags=None, duration=None):
logger.info("Adding track: %s", track)
logger.debug("Adding track: %s", track)
images = None
if track.album and track.album.name: # FIXME: album required
uri = translator.local_uri_to_file_uri(track.uri, self._media_dir)
Expand Down
50 changes: 25 additions & 25 deletions mopidy_local/translator.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,41 @@
from __future__ import annotations

import logging
import os
import pathlib
import urllib
from pathlib import Path
from typing import Union

logger = logging.getLogger(__name__)


def local_uri_to_file_uri(uri, media_dir):
def local_uri_to_file_uri(local_uri: str, media_dir: Path) -> str:
"""Convert local track or directory URI to file URI."""
return path_to_file_uri(local_uri_to_path(uri, media_dir))
path = local_uri_to_path(local_uri, media_dir)
return path.as_uri()


def local_uri_to_path(uri, media_dir):
def local_uri_to_path(local_uri: str, media_dir: Path) -> Path:
"""Convert local track or directory URI to absolute path."""
if not uri.startswith("local:directory:") and not uri.startswith("local:track:"):
if not local_uri.startswith(("local:directory:", "local:track:")):
raise ValueError("Invalid URI.")
uri_path = uri.split(":", 2)[2]
file_bytes = urllib.parse.unquote_to_bytes(urllib.parse.urlsplit(uri_path).path)
file_path = pathlib.Path(os.fsdecode(file_bytes))
uri_path = urllib.parse.urlsplit(local_uri.split(":", 2)[2]).path
file_bytes = urllib.parse.unquote_to_bytes(uri_path)
file_path = Path(os.fsdecode(file_bytes))
return media_dir / file_path


def path_to_file_uri(path):
def path_to_file_uri(path: Union[str, bytes, Path]) -> str:
"""Convert absolute path to file URI."""
return pathlib.Path(os.fsdecode(path)).as_uri()


def path_to_local_track_uri(relpath):
"""Convert path relative to :confval:`local/media_dir` to local track
URI."""
if isinstance(relpath, str):
relpath = relpath.encode()
return "local:track:%s" % urllib.parse.quote(relpath)


def path_to_local_directory_uri(relpath):
"""Convert path relative to :confval:`local/media_dir` to directory URI."""
if isinstance(relpath, str):
relpath = relpath.encode()
return "local:directory:%s" % urllib.parse.quote(relpath)
ppath = Path(os.fsdecode(path))
assert ppath.is_absolute()
return ppath.as_uri()


def path_to_local_track_uri(path: Union[str, bytes, Path], media_dir: Path) -> str:
"""Convert path to local track URI."""
ppath = Path(os.fsdecode(path))
if ppath.is_absolute():
ppath = ppath.relative_to(media_dir)
quoted_path = urllib.parse.quote(bytes(ppath))
return f"local:track:{quoted_path}"
7 changes: 5 additions & 2 deletions tests/test_library.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pathlib
import os
import unittest

Expand Down Expand Up @@ -42,7 +43,7 @@ def tearDown(self): # noqa: N802

def test_add_noname_ascii(self):
name = "Test.mp3"
uri = translator.path_to_local_track_uri(name)
uri = translator.path_to_local_track_uri(name, pathlib.Path("/media/dir"))
track = Track(name=name, uri=uri)
self.storage.begin()
self.storage.add(track)
Expand All @@ -51,7 +52,9 @@ def test_add_noname_ascii(self):

def test_add_noname_utf8(self):
name = "Mi\xf0vikudags.mp3"
uri = translator.path_to_local_track_uri(name.encode())
uri = translator.path_to_local_track_uri(
name.encode(), pathlib.Path("/media/dir")
)
track = Track(name=name, uri=uri)
self.storage.begin()
self.storage.add(track)
Expand Down
Loading

0 comments on commit 9f5872c

Please sign in to comment.