Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scanner improvements #29

Merged
merged 23 commits into from
Jan 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
3ef0a16
translator: pathlib instances works as expected
jodal Jan 1, 2020
0617bd5
translator: Use os.fsencode(), rename variables
jodal Jan 1, 2020
03c7864
scan: Rename variables
jodal Jan 1, 2020
7012eec
scan: Use pathlib instead of mopidy.internal.path
jodal Jan 1, 2020
35f9b27
scan: Use f-strings
jodal Jan 1, 2020
6e49140
scan: Remove punctuation from logging
jodal Jan 1, 2020
4d133dd
scan: Tweak log output
jodal Jan 1, 2020
376811c
scan: Split run() into four pieces
jodal Jan 1, 2020
4d896e1
translator: Remove unnused path_to_local_directory_uri()
jodal Jan 2, 2020
f0faa40
translator: Fix skipped tests
jodal Jan 2, 2020
3a98020
docs: Add note on leading dot in excluded file extensions
jodal Jan 2, 2020
4e92ccb
scan: Log file finding errors on warning level
jodal Jan 2, 2020
d309a2d
scan: Use file URIs in warnings
jodal Jan 2, 2020
7812494
docs: Update changelog
jodal Jan 2, 2020
6f0b64d
Add .cue to excluded_file_extensions
jodal Jan 2, 2020
ad1f775
storage: Log adding of track on debug level
jodal Jan 2, 2020
6c3f2ee
scan: Reduce convertions between URIs and paths
jodal Jan 2, 2020
26a81cc
translator: Skip some indirection
jodal Jan 2, 2020
4cf798f
translator: Add type hints
jodal Jan 2, 2020
7a9700f
translator: Make path_to_local_track_uri() handle absolute paths
jodal Jan 2, 2020
4334382
translator: Use f-string
jodal Jan 3, 2020
6ab1946
translator: .startswith() can take a tuple of strings
jodal Jan 3, 2020
1d7da69
translator: Remove duplicate test case
jodal Jan 3, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@ Changelog
*********


v3.1.0 (UNRELEASED)
===================

- Improve handling of paths with arbitrary encodings in the scanner. (#20, PR: #29)
- Add ``.cue`` to the list of excluded file extensions. (PR: #29)


v3.0.0 (2019-12-22)
===================

Expand Down
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ The following configuration values are available:

- ``local/excluded_file_extensions``: File extensions to exclude when scanning
the media directory. Values should be separated by either comma or newline.
Each file extension should start with a dot, .e.g. ``.html``.

- ``local/directories``: List of top-level directory names and URIs
for browsing.
Expand Down
186 changes: 116 additions & 70 deletions mopidy_local/commands.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import logging
import os
import pathlib
import time

from mopidy import commands, exceptions
from mopidy.audio import scan, tags
from mopidy.internal import path

from mopidy_local import mtimes, storage, translator

Expand All @@ -26,17 +25,17 @@ class ClearCommand(commands.Command):
def run(self, args, config):
library = storage.LocalStorageProvider(config)

prompt = "\nAre you sure you want to clear the library? [y/N] "
prompt = "Are you sure you want to clear the library? [y/N] "

if input(prompt).lower() != "y":
print("Clearing library aborted.")
print("Clearing library aborted")
return 0

if library.clear():
print("Library successfully cleared.")
print("Library successfully cleared")
return 0

print("Unable to clear library.")
print("Unable to clear library")
return 1


Expand All @@ -62,104 +61,154 @@ def __init__(self):
)

def run(self, args, config):
media_dir = config["local"]["media_dir"]
scan_timeout = config["local"]["scan_timeout"]
flush_threshold = config["local"]["scan_flush_threshold"]
excluded_file_extensions = config["local"]["excluded_file_extensions"]
excluded_file_extensions = tuple(
file_ext.lower() for file_ext in excluded_file_extensions
media_dir = pathlib.Path(config["local"]["media_dir"]).resolve()
library = storage.LocalStorageProvider(config)

file_mtimes = self._find_files(
media_dir=media_dir,
follow_symlinks=config["local"]["scan_follow_symlinks"],
)

library = storage.LocalStorageProvider(config)
files_to_update, files_in_library = self._check_tracks_in_library(
media_dir=media_dir,
file_mtimes=file_mtimes,
library=library,
force_rescan=args.force,
)

file_mtimes, file_errors = mtimes.find_mtimes(
media_dir, follow=config["local"]["scan_follow_symlinks"]
files_to_update.update(
self._find_files_to_scan(
media_dir=media_dir,
file_mtimes=file_mtimes,
files_in_library=files_in_library,
excluded_file_exts=[
file_ext.lower()
for file_ext in config["local"]["excluded_file_extensions"]
],
)
)

logger.info("Found %d files in media_dir.", len(file_mtimes))
self._scan_metadata(
media_dir=media_dir,
file_mtimes=file_mtimes,
files=files_to_update,
library=library,
timeout=config["local"]["scan_timeout"],
flush_threshold=config["local"]["scan_flush_threshold"],
limit=args.limit,
)

library.close()
return 0

def _find_files(self, *, media_dir, follow_symlinks):
logger.info(f"Finding files in {media_dir.as_uri()} ...")
file_mtimes, file_errors = mtimes.find_mtimes(media_dir, follow=follow_symlinks)
logger.info(f"Found {len(file_mtimes)} files in {media_dir.as_uri()}")

if file_errors:
logger.warning(
"Encountered %d errors while scanning media_dir.", len(file_errors)
f"Encountered {len(file_errors)} errors "
f"while finding files in {media_dir.as_uri()}"
)
for name in file_errors:
logger.debug("Scan error %r for %r", file_errors[name], name)
for path in file_errors:
logger.warning(f"Error for {path.as_uri()}: {file_errors[path]}")

return file_mtimes

def _check_tracks_in_library(
self, *, media_dir, file_mtimes, library, force_rescan
):
num_tracks = library.load()
logger.info("Checking %d tracks from library.", num_tracks)
logger.info(f"Checking {num_tracks} tracks from library")

uris_to_update = set()
uris_to_remove = set()
uris_in_library = set()
files_to_update = set()
files_in_library = set()

for track in library.begin():
abspath = translator.local_uri_to_path(track.uri, media_dir)
mtime = file_mtimes.get(abspath)
absolute_path = translator.local_uri_to_path(track.uri, media_dir)
mtime = file_mtimes.get(absolute_path)
if mtime is None:
logger.debug("Missing file %s", track.uri)
logger.debug(f"Removing {track.uri}: File not found")
uris_to_remove.add(track.uri)
elif mtime > track.last_modified or args.force:
uris_to_update.add(track.uri)
uris_in_library.add(track.uri)

logger.info("Removing %d missing tracks.", len(uris_to_remove))
for uri in uris_to_remove:
library.remove(uri)

for abspath in file_mtimes:
relpath = os.path.relpath(abspath, media_dir)
uri = translator.path_to_local_track_uri(relpath)

if "/." in relpath or relpath.startswith("."):
logger.debug("Skipped %s: Hidden directory/file.", uri)
elif relpath.lower().endswith(excluded_file_extensions):
logger.debug("Skipped %s: File extension excluded.", uri)
elif uri not in uris_in_library:
uris_to_update.add(uri)

logger.info("Found %d tracks which need to be updated.", len(uris_to_update))
elif mtime > track.last_modified or force_rescan:
files_to_update.add(absolute_path)
files_in_library.add(absolute_path)

logger.info(f"Removing {len(uris_to_remove)} missing tracks")
for local_uri in uris_to_remove:
library.remove(local_uri)

return files_to_update, files_in_library

def _find_files_to_scan(
self, *, media_dir, file_mtimes, files_in_library, excluded_file_exts
):
files_to_update = set()

for absolute_path in file_mtimes:
relative_path = absolute_path.relative_to(media_dir)
file_uri = absolute_path.as_uri()

if any(p.startswith(".") for p in relative_path.parts):
logger.debug(f"Skipped {file_uri}: Hidden directory/file")
elif relative_path.suffix.lower() in excluded_file_exts:
logger.debug(f"Skipped {file_uri}: File extension excluded")
elif absolute_path not in files_in_library:
files_to_update.add(absolute_path)

logger.info(f"Found {len(files_to_update)} tracks which need to be updated")
return files_to_update

def _scan_metadata(
self, *, media_dir, file_mtimes, files, library, timeout, flush_threshold, limit
):
logger.info("Scanning...")

uris_to_update = sorted(uris_to_update, key=lambda v: v.lower())
uris_to_update = uris_to_update[: args.limit]
files = sorted(files)[:limit]

scanner = scan.Scanner(scan_timeout)
progress = _Progress(flush_threshold, len(uris_to_update))
scanner = scan.Scanner(timeout)
progress = _ScanProgress(batch_size=flush_threshold, total=len(files))

for uri in uris_to_update:
for absolute_path in files:
try:
relpath = translator.local_uri_to_path(uri, media_dir)
file_uri = path.path_to_uri(media_dir / relpath)
file_uri = absolute_path.as_uri()
result = scanner.scan(file_uri)

if not result.playable:
logger.warning("Failed %s: No audio found in file.", uri)
logger.warning(
f"Failed scanning {file_uri}: No audio found in file"
)
elif result.duration < MIN_DURATION_MS:
logger.warning(
"Failed %s: Track shorter than %dms", uri, MIN_DURATION_MS
f"Failed scanning {file_uri}: "
f"Track shorter than {MIN_DURATION_MS}ms"
)
else:
mtime = file_mtimes.get(media_dir / relpath)
local_uri = translator.path_to_local_track_uri(
absolute_path, media_dir
)
mtime = file_mtimes.get(absolute_path)
track = tags.convert_tags_to_track(result.tags).replace(
uri=uri, length=result.duration, last_modified=mtime
uri=local_uri, length=result.duration, last_modified=mtime
)
library.add(track, result.tags, result.duration)
logger.debug("Added %s", track.uri)
logger.debug(f"Added {track.uri}")
except exceptions.ScannerError as error:
logger.warning("Failed %s: %s", uri, error)
logger.warning(f"Failed scanning {file_uri}: {error}")

if progress.increment():
progress.log()
if library.flush():
logger.debug("Progress flushed.")
logger.debug("Progress flushed")

progress.log()
library.close()
logger.info("Done scanning.")
return 0
logger.info("Done scanning")


class _Progress:
def __init__(self, batch_size, total):
class _ScanProgress:
def __init__(self, *, batch_size, total):
self.count = 0
self.batch_size = batch_size
self.total = total
Expand All @@ -173,14 +222,11 @@ def log(self):
duration = time.time() - self.start
if self.count >= self.total or not self.count:
logger.info(
"Scanned %d of %d files in %ds.", self.count, self.total, duration
f"Scanned {self.count} of {self.total} files in {duration:.3f}s."
)
else:
remainder = duration / self.count * (self.total - self.count)
logger.info(
"Scanned %d of %d files in %ds, ~%ds left.",
self.count,
self.total,
duration,
remainder,
f"Scanned {self.count} of {self.total} files "
f"in {duration:.3f}s, ~{remainder:.0f}s left"
)
1 change: 1 addition & 0 deletions mopidy_local/ext.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ scan_timeout = 1000
scan_flush_threshold = 100
scan_follow_symlinks = false
excluded_file_extensions =
.cue
.directory
.html
.jpeg
Expand Down
2 changes: 1 addition & 1 deletion mopidy_local/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def begin(self):
return schema.tracks(self._connect())

def add(self, track, tags=None, duration=None):
logger.info("Adding track: %s", track)
logger.debug("Adding track: %s", track)
images = None
if track.album and track.album.name: # FIXME: album required
uri = translator.local_uri_to_file_uri(track.uri, self._media_dir)
Expand Down
50 changes: 25 additions & 25 deletions mopidy_local/translator.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,41 @@
from __future__ import annotations

import logging
import os
import pathlib
import urllib
from pathlib import Path
from typing import Union

logger = logging.getLogger(__name__)


def local_uri_to_file_uri(uri, media_dir):
def local_uri_to_file_uri(local_uri: str, media_dir: Path) -> str:
"""Convert local track or directory URI to file URI."""
return path_to_file_uri(local_uri_to_path(uri, media_dir))
path = local_uri_to_path(local_uri, media_dir)
return path.as_uri()


def local_uri_to_path(uri, media_dir):
def local_uri_to_path(local_uri: str, media_dir: Path) -> Path:
"""Convert local track or directory URI to absolute path."""
if not uri.startswith("local:directory:") and not uri.startswith("local:track:"):
if not local_uri.startswith(("local:directory:", "local:track:")):
raise ValueError("Invalid URI.")
uri_path = uri.split(":", 2)[2]
file_bytes = urllib.parse.unquote_to_bytes(urllib.parse.urlsplit(uri_path).path)
file_path = pathlib.Path(os.fsdecode(file_bytes))
uri_path = urllib.parse.urlsplit(local_uri.split(":", 2)[2]).path
file_bytes = urllib.parse.unquote_to_bytes(uri_path)
file_path = Path(os.fsdecode(file_bytes))
return media_dir / file_path


def path_to_file_uri(path):
def path_to_file_uri(path: Union[str, bytes, Path]) -> str:
"""Convert absolute path to file URI."""
return pathlib.Path(os.fsdecode(path)).as_uri()


def path_to_local_track_uri(relpath):
"""Convert path relative to :confval:`local/media_dir` to local track
URI."""
if isinstance(relpath, str):
relpath = relpath.encode()
return "local:track:%s" % urllib.parse.quote(relpath)


def path_to_local_directory_uri(relpath):
"""Convert path relative to :confval:`local/media_dir` to directory URI."""
if isinstance(relpath, str):
relpath = relpath.encode()
return "local:directory:%s" % urllib.parse.quote(relpath)
ppath = Path(os.fsdecode(path))
assert ppath.is_absolute()
return ppath.as_uri()


def path_to_local_track_uri(path: Union[str, bytes, Path], media_dir: Path) -> str:
"""Convert path to local track URI."""
ppath = Path(os.fsdecode(path))
if ppath.is_absolute():
ppath = ppath.relative_to(media_dir)
quoted_path = urllib.parse.quote(bytes(ppath))
return f"local:track:{quoted_path}"
7 changes: 5 additions & 2 deletions tests/test_library.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pathlib
import os
import unittest

Expand Down Expand Up @@ -42,7 +43,7 @@ def tearDown(self): # noqa: N802

def test_add_noname_ascii(self):
name = "Test.mp3"
uri = translator.path_to_local_track_uri(name)
uri = translator.path_to_local_track_uri(name, pathlib.Path("/media/dir"))
track = Track(name=name, uri=uri)
self.storage.begin()
self.storage.add(track)
Expand All @@ -51,7 +52,9 @@ def test_add_noname_ascii(self):

def test_add_noname_utf8(self):
name = "Mi\xf0vikudags.mp3"
uri = translator.path_to_local_track_uri(name.encode())
uri = translator.path_to_local_track_uri(
name.encode(), pathlib.Path("/media/dir")
)
track = Track(name=name, uri=uri)
self.storage.begin()
self.storage.add(track)
Expand Down
Loading