Skip to content

Commit 208780e

Browse files
committed
Add support for compound file extensions.
Created a new class ReaderTree that is an infinitely nested defaultdict containing components of the extension. See comments on PR getpelican#2816.
1 parent 1b87ef6 commit 208780e

File tree

4 files changed

+261
-8
lines changed

4 files changed

+261
-8
lines changed

Diff for: pelican/generators.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,13 @@ def _include_path(self, path, extensions=None):
123123
if any(fnmatch.fnmatch(basename, ignore) for ignore in ignores):
124124
return False
125125

126-
ext = os.path.splitext(basename)[1][1:]
127-
if extensions is False or ext in extensions:
126+
if extensions is False:
128127
return True
129128

129+
for ext in extensions:
130+
if basename.endswith(f'.{ext}'):
131+
return True
132+
130133
return False
131134

132135
def get_files(self, paths, exclude=[], extensions=None):

Diff for: pelican/readers.py

+162-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
import datetime
22
import logging
3+
import operator
34
import os
45
import re
5-
from collections import OrderedDict
6+
from collections import OrderedDict, defaultdict
7+
from functools import reduce
68
from html import escape
79
from html.parser import HTMLParser
810
from io import StringIO
@@ -496,8 +498,8 @@ class Readers(FileStampDataCacher):
496498

497499
def __init__(self, settings=None, cache_name=''):
498500
self.settings = settings or {}
499-
self.readers = {}
500-
self.reader_classes = {}
501+
self.readers = ReaderTree()
502+
self.reader_classes = ReaderTree()
501503

502504
for cls in [BaseReader] + BaseReader.__subclasses__():
503505
if not cls.enabled:
@@ -542,8 +544,7 @@ def read_file(self, base_path, path, content_class=Page, fmt=None,
542544
source_path, content_class.__name__)
543545

544546
if not fmt:
545-
_, ext = os.path.splitext(os.path.basename(path))
546-
fmt = ext[1:]
547+
fmt = self.readers.get_format(path)
547548

548549
if fmt not in self.readers:
549550
raise TypeError(
@@ -746,3 +747,159 @@ def parse_path_metadata(source_path, settings=None, process=None):
746747
v = process(k, v)
747748
metadata[k] = v
748749
return metadata
750+
751+
752+
class ReaderTree():
    """Mapping-like registry keyed by (possibly compound) file extension.

    Keys are extensions without the leading dot, e.g. ``'html'`` or
    ``'md.html'``. Internally the key is split on ``'.'`` and stored in a
    recursive ``defaultdict`` (``tree_dd``) from the *last* component
    inwards; the value registered for a key lives under the empty-string
    key of its node. ``'html'`` and ``'md.html'`` are therefore stored as
    ``{'html': {'': <html value>, 'md': {'': <md.html value>}}}``.

    Read-only operations (lookup, containment, iteration, ``get_format``)
    never create nodes; only ``__setitem__`` grows the tree.
    """

    # Sentinel telling "no default supplied" apart from an explicit ``None``.
    _MISSING = object()

    def __init__(self):
        self.tree_dd = ReaderTree._rec_dd()

    def __str__(self):
        return str(self.as_dict())

    def __iter__(self):
        """Yield every registered extension (same order as ``items()``)."""
        for key, _ in ReaderTree._walk(self.tree_dd, ()):
            yield key

    def __setitem__(self, key, value):
        """Register ``value`` for the extension ``key``."""
        components = reversed(key.split('.'))
        # The recursive defaultdict creates intermediate nodes on demand.
        reduce(operator.getitem, components, self.tree_dd)[''] = value

    def __getitem__(self, key):
        """Return the value registered for exactly ``key``.

        Raises:
            KeyError: if ``key`` itself was never registered, even when it
                is a suffix of a longer registered extension.
        """
        node = self._find_node(key)
        if node is None or '' not in node:
            raise KeyError(key)
        return node['']

    def __delitem__(self, key):
        """Remove the entry for ``key`` and prune branches left empty.

        Raises:
            KeyError: if ``key`` is not registered.
        """
        trail = []  # (parent node, component) pairs from the root down
        node = self.tree_dd
        for component in reversed(key.split('.')):
            child = node.get(component)
            if not isinstance(child, defaultdict):
                raise KeyError(key)
            trail.append((node, component))
            node = child
        if '' not in node:
            raise KeyError(key)
        del node['']
        # Drop nodes that no longer lead to any registered extension so
        # that as_dict() does not accumulate empty branches.
        for parent, component in reversed(trail):
            if parent[component]:
                break
            del parent[component]

    def __contains__(self, item):
        try:
            self[item]
            return True
        except KeyError:
            return False

    def __len__(self):
        return sum(1 for _ in self)

    def keys(self):
        """Return an iterator over the registered extensions."""
        return iter(self)

    def values(self):
        """Yield the registered values, in the same order as ``keys()``."""
        for _, value in ReaderTree._walk(self.tree_dd, ()):
            yield value

    def items(self):
        """Return an iterator of ``(extension, value)`` pairs.

        Both halves of each pair come from one traversal, so they always
        belong together regardless of registration order.
        """
        return ReaderTree._walk(self.tree_dd, ())

    def get(self, key, default=_MISSING):
        """Return the value for ``key``, or ``default`` when it is absent.

        Unlike ``dict.get``, calling this without ``default`` raises
        ``KeyError`` for a missing key; that behaviour is kept so existing
        callers are unaffected.
        """
        try:
            return self[key]
        except KeyError:
            if default is ReaderTree._MISSING:
                raise
            return default

    def setdefault(self, key, value):
        """Return the value for ``key``, registering ``value`` if absent."""
        if key in self:
            return self[key]
        else:
            self[key] = value
            return value

    def clear(self):
        """Remove every registered extension."""
        self.tree_dd.clear()

    def pop(self, key, default=None):
        """Remove ``key`` and return its value.

        If ``key`` is absent, return ``default`` when one other than
        ``None`` was given (falsy defaults such as ``0`` or ``''`` are
        honoured); otherwise raise ``KeyError``.
        """
        try:
            value = self[key]
        except KeyError:
            if default is None:
                raise
            return default
        del self[key]
        return value

    def copy(self):
        # NOTE(review): this returns a shallow copy of the internal nested
        # defaultdict, not a ReaderTree, and nested nodes stay shared with
        # this instance. Left unchanged to keep the return type stable;
        # confirm whether callers would prefer an independent ReaderTree.
        return self.tree_dd.copy()

    def update(self, d):
        """Register every ``key: value`` pair of the mapping ``d``."""
        for key, value in d.items():
            self[key] = value

    def get_format(self, filename):
        """Return the longest registered extension that ``filename`` ends with.

        Only the final path component is examined. Its first dot-separated
        part (after any leading dots, as with ``os.path.splitext``) is
        treated as the stem and never as part of the extension. Returns
        ``''`` when no registered extension matches.
        """
        basename = os.path.basename(filename)
        components = basename.lstrip('.').split('.')[1:]

        node = self.tree_dd
        suffix = ''
        best = ''
        for component in reversed(components):
            if not component:
                # An empty component ("file..md") can never be part of an
                # extension, and '' is reserved for stored values.
                break
            child = node.get(component)
            if not isinstance(child, defaultdict):
                break
            node = child
            suffix = component + '.' + suffix if suffix else component
            if '' in node:
                # Remember the longest suffix that is actually registered;
                # a partial path through the tree does not count.
                best = suffix
        return best

    def has_reader(self, filename):
        """Return True if ``filename`` ends with a registered extension."""
        fmt = self.get_format(filename)
        return fmt in self

    def as_dict(self):
        """Return the internal tree converted to plain nested dicts."""
        return ReaderTree._rec_dd_to_dict(self.tree_dd)

    def _find_node(self, key):
        """Return the tree node for ``key``, or None, without creating nodes."""
        node = self.tree_dd
        for component in reversed(key.split('.')):
            # dict.get() bypasses the defaultdict factory, so a failed
            # lookup leaves the tree untouched.
            node = node.get(component)
            if not isinstance(node, defaultdict):
                return None
        return node

    @staticmethod
    def _rec_dd():
        """Return a defaultdict whose missing keys are new nested nodes."""
        return defaultdict(ReaderTree._rec_dd)

    @staticmethod
    def _rec_dd_to_dict(dd):
        """Recursively convert nested defaultdicts into plain dicts."""
        return {
            key: (ReaderTree._rec_dd_to_dict(value)
                  if isinstance(value, defaultdict) else value)
            for key, value in dd.items()
        }

    @staticmethod
    def _walk(node, suffix):
        """Yield ``(extension, value)`` for every entry below ``node``.

        ``suffix`` is the tuple of components already consumed on the way
        down, i.e. the trailing part of the extension.
        """
        for component, child in node.items():
            # The '' key holds this node's own value, not a child node.
            if component == '' or not isinstance(child, defaultdict):
                continue
            path = (component,) + suffix
            if '' in child:
                yield '.'.join(path), child['']
            yield from ReaderTree._walk(child, path)

Diff for: pelican/tests/test_generators.py

+3
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ def test_include_path(self):
4141
ignored_file = os.path.join(CUR_DIR, 'content', 'ignored1.rst')
4242
self.assertFalse(include_path(ignored_file))
4343

44+
compound_file = os.path.join(CUR_DIR, 'content', 'compound.md.html')
45+
self.assertTrue(include_path(compound_file, extensions=('md.html',)))
46+
4447
def test_get_files_exclude(self):
4548
"""Test that Generator.get_files() properly excludes directories.
4649
"""

Diff for: pelican/tests/test_readers.py

+91-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import os
2-
from unittest.mock import patch
2+
from unittest.mock import Mock, patch
33

44
from pelican import readers
55
from pelican.tests.support import get_settings, unittest
@@ -76,6 +76,18 @@ def test_readfile_unknown_extension(self):
7676
with self.assertRaises(TypeError):
7777
self.read_file(path='article_with_metadata.unknownextension')
7878

79+
with self.assertRaises(TypeError):
80+
self.read_file(path='article_with.compound.extension')
81+
82+
def test_readfile_compound_extension(self):
    """Check read_file with a reader registered for a compound extension.

    A ``Mock`` is registered through the ``READERS`` setting for the
    extension ``compound.extension`` and ``read_file`` is expected to
    raise ``TypeError`` for a file carrying that extension.
    """
    CompoundReader = Mock()

    # throws type error b/c of mock
    with self.assertRaises(TypeError):
        self.read_file(path='article_with.compound.extension',
                       READERS={'compound.extension': CompoundReader})
        # NOTE(review): block structure was reconstructed from a flattened
        # diff view. If this assertion really sits inside the assertRaises
        # block, it is never reached once read_file raises -- confirm the
        # intended placement and which mock attribute should be checked.
        CompoundReader.read.assert_called_with('article_with.compound.extension')
90+
7991
def test_readfile_path_metadata_implicit_dates(self):
8092
test_file = 'article_with_metadata_implicit_dates.html'
8193
page = self.read_file(path=test_file, DEFAULT_DATE='fs')
@@ -918,3 +930,81 @@ def test_article_with_inline_svg(self):
918930
'title': 'Article with an inline SVG',
919931
}
920932
self.assertDictHasSubset(page.metadata, expected)
933+
934+
935+
class ReaderTreeTest(unittest.TestCase):
    """Unit tests for ``readers.ReaderTree``."""

    def setUp(self):
        # Reader names stand in for reader classes; the tree only stores
        # whatever value it is given.
        readers_and_exts = {
            'BaseReader': ['static'],
            'RstReader': ['rst'],
            'HtmlReader': ['htm', 'html'],
            'MDReader': ['md', 'mk', 'mkdown', 'mkd'],
            'MDeepReader': ['md.html'],
            'FooReader': ['foo.bar.baz.yaz']
        }

        self.reader_classes = readers.ReaderTree()

        for reader, exts in readers_and_exts.items():
            for ext in exts:
                self.reader_classes[ext] = reader

    def test_correct_mapping_generated(self):
        """Extensions are stored component by component, last one first."""
        expected_mapping = {
            'static': {'': 'BaseReader'},
            'rst': {'': 'RstReader'},
            'htm': {'': 'HtmlReader'},
            'html': {
                '': 'HtmlReader',
                'md': {'': 'MDeepReader'}
            },
            'md': {'': 'MDReader'},
            'mk': {'': 'MDReader'},
            'mkdown': {'': 'MDReader'},
            'mkd': {'': 'MDReader'},
            'yaz': {
                'baz': {
                    'bar': {
                        'foo': {'': 'FooReader'}}}}}

        self.assertEqual(expected_mapping, self.reader_classes.as_dict())

    def test_containment(self):
        self.assertIn('md.html', self.reader_classes)
        self.assertIn('html', self.reader_classes)
        self.assertNotIn('txt', self.reader_classes)

    def test_deletion(self):
        self.assertIn('rst', self.reader_classes)
        del self.reader_classes['rst']
        self.assertNotIn('rst', self.reader_classes)

    def test_update(self):
        self.reader_classes.update({
            'new.ext': 'NewExtReader',
            'txt': 'TxtReader'
        })
        self.assertEqual(self.reader_classes['new.ext'], 'NewExtReader')
        self.assertEqual(self.reader_classes['txt'], 'TxtReader')

    def test_get_format(self):
        html_ext = self.reader_classes.get_format('text.html')
        md_ext = self.reader_classes.get_format('another.md')
        compound_ext = self.reader_classes.get_format('dots.compound.md.html')
        no_ext = self.reader_classes.get_format('no_extension')
        bar_ext = self.reader_classes.get_format('file.bar')

        self.assertEqual(html_ext, 'html')
        self.assertEqual(md_ext, 'md')
        self.assertEqual(compound_ext, 'md.html')
        self.assertEqual(no_ext, '')
        # 'bar' only occurs inside 'foo.bar.baz.yaz', never as a final
        # component, so it must not be reported as a format.
        self.assertEqual(bar_ext, '')

    def test_has_reader(self):
        has_reader = self.reader_classes.has_reader
        self.assertTrue(has_reader('text.html'))
        self.assertFalse(has_reader('no_ext'))
        self.assertFalse(has_reader('bad_ext.bar'))

0 commit comments

Comments
 (0)