|
2 | 2 | import logging
|
3 | 3 | import os
|
4 | 4 | import re
|
5 |
| -from collections import OrderedDict |
| 5 | +from collections import OrderedDict, defaultdict |
6 | 6 | from html import escape
|
7 | 7 | from html.parser import HTMLParser
|
8 | 8 | from io import StringIO
|
| 9 | +from functools import reduce |
| 10 | +import operator |
9 | 11 |
|
10 | 12 | import docutils
|
11 | 13 | import docutils.core
|
@@ -496,8 +498,8 @@ class Readers(FileStampDataCacher):
|
496 | 498 |
|
497 | 499 | def __init__(self, settings=None, cache_name=''):
|
498 | 500 | self.settings = settings or {}
|
499 |
| - self.readers = {} |
500 |
| - self.reader_classes = {} |
| 501 | + self.readers = ReaderTree() |
| 502 | + self.reader_classes = ReaderTree() |
501 | 503 |
|
502 | 504 | for cls in [BaseReader] + BaseReader.__subclasses__():
|
503 | 505 | if not cls.enabled:
|
@@ -542,8 +544,7 @@ def read_file(self, base_path, path, content_class=Page, fmt=None,
|
542 | 544 | source_path, content_class.__name__)
|
543 | 545 |
|
544 | 546 | if not fmt:
|
545 |
| - _, ext = os.path.splitext(os.path.basename(path)) |
546 |
| - fmt = ext[1:] |
| 547 | + fmt = self.readers.get_format(path) |
547 | 548 |
|
548 | 549 | if fmt not in self.readers:
|
549 | 550 | raise TypeError(
|
@@ -746,3 +747,152 @@ def parse_path_metadata(source_path, settings=None, process=None):
|
746 | 747 | v = process(k, v)
|
747 | 748 | metadata[k] = v
|
748 | 749 | return metadata
|
| 750 | + |
| 751 | + |
class ReaderTree():
    """Dict-like registry keyed by (possibly compound) file extensions.

    Keys are dotted extension strings such as ``'md'`` or ``'tar.gz'``.
    They are stored in a suffix tree: a key is split on ``'.'`` and its
    components are walked right to left, so ``'gz'`` and ``'tar.gz'``
    share the ``'gz'`` node. Inside a node the empty-string slot ``''``
    holds the value registered for the key ending at that node; every
    other slot maps a component to a child node.

    Lookups never create nodes; only ``__setitem__`` grows the tree.
    """

    # Sentinel so that pop() can tell "no default given" from a falsy one.
    _MISSING = object()

    def __init__(self):
        self.tree_dd = ReaderTree._rec_dd()

    def __str__(self):
        return str(ReaderTree._rec_dd_to_dict(self.tree_dd))

    def __iter__(self):
        """Iterate over the registered dotted keys."""
        for key, _ in ReaderTree._walk(self.tree_dd, ''):
            yield key

    def __setitem__(self, key, value):
        """Register ``value`` under the dotted extension ``key``.

        Raises:
            ValueError: if ``key`` is not a string or has an empty
                component (``''``, ``'.gz'``, ``'tar..gz'``); an empty
                component would collide with the internal value slot.
        """
        parts = ReaderTree._components(key)
        if parts is None:
            raise ValueError(
                "Invalid extension key: {!r}".format(key))
        node = self.tree_dd
        for part in parts:
            # defaultdict creates the missing intermediate nodes here.
            node = node[part]
        node[''] = value

    def __getitem__(self, key):
        """Return the value registered for ``key``.

        Raises:
            KeyError: if exactly ``key`` was never registered (a longer
                key sharing its suffix does not count).
        """
        node = self._find_node(key)
        if node is None or '' not in node:
            raise KeyError(key)
        return node['']

    def __delitem__(self, key):
        """Remove ``key`` and prune nodes left empty by the removal.

        Raises:
            KeyError: if ``key`` is not registered.
        """
        parts = ReaderTree._components(key)
        if parts is None:
            raise KeyError(key)

        trail = []  # (parent node, component) pairs from root to leaf
        node = self.tree_dd
        for part in parts:
            if part not in node:
                raise KeyError(key)
            trail.append((node, part))
            node = node[part]
        if '' not in node:
            raise KeyError(key)
        del node['']

        # Drop now-empty nodes bottom-up so they cannot be mistaken for
        # registered suffixes later.
        for parent, part in reversed(trail):
            if parent[part]:
                break
            del parent[part]

    def __contains__(self, item):
        node = self._find_node(item)
        return node is not None and '' in node

    def __len__(self):
        return sum(1 for _ in ReaderTree._walk(self.tree_dd, ''))

    def keys(self):
        """Return an iterator over the registered dotted keys."""
        return self.__iter__()

    def values(self):
        """Yield the registered values, in the same order as ``keys()``."""
        for _, value in ReaderTree._walk(self.tree_dd, ''):
            yield value

    def items(self):
        """Yield ``(key, value)`` pairs from a single tree traversal."""
        return ReaderTree._walk(self.tree_dd, '')

    def get(self, key, default=None):
        """Return the value for ``key``, or ``default`` if it is missing."""
        try:
            return self[key]
        except KeyError:
            return default

    def setdefault(self, key, value):
        """Return the value for ``key``, registering ``value`` if missing."""
        try:
            return self[key]
        except KeyError:
            self[key] = value
            return value

    def clear(self):
        self.tree_dd.clear()

    def pop(self, key, default=_MISSING):
        """Remove ``key`` and return its value.

        If ``key`` is missing, return ``default`` when one was supplied
        (even a falsy one); otherwise raise ``KeyError``.
        """
        try:
            value = self[key]
        except KeyError:
            if default is ReaderTree._MISSING:
                raise
            return default
        del self[key]
        return value

    def copy(self):
        """Return an independent ``ReaderTree`` with the same entries.

        The tree structure is rebuilt, so later changes to either tree do
        not affect the other; the stored values themselves are shared.
        """
        clone = ReaderTree()
        clone.update(self)
        return clone

    def update(self, d):
        """Register every ``(key, value)`` pair of the mapping ``d``."""
        for key, value in d.items():
            self[key] = value

    def get_format(self, filename):
        """Return the longest registered extension ``filename`` ends with.

        Only the basename is examined. A name containing no ``'.'``
        yields ``''``.

        Raises:
            TypeError: if the name contains a ``'.'`` but none of its
                suffixes is a registered extension.
        """
        name = os.path.basename(filename)
        if '.' not in name:
            return ''

        node = self.tree_dd
        consumed = []   # components matched so far, right to left
        matched = None  # longest registered suffix seen so far
        while True:
            name, ext = os.path.splitext(name)
            component = ext[1:]
            if not component or component not in node:
                break
            node = node[component]
            consumed.append(component)
            if '' in node:
                matched = '.'.join(reversed(consumed))

        if matched is None:
            raise TypeError("No valid extension found")
        return matched

    def as_dict(self):
        """Return the tree as plain nested dicts."""
        return ReaderTree._rec_dd_to_dict(self.tree_dd)

    def _find_node(self, key):
        """Return the node for ``key`` without creating anything, or None."""
        parts = ReaderTree._components(key)
        if parts is None:
            return None
        node = self.tree_dd
        for part in parts:
            if part not in node:
                return None
            node = node[part]
        return node

    @staticmethod
    def _components(key):
        """Split ``key`` into components, right to left; None if invalid."""
        if not isinstance(key, str):
            return None
        parts = key.split('.')
        if not all(parts):
            return None
        parts.reverse()
        return parts

    @staticmethod
    def _rec_dd():
        return defaultdict(ReaderTree._rec_dd)

    @staticmethod
    def _rec_dd_to_dict(dd):
        """Recursively convert a node to plain dicts; ``''`` slots are values."""
        return {
            key: value if key == '' else ReaderTree._rec_dd_to_dict(value)
            for key, value in dd.items()
        }

    @staticmethod
    def _walk(node, suffix):
        """Yield ``(dotted_key, value)`` pairs for every key below ``node``.

        ``suffix`` is the dotted key of ``node`` itself (``''`` for the
        root). A node's own entry is yielded before those of its children.
        """
        for component, child in node.items():
            if component == '':
                continue
            dotted = component if not suffix else component + '.' + suffix
            if '' in child:
                yield dotted, child['']
            for pair in ReaderTree._walk(child, dotted):
                yield pair
0 commit comments