NREL
diff --git a/‎docs/source/dev/ords_architecture.rst
+1-1 b/‎docs/source/dev/ords_architecture.rst
+1-1
diff --git a/‎elm/ords/download.py
+2-2 b/‎elm/ords/download.py
+2-2
diff --git a/‎elm/ords/extraction/apply.py
+18-18 b/‎elm/ords/extraction/apply.py
+18-18
diff --git a/‎elm/ords/process.py
+12-12 b/‎elm/ords/process.py
+12-12
diff --git a/‎elm/ords/services/threaded.py
+6-6 b/‎elm/ords/services/threaded.py
+6-6
diff --git a/‎elm/ords/validation/location.py
+2-2 b/‎elm/ords/validation/location.py
+2-2
diff --git a/‎elm/version.py
+1-1 b/‎elm/version.py
+1-1
diff --git a/‎elm/web/document.py
+28-10 b/‎elm/web/document.py
+28-10
diff --git a/‎elm/web/file_loader.py
+2-2 b/‎elm/web/file_loader.py
+2-2
@@ -407,7 +407,7 @@ for multiprocessing tasks.
 
     content = ...
     doc = HTMLDocument([content])
-    doc.text, doc.raw_pages, doc.metadata
+    doc.text, doc.raw_pages, doc.attrs
 
 --------------------------------------------------------------------------------------------------------------------------------------------------
 
 
@@ -136,7 +136,7 @@ async def _down_select_docs_correct_content(docs, location, **kwargs):
 async def _contains_ords(doc, **kwargs):
     """Helper coroutine that checks for ordinance info. """
     doc = await check_for_ordinance_info(doc, **kwargs)
-    return doc.metadata.get("contains_ord_info", False)
+    return doc.attrs.get("contains_ord_info", False)
 
 
 def _sort_final_ord_docs(all_ord_docs):
@@ -149,5 +149,5 @@ def _sort_final_ord_docs(all_ord_docs):
 
 def _ord_doc_sorting_key(doc):
     """All text sorting key"""
-    year, month, day = doc.metadata.get("date", (-1, -1, -1))
+    year, month, day = doc.attrs.get("date", (-1, -1, -1))
     return year, isinstance(doc, PDFDocument), -1 * len(doc.text), month, day
@@ -49,16 +49,16 @@ async def check_for_ordinance_info(doc, text_splitter, **kwargs):
         snippet. Note that the snippet may contain other info as well,
         but should encapsulate all of the ordinance text.
     """
-    if "contains_ord_info" in doc.metadata:
+    if "contains_ord_info" in doc.attrs:
         return doc
 
     llm_caller = StructuredLLMCaller(**kwargs)
     chunks = text_splitter.split_text(doc.text)
     validator = OrdinanceValidator(llm_caller, chunks)
-    doc.metadata["contains_ord_info"] = await validator.parse()
-    if doc.metadata["contains_ord_info"]:
-        doc.metadata["date"] = await DateExtractor(llm_caller).parse(doc)
-        doc.metadata["ordinance_text"] = validator.ordinance_text
+    doc.attrs["contains_ord_info"] = await validator.parse()
+    if doc.attrs["contains_ord_info"]:
+        doc.attrs["date"] = await DateExtractor(llm_caller).parse(doc)
+        doc.attrs["ordinance_text"] = validator.ordinance_text
 
     return doc
 
@@ -90,18 +90,18 @@ async def extract_ordinance_text_with_llm(doc, text_splitter, extractor):
     -------
     elm.web.document.BaseDocument
         Document that has been parsed for ordinance text. The results of
-        the extraction are stored in the document's metadata. In
-        particular, the metadata will contain a
+        the extraction are stored in the document's metadata (attrs). In
+        particular, the metadata (attrs) will contain a
         ``"cleaned_ordinance_text"`` key that will contain the cleaned
         ordinance text.
     """
-    text_chunks = text_splitter.split_text(doc.metadata["ordinance_text"])
+    text_chunks = text_splitter.split_text(doc.attrs["ordinance_text"])
     ordinance_text = await extractor.check_for_restrictions(text_chunks)
-    doc.metadata["restrictions_ordinance_text"] = ordinance_text
+    doc.attrs["restrictions_ordinance_text"] = ordinance_text
 
     text_chunks = text_splitter.split_text(ordinance_text)
     ordinance_text = await extractor.check_for_correct_size(text_chunks)
-    doc.metadata["cleaned_ordinance_text"] = ordinance_text
+    doc.attrs["cleaned_ordinance_text"] = ordinance_text
 
     return doc
 
@@ -167,7 +167,7 @@ async def extract_ordinance_text_with_ngram_validation(
         ``"cleaned_ordinance_text"`` key that will contain the cleaned
         ordinance text.
     """
-    if not doc.metadata.get("ordinance_text"):
+    if not doc.attrs.get("ordinance_text"):
         msg = (
             "Input document has no 'ordinance_text' key or string does not "
             "contain information. Please run `check_for_ordinance_info` "
@@ -203,8 +203,8 @@ async def _extract_with_ngram_check(
     """Extract ordinance info from doc and validate using ngrams."""
     from elm.ords.extraction.ngrams import sentence_ngram_containment
 
-    source = doc.metadata.get("source", "Unknown")
-    og_text = doc.metadata["ordinance_text"]
+    source = doc.attrs.get("source", "Unknown")
+    og_text = doc.attrs["ordinance_text"]
     if not og_text:
         msg = (
             "Document missing original ordinance text! No extraction "
@@ -221,7 +221,7 @@ async def _extract_with_ngram_check(
         doc = await extract_ordinance_text_with_llm(
             doc, text_splitter, extractor
         )
-        cleaned_text = doc.metadata["cleaned_ordinance_text"]
+        cleaned_text = doc.attrs["cleaned_ordinance_text"]
         if not cleaned_text:
             logger.debug(
                 "No cleaned text found after extraction on attempt %d "
@@ -256,7 +256,7 @@ async def _extract_with_ngram_check(
             source,
         )
     else:
-        doc.metadata["cleaned_ordinance_text"] = best_summary
+        doc.attrs["cleaned_ordinance_text"] = best_summary
         msg = (
             f"Ngram check failed after {num_tries}. LLM hallucination in "
             "cleaned ordinance text is extremely likely! Proceed with "
@@ -294,7 +294,7 @@ async def extract_ordinance_values(doc, **kwargs):
         particular, the metadata will contain an ``"ordinance_values"``
         key that will contain the DataFame with ordinance values.
     """
-    if not doc.metadata.get("cleaned_ordinance_text"):
+    if not doc.attrs.get("cleaned_ordinance_text"):
         msg = (
             "Input document has no 'cleaned_ordinance_text' key or string "
             "does not contain info. Please run "
@@ -305,6 +305,6 @@ async def extract_ordinance_values(doc, **kwargs):
         return doc
 
     parser = StructuredOrdinanceParser(**kwargs)
-    text = doc.metadata["cleaned_ordinance_text"]
-    doc.metadata["ordinance_values"] = await parser.parse(text)
+    text = doc.attrs["cleaned_ordinance_text"]
+    doc.attrs["ordinance_values"] = await parser.parse(text)
     return doc
@@ -530,8 +530,8 @@ async def process_county(
         await _record_time_and_usage(start_time, **kwargs)
         return None
 
-    doc.metadata["location"] = county
-    doc.metadata["location_name"] = county.full_name
+    doc.attrs["location"] = county
+    doc.attrs["location_name"] = county.full_name
     await _record_usage(**kwargs)
 
     doc = await extract_ordinance_text_with_ngram_validation(
@@ -550,7 +550,7 @@ async def process_county(
             "%d ordinance value(s) found for %s. Outputs are here: '%s'",
             ord_count,
             county.full_name,
-            doc.metadata["ord_db_fp"],
+            doc.attrs["ord_db_fp"],
         )
     else:
         logger.info("No ordinances found for %s.", county.full_name)
@@ -579,21 +579,21 @@ async def _record_time_and_usage(start_time, **kwargs):
 async def _move_file_to_out_dir(doc):
     """Move PDF or HTML text file to output directory."""
     out_fp = await FileMover.call(doc)
-    doc.metadata["out_fp"] = out_fp
+    doc.attrs["out_fp"] = out_fp
     return doc
 
 
 async def _write_cleaned_text(doc):
     """Write cleaned text to `clean_dir`."""
     out_fp = await CleanedFileWriter.call(doc)
-    doc.metadata["cleaned_fp"] = out_fp
+    doc.attrs["cleaned_fp"] = out_fp
     return doc
 
 
 async def _write_ord_db(doc):
     """Write cleaned text to `county_dbs_dir`."""
     out_fp = await OrdDBFileWriter.call(doc)
-    doc.metadata["ord_db_fp"] = out_fp
+    doc.attrs["ord_db_fp"] = out_fp
     return doc
 
 
@@ -628,10 +628,10 @@ def _num_ords_in_doc(doc):
     if doc is None:
         return 0
 
-    if "ordinance_values" not in doc.metadata:
+    if "ordinance_values" not in doc.attrs:
         return 0
 
-    ord_vals = doc.metadata["ordinance_values"]
+    ord_vals = doc.attrs["ordinance_values"]
     if ord_vals.empty:
         return 0
 
@@ -666,16 +666,16 @@ def _docs_to_db(docs):
 
 def _db_results(doc):
     """Extract results from doc metadata to DataFrame."""
-    results = doc.metadata.get("ordinance_values")
+    results = doc.attrs.get("ordinance_values")
     if results is None:
         return None
 
-    results["source"] = doc.metadata.get("source")
-    year = doc.metadata.get("date", (None, None, None))[0]
+    results["source"] = doc.attrs.get("source")
+    year = doc.attrs.get("date", (None, None, None))[0]
     results["ord_year"] = year if year is not None and year > 0 else None
     results["last_updated"] = datetime.now().strftime("%m/%d/%Y")
 
-    location = doc.metadata["location"]
+    location = doc.attrs["location"]
     results["FIPS"] = location.fips
     results["county"] = location.name
     results["state"] = location.state
 
@@ -16,12 +16,12 @@
 
 def _move_file(doc, out_dir):
     """Move a file from a temp directory to an output directory."""
-    cached_fp = doc.metadata.get("cache_fn")
+    cached_fp = doc.attrs.get("cache_fn")
     if cached_fp is None:
         return
 
     cached_fp = Path(cached_fp)
-    out_fn = doc.metadata.get("location_name", cached_fp.name)
+    out_fn = doc.attrs.get("location_name", cached_fp.name)
     if not out_fn.endswith(cached_fp.suffix):
         out_fn = f"{out_fn}{cached_fp.suffix}"
 
@@ -32,8 +32,8 @@ def _move_file(doc, out_dir):
 
 def _write_cleaned_file(doc, out_dir):
     """Write cleaned ordinance text to directory."""
-    cleaned_text = doc.metadata.get("cleaned_ordinance_text")
-    location_name = doc.metadata.get("location_name")
+    cleaned_text = doc.attrs.get("cleaned_ordinance_text")
+    location_name = doc.attrs.get("location_name")
 
     if cleaned_text is None or location_name is None:
         return
@@ -46,8 +46,8 @@ def _write_cleaned_file(doc, out_dir):
 
 def _write_ord_db(doc, out_dir):
     """Write parsed ordinance database to directory."""
-    ord_db = doc.metadata.get("ordinance_values")
-    location_name = doc.metadata.get("location_name")
+    ord_db = doc.attrs.get("ordinance_values")
+    location_name = doc.attrs.get("location_name")
 
     if ord_db is None or location_name is None:
         return
 
@@ -204,7 +204,7 @@ async def check(self, doc, county, state):
             `True` if the doc contents pertain to the input county.
             `False` otherwise.
         """
-        source = doc.metadata.get("source")
+        source = doc.attrs.get("source")
         logger.debug(
             "Validating document from source: %s", source or "Unknown"
         )
@@ -280,7 +280,7 @@ async def _validator_check_for_doc(validator, doc, score_thresh=0.8, **kwargs):
         "%s score is %.2f for doc from source %s (Pass: %s)",
         validator.__class__.__name__,
         score,
-        doc.metadata.get("source", "Unknown"),
+        doc.attrs.get("source", "Unknown"),
         str(score > score_thresh),
     )
     return score > score_thresh
 
@@ -2,4 +2,4 @@
 ELM version number
 """
 
-__version__ = "0.0.10"
+__version__ = "0.0.11"
@@ -5,6 +5,8 @@
 from functools import cached_property
 import logging
 
+import pandas as pd
+
 from elm.utilities.parse import (
     combine_pages,
     clean_headers,
@@ -39,20 +41,37 @@ class BaseDocument(ABC):
     .. end desc
     """
 
-    def __init__(self, pages, metadata=None):
+    def __init__(self, pages, attrs=None):
         """
 
         Parameters
         ----------
         pages : iterable
             Iterable of strings, where each string is a page of a
             document.
-        metadata : dict, optional
+        attrs : dict, optional
             Optional dict containing metadata for the document.
             By default, ``None``.
         """
         self.pages = remove_blank_pages(pages)
-        self.metadata = metadata or {}
+        self.attrs = attrs or {}
+
+    def __repr__(self):
+        header = (f"{self.__class__.__name__} with {len(self.pages):,} "
+                  "pages\nAttrs:")
+        if not self.attrs:
+            return f"{header} None"
+
+        attrs = {}
+        for k, v in self.attrs.items():
+            if isinstance(v, pd.DataFrame):
+                v = f"DataFrame with {len(v):,} rows"
+            attrs[k] = v
+
+        indent = max(len(k) for k in attrs) + 2
+        attrs = "\n".join([f"{k:>{indent}}:\t{v}"
+                           for k, v in attrs.items()])
+        return f"{header}\n{attrs}"
 
     @property
     def empty(self):
@@ -118,7 +137,7 @@ class PDFDocument(BaseDocument):
     def __init__(
         self,
         pages,
-        metadata=None,
+        attrs=None,
         percent_raw_pages_to_keep=25,
         max_raw_pages=18,
         num_end_pages_to_keep=2,
@@ -131,8 +150,7 @@ def __init__(
         pages : iterable
             Iterable of strings, where each string is a page of a
             document.
-        metadata : str, optional
-            metadata : dict, optional
+        attrs : dict, optional
             Optional dict containing metadata for the document.
             By default, ``None``.
         percent_raw_pages_to_keep : int, optional
@@ -153,7 +171,7 @@ def __init__(
             to the :func:`~elm.utilities.parse.clean_headers`
             function. By default, ``None``.
         """
-        super().__init__(pages, metadata=metadata)
+        super().__init__(pages, attrs=attrs)
         self.percent_raw_pages_to_keep = percent_raw_pages_to_keep
         self.max_raw_pages = min(len(self.pages), max_raw_pages)
         self.num_end_pages_to_keep = num_end_pages_to_keep
@@ -244,7 +262,7 @@ class HTMLDocument(BaseDocument):
     def __init__(
         self,
         pages,
-        metadata=None,
+        attrs=None,
         html_table_to_markdown_kwargs=None,
         ignore_html_links=True,
         text_splitter=None,
@@ -256,7 +274,7 @@ def __init__(
         pages : iterable
             Iterable of strings, where each string is a page of a
             document.
-        metadata : dict, optional
+        attrs : dict, optional
             Optional dict containing metadata for the document.
             By default, ``None``.
         html_table_to_markdown_kwargs : dict, optional
@@ -275,7 +293,7 @@ def __init__(
             By default, ``None``, which means the original pages input
             becomes the raw pages attribute.
         """
-        super().__init__(pages, metadata=metadata)
+        super().__init__(pages, attrs=attrs)
         self.html_table_to_markdown_kwargs = deepcopy(
             self.HTML_TABLE_TO_MARKDOWN_KWARGS
         )
 
@@ -181,7 +181,7 @@ async def fetch(self, url):
     async def _fetch_doc_with_url_in_metadata(self, url):
         """Fetch doc contents and add URL to metadata"""
         doc, raw_content = await self._fetch_doc(url)
-        doc.metadata["source"] = url
+        doc.attrs["source"] = url
         return doc, raw_content
 
     async def _fetch_doc(self, url):
@@ -239,5 +239,5 @@ async def _cache_doc(self, doc, raw_content):
 
         cache_fn = await self.file_cache_coroutine(doc, raw_content)
         if cache_fn is not None:
-            doc.metadata["cache_fn"] = cache_fn
+            doc.attrs["cache_fn"] = cache_fn
         return doc