Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -862,6 +862,7 @@ jobs:
- run: python utils/sort_auto_mappings.py --check_only
- run: flake8 examples tests src utils
- run: doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source
- run: python utils/check_doc_toc.py

check_repository_consistency:
working_directory: ~/transformers
Expand Down
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,15 @@ quality:
python utils/sort_auto_mappings.py --check_only
flake8 $(check_dirs)
doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source
python utils/check_doc_toc.py

# Format source code automatically and check if there are any problems left that need manual fixing

extra_style_checks:
python utils/custom_init_isort.py
python utils/sort_auto_mappings.py
doc-builder style src/transformers docs/source --max_len 119 --path_to_docs docs/source
python utils/check_doc_toc.py --fix_and_overwrite

# this target runs checks on all files and potentially modifies some of them

Expand Down
16 changes: 8 additions & 8 deletions docs/source/en/_toctree.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
title: Tutorials
- sections:
- local: fast_tokenizers
title: "Use tokenizers from 🤗 Tokenizers"
title: Use tokenizers from 🤗 Tokenizers
- local: create_a_model
title: Create a custom architecture
- local: custom_models
Expand Down Expand Up @@ -94,15 +94,15 @@
- local: debugging
title: Debugging
- local: notebooks
title: "🤗 Transformers Notebooks"
title: 🤗 Transformers Notebooks
- local: community
title: Community
- local: contributing
title: How to contribute to transformers?
- local: add_new_model
title: "How to add a model to 🤗 Transformers?"
title: How to add a model to 🤗 Transformers?
- local: add_new_pipeline
title: "How to add a pipeline to 🤗 Transformers?"
title: How to add a pipeline to 🤗 Transformers?
- local: testing
title: Testing
- local: pr_checks
Expand Down Expand Up @@ -254,14 +254,14 @@
title: GLPN
- local: model_doc/openai-gpt
title: GPT
- local: model_doc/gpt2
title: GPT2
- local: model_doc/gptj
title: GPT-J
- local: model_doc/gpt_neo
title: GPT Neo
- local: model_doc/gpt_neox
title: GPT NeoX
- local: model_doc/gptj
title: GPT-J
- local: model_doc/gpt2
title: GPT2
- local: model_doc/groupvit
title: GroupViT
- local: model_doc/herbert
Expand Down
88 changes: 88 additions & 0 deletions utils/check_doc_toc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# coding=utf-8
# Copyright 2022 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
from collections import defaultdict

import yaml


# Table of content of the English documentation that this script checks and, on request, rewrites.
# The path is relative: the script is expected to be launched from the root of the repository, which is how the
# Makefile and the CI job invoke it (`python utils/check_doc_toc.py`).
PATH_TO_TOC = "docs/source/en/_toctree.yml"


def clean_model_doc_toc(model_doc):
    """
    Build a deduplicated, alphabetically ordered version of the model documentation table of content.

    Args:
        model_doc: list of dictionaries, each holding at least a `"local"` and a `"title"` entry.

    Returns:
        A new list sorted case-insensitively by title. Entries whose `"local"` value occurs several times are
        collapsed into one fresh `{"local": ..., "title": ...}` dictionary; entries that occur once are kept as is.

    Raises:
        ValueError: if the same `"local"` value is listed with more than one distinct title.
    """
    # Bucket the entries by their `local` value; dictionaries keep the order in which keys were first seen.
    entries_by_local = defaultdict(list)
    for entry in model_doc:
        entries_by_local[entry["local"]].append(entry)

    cleaned = []

    # First the repeated entries, merged into a single one each.
    for local, entries in entries_by_local.items():
        if len(entries) < 2:
            continue
        distinct_titles = {entry["title"] for entry in entries}
        if len(distinct_titles) != 1:
            raise ValueError(
                f"{local} is present several times in the documentation table of content at "
                "`docs/source/en/_toctree.yml` with different *Title* values. Choose one of those and remove the "
                "others."
            )
        cleaned.append({"local": local, "title": distinct_titles.pop()})

    # Then the entries that were never duplicated, in their original order.
    for entry in model_doc:
        if len(entries_by_local[entry["local"]]) == 1:
            cleaned.append(entry)

    # The sort is stable, so entries with the same lowercase title keep the relative order built above.
    cleaned.sort(key=lambda item: item["title"].lower())
    return cleaned


def _find_section(sections, title):
    """Return the first entry of `sections` titled `title`; raise `ValueError` when no entry matches."""
    for section in sections:
        if section.get("title") == title:
            return section
    raise ValueError(f"Could not find a section titled {title!r} in the table of content at `{PATH_TO_TOC}`.")


def check_model_doc(overwrite=False):
    """
    Check that the "Models" part of the documentation table of content is deduplicated and sorted.

    The table of content is read from `PATH_TO_TOC`; the list found under the "API" > "Models" section is compared
    with the result of `clean_model_doc_toc`.

    Args:
        overwrite: when `True`, a table of content that differs from its cleaned version is fixed and written back
            to `PATH_TO_TOC`; when `False`, the difference is reported by raising an error instead.

    Raises:
        ValueError: if the "API" or "Models" section cannot be found, if `clean_model_doc_toc` rejects the
            content, or if the model list needs fixing and `overwrite` is `False`.
    """
    with open(PATH_TO_TOC, encoding="utf-8") as f:
        content = yaml.safe_load(f)

    # An empty file is loaded as `None`; treat it as an empty table so the lookup fails with a clear message.
    api_section = _find_section(content or [], "API")
    models_section = _find_section(api_section["sections"], "Models")

    old_model_doc = models_section["sections"]
    new_model_doc = clean_model_doc_toc(old_model_doc)

    if old_model_doc == new_model_doc:
        return

    if not overwrite:
        raise ValueError(
            "The model doc part of the table of content is not properly sorted, run `make style` to fix this."
        )

    # `models_section` is the very dictionary nested inside `content`, so updating it updates what gets dumped.
    models_section["sections"] = new_model_doc
    # The whole document is re-serialized, so formatting of untouched entries (e.g. quoting) may be normalized.
    with open(PATH_TO_TOC, "w", encoding="utf-8") as f:
        f.write(yaml.dump(content, allow_unicode=True))


if __name__ == "__main__":
    # Command-line entry point: only checks by default, fixes the table of content with `--fix_and_overwrite`.
    cli = argparse.ArgumentParser()
    cli.add_argument("--fix_and_overwrite", help="Whether to fix inconsistencies.", action="store_true")
    options = cli.parse_args()

    check_model_doc(overwrite=options.fix_and_overwrite)