Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Flexibly disambiguate multiple publications by the same author #581

Merged
merged 1 commit into from
Jun 27, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 44 additions & 2 deletions augur/export_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pathlib import Path
import os, sys
import time
from collections import defaultdict
from collections import defaultdict, deque
import warnings
import re
from Bio import Phylo
Expand Down Expand Up @@ -433,6 +433,47 @@ def set_panels(data_json, config, cmd_line_panels):
data_json['meta']["panels"] = panels


def counter_to_disambiguation_suffix(count):
    """Given a numeric count of author papers, return a distinct alphabetical
    disambiguation suffix.

    Counts map onto the sequence "A", "B", ..., "Z", "AA", "AB", ..., "AZ",
    "BA", ... so that every non-negative integer yields a unique suffix
    (a zero-indexed bijective base-26 numbering).

    Parameters
    ----------
    count : int
        Zero-based index of the publication among those sharing an author.

    Returns
    -------
    str
        Uppercase alphabetical suffix corresponding to `count`.

    Raises
    ------
    ValueError
        If `count` is negative. Without this guard a negative count would
        never reach a zero quotient under floor division and the loop below
        would not terminate.

    >>> counter_to_disambiguation_suffix(0)
    'A'
    >>> counter_to_disambiguation_suffix(25)
    'Z'
    >>> counter_to_disambiguation_suffix(26)
    'AA'
    >>> counter_to_disambiguation_suffix(51)
    'AZ'
    >>> counter_to_disambiguation_suffix(52)
    'BA'
    >>> counter_to_disambiguation_suffix(-1)
    Traceback (most recent call last):
        ...
    ValueError: count must be a non-negative integer, got -1
    """
    if count < 0:
        raise ValueError(
            "count must be a non-negative integer, got {}".format(count)
        )

    letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    base = len(letters)
    suffix = deque()

    # Find the appropriate combination of letters for the given count. This
    # closely resembles the steps required to calculate the base 26 value of
    # the given base 10 number.
    while True:
        quotient, remainder = divmod(count, base)

        # Collect remainders from right to left. Letters are zero-indexed such
        # that a count of 0 returns an "A".
        suffix.appendleft(letters[remainder])

        # Stop when we've accounted for all possible quotient and remainder
        # values.
        if quotient == 0:
            break

        # Convert counts to zero-indexed values such that the next place value
        # starts with the letter "A" instead of the letter "B".
        count = quotient - 1

    return "".join(suffix)

def create_author_data(node_attrs):
"""Gather the authors which appear in the metadata and create the author
info structure with unique keys
Expand Down Expand Up @@ -488,7 +529,8 @@ def node_to_author_tuple(data):
author = node_author_info[node_name]["author"]
if len(author_to_unique_tuples[author]) > 1:
index = author_to_unique_tuples[author].index(author_tuple)
node_author_info[node_name]["value"] = author + " {}".format("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"[index])
disambiguation_suffix = counter_to_disambiguation_suffix(index)
node_author_info[node_name]["value"] = f"{author} {disambiguation_suffix}"
else:
node_author_info[node_name]["value"] = author

Expand Down