From af943fe96ce5d0a98d5dc610174a00c01c5e1a23 Mon Sep 17 00:00:00 2001
From: Jeffrey Niu <niujeffrey@yahoo.com>
Date: Mon, 30 Aug 2021 15:24:18 -0400
Subject: [PATCH] Edits in response to public comments 2021-06-28 through
 2021-07-09 (#2)

* Update BEDv1.tex

* polished edits

* Edits in response to public comments 2021-06-28 through 2021-07-09

* Edits in response to public comments 2021-06-28 through 2021-07-09 fix typo

* Edits in reponse to GA4GH PRC

* line edit

* Edits in response to public comments 2021-07-09

* further line edits and footnote fixing

* fix texlint issues

* WIP address uninformative/default/empty issues

* fix empty/uninformative issues

* clarify language on BED fields not being empty

* add special-case `diff/BEDv1.pdf` target that uses lualatex

Co-authored-by: Michael Hoffman <michaelmhoffman@users.noreply.github.com>
Co-authored-by: Jeffrey Niu <haliupenguin@DESKTOP-FKHRPBT.localdomain>
---
 BEDv1.tex | 245 +++++++++++++++++++++++++++++++++---------------------
 Makefile  |   2 +
 2 files changed, 153 insertions(+), 94 deletions(-)

diff --git a/BEDv1.tex b/BEDv1.tex
index 39ecf419f..92f26bedf 100644
--- a/BEDv1.tex
+++ b/BEDv1.tex
@@ -6,7 +6,9 @@
 \usepackage{amsmath}
 \usepackage{booktabs}
 \usepackage{calc}
+\usepackage{caption}
 \usepackage[flushmargin,hang]{footmisc}
+\usepackage{float}
 \usepackage{microtype}
 \usepackage{newverbs}
 \usepackage{tablefootnote}
@@ -30,13 +32,22 @@
 
 \definecolor{cverbbg}{gray}{0.93}
 
-\title{The Browser Extensible Data~(BED) format}
+\title{The \acf{BED} format}
 \author{Jeffrey Niu, Danielle Denisko, Michael M.~Hoffman}
 \date{\headdate}
 
 \setlength{\emergencystretch}{\hsize}
 \setlength{\footnotemargin}{1em}
 
+\floatplacement{table}{htbp}
+\setcounter{topnumber}{2}
+\setcounter{bottomnumber}{2}
+\setcounter{totalnumber}{4}
+\setcounter{dbltopnumber}{2}
+\renewcommand{\dbltopfraction}{0.9}
+\renewcommand{\textfraction}{0.07}
+\renewcommand{\floatpagefraction}{0.7}
+
 \interfootnotelinepenalty=1000000
 \makesavenoteenv{tabularx}
 
@@ -64,42 +75,49 @@
 
 \section{Specification}
 
-\Ac{BED} is a whitespace-delimited file format, where each~\textbf{file} consists of one or more~\textbf{line}s.\footnote{``Frequently Asked Questions: Data File Formats.'' \ac{UCSC} Genome Browser FAQ, \url{https://genome.ucsc.edu/FAQ/FAQformat.html}}
+\Ac{BED} is a whitespace-delimited file format, where each~\textbf{file} consists of zero or more~\textbf{line}s.\footnote{``Frequently Asked Questions: Data File Formats.'' \ac{UCSC} Genome Browser FAQ, \url{https://genome.ucsc.edu/FAQ/FAQformat.html}}
 Data are in~\textbf{data line}s, which describe discrete genomic~\textbf{feature}s by physical start and end position on a linear~\textbf{chromosome}.
 The file extension for the \ac{BED} format is~\texttt{.bed}.
 
-\subsection{Typographic conventions}
+\subsection{Scope}
 
-This document uses the following typographic conventions:
+This specification formalizes reasonable interpretations of the \ac{UCSC} Genome Browser \ac{BED} description.
+This specification also makes clear potential interoperability issues in the current format, which could be addressed in a future specification.
 
-\vspace{2ex}
+\subsection{Typographic conventions}
 
-\noindent
-\addstackgap[\baselineskip]{\begin{tabularx}{\textwidth}{r L L}
-  \toprule
-  Style & Meaning & Examples \\
-  \midrule
-  Bold & Terms defined in subsections~\ref{sec:terms}--\ref{sec:lines} & \textbf{chromosome}{\quad}\textbf{file} \\
-  Sans serif & Names of~\textbf{field}s & \textsf{chrom}{\quad}\textsf{chromStart}{\quad}\textsf{chromEnd} \\
-  Fixed-width & Literals or \ac{regex}es\footnote{POSIX/IEEE~1003.1--2017 Extended Regular Expressions, for the ``C'' locale.
-                \emph{IEEE Standard for Information Technology---Portable Operating System Interface~(POSIX) Base Specifications}, IEEE~1003.1--2017, 2017} & \texttt{.bed}{\quad}\texttt{grep}{\quad}\texttt{[[:alnum:]]+}{\quad}\texttt{ATCG} \\
-  \bottomrule
-\end{tabularx}}
+This document uses several typographic conventions~(\autoref{tab:typographic-conventions}).
+
+\begin{savenotes}
+  \begin{table}
+    \begin{tabularx}{\textwidth}{r L L}
+      \toprule
+      Style & Meaning & Examples \\
+      \midrule
+      Bold & Terms defined in subsections~\ref{sec:terms}--\ref{sec:lines} & \textbf{chromosome}{\quad}\textbf{file} \\
+      Sans serif & Names of~\textbf{field}s & \textsf{chrom}{\quad}\textsf{chromStart}{\quad}\textsf{chromEnd} \\
+      Fixed-width & Literals or \ac{regex}es\footnote{POSIX/IEEE~1003.1--2017 Extended Regular Expressions, for the ``C'' locale.
+                    \emph{IEEE Standard for Information Technology---Portable Operating System Interface~(POSIX) Base Specifications}, IEEE~1003.1--2017, 2017} & \texttt{.bed}{\quad}\texttt{grep}{\quad}\texttt{[[:alnum:]]+}{\quad}\texttt{ATCG} \\
+      \bottomrule
+    \end{tabularx}
+    \caption{\textbf{Typographic conventions.}}\label{tab:typographic-conventions}
+  \end{table}
+\end{savenotes}
 
 \subsection{Terminology and concepts}\label{sec:terms}
 \begin{description}
-\item[0-start, half-open coordinate system:]
+\item[0-based, half-open coordinate system:]
   A coordinate system where the first base starts at position~0, and the start of the interval is included but the end is not.
   For example, for a sequence of bases~\texttt{ACTGCG}, the bases given by the interval~[2,~4) are~\texttt{TG}. % chktex 9
 
-\item[BED$n$:]
+\item[\acs{BED}$n$:]
   A~\textbf{file} with the first $n$~\textbf{field}s of the \ac{BED} format.
   For example, \textbf{BED3}~means a~\textbf{file} with only the first 3~\textbf{field}s; \textbf{BED12}~means a~\textbf{file} with all 12~\textbf{field}s.
 
-\item[BED$n$+:]
-  A~\textbf{file} that has $n$~\textbf{field}s of the \ac{BED} format, followed by any number of~\textbf{field}s of custom data defined by a user.
+\item[\acs{BED}$n$+:]
+  A~\textbf{file} that has the first $n$~\textbf{field}s of the \ac{BED} format, followed by any number of~\textbf{field}s of custom data defined by a user.
 
-\item[BED$n$+$m$:]
+\item[\acs{BED}$n$+$m$:]
   A~\textbf{file} that has a custom format starting with the first $n$~\textbf{field}s of the \ac{BED} format, followed by $m$~\textbf{field}s of custom data defined by a user.
   For example, \textbf{BED6+4}~means a~\textbf{file} with the first 6~\textbf{field}s of the \ac{BED} format, followed by 4~user-defined~\textbf{field}s.
 
@@ -121,12 +139,14 @@ \subsection{Terminology and concepts}\label{sec:terms}
 
 \item[field:]
   Data stored as non-tab text.
-  All~\textbf{field}s are 7-bit US \ac{ASCII} printable characters\footnote{Characters in the range `\texttt{{\textbackslash}x20}' to `\texttt{{\textbackslash}x7e}', therefore not including any control characters}.
-  Only some \textbf{field}s can be empty, and they can only be empty when a single tab is used as the \textbf{field separator}.
+  All~\textbf{field}s are 7-bit US \ac{ASCII} printable characters\footnote{Characters in the range \texttt{{\textbackslash}x20} to \texttt{{\textbackslash}x7e}, therefore not including any control characters}.
+  Only custom \textbf{field}s can be empty, and they can only be empty when a single tab is used as the \textbf{field separator} throughout the \textbf{file}.
 
 \item[field separator:]
   One or more horizontal whitespace characters (space or tab).
-  The \textbf{field} separator must match the \ac{regex}~\texttt{[ {\textbackslash}]+}.
+  The \textbf{field} separator must match the \ac{regex}~\texttt{[ {\textbackslash}t]+}.
+  The \textbf{field} separator can vary throughout the \textbf{file}.
+  Some capabilities of the \ac{BED} format, however, are available only when a single tab is used as the \textbf{field} separator throughout the \textbf{file}.
 
 \item[file:]
   Sequence of one or more~\textbf{line}s.
@@ -137,7 +157,7 @@ \subsection{Terminology and concepts}\label{sec:terms}
   Discussed more fully in~\autoref{sec:lines}
 
 \item[line separator:]
-  Either carriage return, newline, or carriage return followed by newline\footnote{A newline is defined as `\texttt{{\textbackslash}n}'}
+  Either carriage return~(\texttt{{\textbackslash}r}, equivalent to \texttt{{\textbackslash}x0d}), newline~(\texttt{{\textbackslash}r}, equivalent to \texttt{{\textbackslash}x0a}), or carriage return followed by newline~(\texttt{{\textbackslash}r{\textbackslash}n}, equivalent to \texttt{{\textbackslash}x0d{\textbackslash}x0a}).
   The same \textbf{line separator} must be used throughout the \textbf{file}.
 \end{description}
 
@@ -161,48 +181,66 @@ \subsubsection{Comment lines and blank lines}
 
 \subsection{\acs{BED} fields}
 
-Each~\textbf{data line} contains between~3 and 12~~\textbf{field}s delimited by a \textbf{field separator}.
-The first 3~\textbf{field}s are mandatory, and the last 9~\textbf{field}s are optional.
-In optional~\textbf{field}s, the order is binding---if 1~\textbf{field} is filled, then all previous~\textbf{field}s must also be filled.
-However, \textbf{BED10} and \textbf{BED11} are prohibited.
+Each~\textbf{data line} contains between~3 and 12~\textbf{field}s delimited by a \textbf{field separator}.
+The first 3~\textbf{field}s are mandatory, and the last 9~\textbf{field}s are optional~(\autoref{tab:fields}).
+In optional~\textbf{field}s, the order is binding---if a \textbf{field} is filled, then all previous~\textbf{field}s must also be filled.
+Any mandatory or optional \textbf{field} included on any \textbf{data line} in the \textbf{file} must not be empty on any other \textbf{data line}.
+\textbf{BED10} and \textbf{BED11} are prohibited.
+
+\begin{savenotes}
+  \begin{table}
+    \begin{adjustwidth}{-0.5in}{-0.5in}
+      \begin{tabularx}{\linewidth}{r l l l L}
+        \toprule
+        Col & Field & Type & Regex or range & Brief description \\
+        \midrule
+        1
+        & \textsf{chrom}
+        & String
+        & \texttt{[[:alnum:]\_]\{1,255\}}\footnote{\texttt{[[:alnum:]\_]} is equivalent to the \ac{regex} \texttt{[A-Za-z0-9\_]}. % chktex 8
+        It is also equivalent to the Perl extension \texttt{[[:word:]]}}
+        & \textbf{Chromosome} name \\
+
+        2 & \textsf{chromStart} & Int & $[0, 2^{64}-1]$ & \textbf{Feature} start position \\
+        3 & \textsf{chromEnd} & Int & $[0, 2^{64} -1]$ & \textbf{Feature} end position \\
+        4 & \textsf{name} & String & \texttt{[{\textbackslash}x20-{\textbackslash}x7e]\{1,255\}} & \textbf{Feature} description \\
+        5 & \textsf{score} & Int & $[0, 1000]$ & A numerical value \\
+        6 & \textsf{strand} & String & \texttt{[-+.]} & \textbf{Feature} strand \\
+        7 & \textsf{thickStart} & Int & $[0, 2^{64}-1]$ & Thick start position \\
+        8 & \textsf{thickEnd} & Int & $[0, 2^{64}-1]$ & Thick end position \\
+        9 & \textsf{itemRgb} & Int,Int,Int & \texttt{(}$[0, 255], [0,255], [0,255]$\texttt{) | 0} & Display color \\ % chktex 9
+
+        10
+        & \textsf{blockCount}
+        & Int
+        & $[0, \textsf{chromEnd}-\textsf{chromStart}]$\footnote{\textsf{chromEnd}-\textsf{chromStart} is the maximum number of~\textbf{block}s that may exist without overlaps}
+        & Number of \textbf{block}s \\
+
+        11
+        & \textsf{blockSizes}
+        & List[Int]
+        & \texttt{([[:digit:]]+,)\{\textsf{blockCount}$-1$\}[[:digit:]]+,?}\footnote{For example, if~$\textsf{blockCount} = 4$, then the allowed \ac{regex} would be~\texttt{([[:digit:]]+,)\{3\}[[:digit:]]+,?}}
+        & \textbf{Block} sizes \\
+
+        12 & \textsf{blockStarts} & List[Int] & \texttt{([[:digit:]]+,)\{\textsf{blockCount}$-1$\}[[:digit:]]+,?} & \textbf{Block} start positions \\
+        \bottomrule
+      \end{tabularx}
+    \end{adjustwidth}
+    \caption{\textbf{Fields.}}\label{tab:fields}
+  \end{table}
+\end{savenotes}
 
 In a \ac{BED}~\textbf{file}, each~\textbf{data line} must have the same number of~\textbf{field}s.
 The positions in \ac{BED}~\textbf{field}s are all described in the~\textbf{0-based, half-open coordinate system}.
 
-\begin{adjustwidth}{-0.5in}{-0.5in}
-  \noindent
-  \addstackgap[\baselineskip]{\begin{tabularx}{\linewidth}{r l l l L}
-    \toprule
-    Col & Field & Type & Regex or range & Brief description \\
-    \midrule
-    1 & \textsf{chrom} & String & \texttt{[[:alnum:]\_]\{1,255\}}{\footnotemark} & \textbf{Chromosome} name \\
-    2 & \textsf{chromStart} & Int & $[0, 2^{64}-1]$ & \textbf{Feature} start position \\
-    3 & \textsf{chromEnd} & Int & $[0, 2^{64} -1]$ & \textbf{Feature} end position \\
-    4 & \textsf{name} & String & \texttt{[{\textasciicircum}{\textbackslash}t]\{1,255\}} & \textbf{Feature} description \\
-    5 & \textsf{score} & Int & $[0, 1000]$ & A numerical value \\
-    6 & \textsf{strand} & String & \texttt{[-+.]} & \textbf{Feature} strand \\
-    7 & \textsf{thickStart} & Int & $[0, 2^{64}-1]$ & Thick start position \\
-    8 & \textsf{thickEnd} & Int & $[0, 2^{64}-1]$ & Thick end position \\
-    9 & \textsf{itemRgb} & Int,Int,Int & \texttt{(}$[0, 255], [0,255], [0,255]$\texttt{) | 0} & Display color \\ % chktex 9
-    10 & \textsf{blockCount} & Int & $[0, \textsf{chromEnd}-\textsf{chromStart}]${\footnotemark} & Number of \textbf{block}s \\
-    11 & \textsf{blockSizes} & List[Int] & \texttt{([[:digit:]]+,)\{\textsf{blockCount}$-1$\}[[:digit:]]+,?}{\footnotemark} & \textbf{Block} sizes \\
-    12 & \textsf{blockStarts} & List[Int] & \texttt{([[:digit:]]+,)\{\textsf{blockCount}$-1$\}[[:digit:]]+,?} & \textbf{Block} start positions \\
-    \bottomrule
-  \end{tabularx}}
-  \footnotetext[5]{\texttt{[[:alnum:]\_]} is equivalent to the \ac{regex} \texttt{[A-Za-z0-9\_]}. % chktex 8
-      It is also equivalent to the Perl extension \texttt{[[:word:]]}.}
-  \footnotetext[6]{\textsf{chromEnd}-\textsf{chromStart} is the maximum number of~\textbf{block}s that may exist without overlaps.}
-  \footnotetext{For example, if~$\textsf{blockCount} = 4$, then the allowed \ac{regex} would be~\texttt{([[:digit:]]+,)\{3\}[[:digit:]]+,?}}
-\end{adjustwidth}
-
 \subsection{Coordinates}
 \begin{enumerate}
 \item \textsf{chrom}: The name of the~\textbf{chromosome} where the~\textbf{feature} is present.
-  Limiting only to word characters only, instead of all non-whitespace characters, makes \ac{BED}~\textbf{file}s more portable to varying environments which may make different assumptions about allowed characters.
+  Limiting to word characters only, instead of all non-whitespace printable characters, makes \ac{BED}~\textbf{file}s more portable to varying environments which may make different assumptions about allowed characters.
   The name must be between~1 and 255~characters long, inclusive.
 
 \item \textsf{chromStart}: Start position of the~\textbf{feature} on the~\textbf{chromosome}.
-  \textsf{chromStart}~must be an integer greater than or equal to~0 and less than the total number of bases of the~\textbf{chromosome} to which it belongs.
+  \textsf{chromStart}~must be an integer greater than or equal to~0 and less than or equal to the total number of bases of the~\textbf{chromosome} to which it belongs.
   If the size of the~\textbf{chromosome} is unknown, then \textsf{chromStart}~must be less than or equal to~$2^{64} - 1$, which is the maximum size of an unsigned 64-bit integer.
 
 \item \textsf{chromEnd}: End position of the~\textbf{feature} on the~\textbf{chromosome}.
@@ -218,18 +256,19 @@ \subsection{Simple attributes}
 
 \item \textsf{name}: String that describes the~\textbf{feature}.
   \textsf{name} must be~1 to 255~non-tab characters.
-  \textsf{name} must not be empty or contain whitespace, unless the only \textbf{field separator} is a single tab.
+  \textsf{name} must not contain whitespace, unless the only \textbf{field separator} is a single tab.
+  Multiple \textbf{data line}s may share the same \textsf{name}.
+  In \textbf{BED5+} \textbf{file}s where all \textbf{features} have uninformative \textsf{name}s, dot~(\texttt{.}) may be used as a \textsf{name} on every \textbf{data line}.
   A visual representation of the \ac{BED} format may display \textsf{name} next to the~\textbf{feature}.
 
 \item \textsf{score}: Integer between~0 and~1000, inclusive.
-  In~\textbf{BED6+} \textbf{file}s where all \textbf{feature}s have uninformative \textsf{score}s, \texttt{0} should be used as the \textsf{score} on every \textbf{data line}.
+  In \textbf{BED6+} \textbf{file}s where all \textbf{feature}s have uninformative \textsf{score}s, \texttt{0} should be used as the \textsf{score} on every \textbf{data line}.
   A visual representation of the \ac{BED} format may shade \textbf{feature}s differently depending on their \textsf{score}.
 
 \item \textsf{strand}: Strand that the~\textbf{feature} appears on.
   The \textsf{strand} may either refer to the~\texttt{+}~(sense or coding) strand or the~\texttt{-}~(antisense or complementary) strand.
-  If the~\textbf{feature} has no \textsf{strand} information or unknown \textsf{strand}, then a dot~(\texttt{.}) must be used as a default value.
-  \textsf{strand} cannot be empty in \textbf{BED6+} \textbf{file}s.
-  A parser should treat \textbf{files} that are not \textbf{BED6+} as if \textsf{strand} were \texttt{.}.
+  If the \textbf{feature} has no \textsf{strand} information or unknown \textsf{strand}, then a dot~(\texttt{.}) must be used as an uninformative value.
+  \textsf{strand} should be treated as \texttt{.} when parsing files that are not \textbf{BED6+}.
 \end{enumerate}
 
 \subsection{Display attributes}
@@ -238,19 +277,21 @@ \subsection{Display attributes}
 
 \item \textsf{thickStart}: Start position at which the~\textbf{feature} is visualized with a thicker or accented display.
   This value must be an integer between~\textsf{chromStart} and~\textsf{chromEnd}, inclusive.
-  There is no specified default value for~\textsf{thickStart}.
+  In \textbf{BED7+} \textbf{file}s where all \textbf{feature}s have uninformative \textsf{thickStart}s, the value of \textsf{chromStart} should be used as the \textsf{thickStart} on every \textbf{data line}.
 
 \item \textsf{thickEnd}: End position at which the~\textbf{feature} is visualized with a thicker or accented display.
   This value must be an integer greater than or equal to~\textsf{thickStart} and less than or equal to~\textsf{chromEnd}, inclusive.
-  In \ac{BED} \textbf{file}s with fewer than 7~\textbf{field}s, the whole~\textbf{feature} has thick display.
+  In \textbf{BED8+} \textbf{file}s where all \textbf{feature}s have uninformative \textsf{thickEnd}s, the value of \textsf{chromEnd} should be used as the \textsf{thickEnd} on every \textbf{data line}.
+  In \ac{BED} \textbf{file}s that are not \textbf{BED7+}, the whole~\textbf{feature} has thick display.
   In \textbf{BED7+}~\textbf{file}s, to achieve the same effect, set \textsf{thickStart}~equal to~\textsf{chromStart} and \textsf{thickEnd}~equal to~\textsf{chromEnd}.
-  If this~\textbf{field} is not specified but \textsf{thickStart}~is, then the entire~\textbf{feature} has thick display.
-  For \textbf{BED7+} \textbf{file}s that are not \textbf{BED8+}, there is no specified default value for~\textsf{thickEnd}.
+  If \textsf{thickEnd} is not specified but \textsf{thickStart}~is, then the entire~\textbf{feature} has thick display.
 
 \item \textsf{itemRgb}: A triple of integers that determines the color of this~\textbf{feature} when visualized.
   The triple is three integers separated by commas.
   Each integer is between~0 and~255, inclusive.
   To make a~\textbf{feature} black, \textsf{itemRgb}~may be a single~\texttt{0}, which is visualized identically to a~\textbf{feature} with \textsf{itemRgb} of \texttt{0,0,0}.
+  An \textsf{itemRgb} of~\texttt{0} is a special case and no other single-number value is valid.
+  In \textbf{BED9+} \textbf{file}s where all \textbf{feature}s have uninformative \textsf{itemRgb}s, \texttt{0} should be used as the \textsf{itemRgb} on every \textbf{data line}.
 \end{enumerate}
 
 \subsection{Blocks}
@@ -260,14 +301,12 @@ \subsection{Blocks}
 \item \textsf{blockCount}: Number of~\textbf{block}s in the~\textbf{feature}.
   \textsf{blockCount}~must be an integer greater than 0.
   \textsf{blockCount}~is mandatory in~\textbf{BED12+}~\textbf{file}s.
-  Empty~\textsf{blockCount} are not allowed, because~\textsf{blockSizes} and~\textsf{blockStarts} rely on~\textsf{blockCount}.
   A visual representation of the \ac{BED} format may have blocks appear thicker than the rest of the~\textbf{feature}.
 
 \item \textsf{blockSizes}: Comma-separated list of length~\textsf{blockCount} containing the size of each~\textbf{block}.
   There must be no spaces before or after commas.
   There may be a trailing comma after the last element of the list.
   \textsf{blockSizes}~is mandatory in \textbf{BED12+} \textbf{file}s.
-  Empty~\textsf{blockSizes} is not allowed, because \textsf{blockStarts}~cannot be verified without~\textsf{blockSizes}.
 
 \item \textsf{blockStarts}: Comma-separated list of length~\textsf{blockCount} containing each \textbf{block}'s~start position, relative to~\textsf{chromStart}.
   There must not be spaces before or after the commas.
@@ -280,7 +319,6 @@ \subsection{Blocks}
   Moreover, the~\textbf{block}s must not overlap.
   The list must be sorted in ascending order.
   \textsf{blockStarts}~is mandatory in~\textbf{BED12+} \textbf{file}s.
-  Empty~\textsf{blockStarts} is not allowed.
 \end{enumerate}
 
 \section{Examples}
@@ -323,8 +361,7 @@ \subsection{Mandatory fields}
 
 \subsection{Optional fields}\label{sec:optional}
 \begin{itemize}
-\item \textsf{name}: If a \textbf{feature} has no name, then a dot~(\texttt{.}) should be used.
-  Names should avoid using the space character even if the only \textbf{field separator} is a single tab character, because parsers may interpret a space as a \textbf{field separator}.
+\item \textsf{name}: Names should avoid using the space character even if the only \textbf{field separator} is a single tab character, because parsers may interpret a space as a \textbf{field separator}.
 
 \item \textsf{itemRgb}: Eight or fewer colors should be used as too many colors may slow down visualizations and are difficult for humans to distinguish.\footnote{``Frequently
     Asked Questions: Data File Formats.'' \ac{UCSC} Genome Browser FAQ,
@@ -339,20 +376,25 @@ \subsection{User-defined fields}
 Custom data \textbf{fields} may contain any printable 7-bit US \ac{ASCII} character (which includes spaces, but excludes tabs, newlines, and other control characters).
 Custom data \textbf{fields} can only be empty when a single tab is used as the \textbf{field separator} throughout the \textbf{file}.
 Definitions of a custom \ac{BED} format should restrict the type of each \textbf{field} to the extent possible.
-Each custom \textbf{field} should contain either one of the following data types or a comma-separated list of values of the same type:
-
-\noindent
-\addstackgap[\baselineskip]{\begin{tabularx}{\textwidth}{r L}
-  \toprule
-  Type & Definition \\
-  \midrule
-  Integer & String representation of 64-bit signed integer\footnote{\emph{IEEE 754--1985 IEEE Standard for Binary Floating-Point Arithmetic.} IEEE 754--1985, 1985} \\
-  Unsigned & String representation of 64-bit unsigned integer\footnotemark[10] \\
-  Float & String representation of 64-bit floating point number\footnotemark[10] \\
-  Character & One character, other than tab \\
-  String & One or more characters, other than tab \\
-  \bottomrule
-\end{tabularx}}
+Each custom \textbf{field} should contain either one of several specified data types~(\autoref{tab:custom-data-types}) or a comma-separated list of values of any type other than String.
+
+\begin{savenotes}
+  \begin{table}
+    \begin{tabularx}{\textwidth}{r L}
+      \toprule
+      Type & Definition \\
+      \midrule
+      Integer & Decimal string representation of 64-bit signed integer \\
+      Unsigned & Decimal string representation of 64-bit unsigned integer \\
+      Float & Decimal string representation of 64-bit floating point number\footnote{\emph{IEEE Standard for Binary Floating-Point Arithmetic.}
+              IEEE 754--1985, 1985} \\
+      Character & One character, other than tab \\
+      String & One or more characters, other than tab \\
+      \bottomrule
+    \end{tabularx}
+    \caption{\textbf{Custom field data types.}}\label{tab:custom-data-types}
+  \end{table}
+\end{savenotes}
 
 This specification does not contain a means for interchanging custom \ac{BED} format definitions.
 The AutoSQL format\footnote{Kent, W.~James.
@@ -363,14 +405,24 @@ \subsection{Sorting}
 \Ac{BED} \textbf{file}s should be sorted by~\textsf{chrom}, then by~\textsf{chromStart} numerically, and finally by~\textsf{chromEnd} numerically.
 \textsf{chrom} may be sorted using any scheme (such as lexicographic or numeric order), but all \textbf{data line}s with the same~\textsf{chrom} value should occur consecutively.
 For example, the lexicographic order of~\texttt{chr1}, \texttt{chr10}, \texttt{chr11}, \texttt{chr12}, {\ldots}, \texttt{chr2}, \texttt{chr20}, \texttt{chr21}, {\ldots}, \texttt{chr3}, {\ldots}, \texttt{chrX}, \texttt{chrY}, \texttt{chrM} is an acceptable sorting.
+This ordering is equivalent to sorting the \textbf{file} using the command \verb|LC\_ALL=C| \verb|sort| \verb|-k 1,1| \verb|-k 2,2n| \verb|-k 3,3n|.
 The numeric order of~\texttt{chr1}, \texttt{chr2}, {\ldots}, \texttt{chr21}, \texttt{chr22}, \texttt{chrM}, \texttt{chrX}, \texttt{chrY} is also acceptable.
 Arbitrary orderings of~\textsf{chrom} are allowed, but regardless of the \textbf{chromosome} sorting scheme, \textbf{data line}s for two \textbf{feature}s on the same \textbf{chromosome} should not have any \textbf{data line}s for \textbf{feature}s on other \textbf{chromosome}s between them.
 Multiple \textbf{feature}s that have the same~\textsf{chrom}, \textsf{chromStart}, and \textsf{chromEnd} can appear in any order.
+\textbf{Comment line}s and \textbf{blank line}s do not have to be sorted according to the schemes mentioned.
+
+Sorting is recommended because the implementation of downstream operations is easier if features of one chromosome are all grouped together and \textsf{chromStart} is non-decreasing within a chromosome.
+
+For \textbf{BED4+} files, a sorting scheme may also order by optional \textbf{field}s and any custom \textbf{field}s.
+A recommendation for how to do this is outside the scope of this version of the specification.
+Total deterministic sorting of \ac{BED} \textbf{file}s can prevent downstream analyses from producing different results depending on sort order.
 
 \subsection{Whitespace}\label{sec:whitespace}
-Though \textbf{data line}s may use any kind of horizontal whitespace as a delimiter between~\textbf{field}s, a single tab~(\texttt{{\textbackslash}t}) should be used.
+We recommend that only a single tab~(\texttt{{\textbackslash}t}) be used as \textbf{field separator}.
 This is because almost all tools support tabs while some tools do not support other kinds of whitespace.
-Also, spaces within the~\textsf{name}~\textbf{field} may be used only if the \textbf{field}~delimiter is tab throughout the \textbf{file}.
+Also, spaces within the~\textsf{name}~\textbf{field} may be used only if the \textbf{field separator} is tab throughout the \textbf{file}.
+
+It would be sensible for future major versions of this specification or overlay formats built on top of this specification to require that only a single tab be used as \textbf{field separator}.
 
 \subsection{Large \acs{BED} files}
 If a~\textbf{file} intended for visualization is over \SI{50}{\mebi\byte} in size, the~\textbf{file} should be converted to~\texttt{bigBed} format, which is an indexed binary format.\footnote{Kent, W.~James et al.
@@ -380,15 +432,21 @@ \subsection{Large \acs{BED} files}
 The~\texttt{bedToBigBed} program may perform this conversion.\footnote{``bigBed Track Format.''
   \Ac{UCSC} Genome Browser FAQ, \url{https://genome.ucsc.edu/goldenPath/help/bigBed.html}}
 
+Tabix is another option for storing larger \ac{BED} \textbf{file}s.\footnote{Li H.
+  (2011) ``Tabix: fast retrieval of sequence features from generic TAB-delimited files.''
+  \emph{Bioinformatics} 27(5):718--719.
+  \url{https://doi.org/10.1093/bioinformatics/btq671}}
+Tabix works only on \textbf{file}s using a single tab as the \textbf{field separator}.
+
 \section{Information supplied out-of-band}
 
 Some information about a \ac{BED} \textbf{file} can only be supplied unambiguously separately from the \textbf{data line}s of the \ac{BED} \textbf{file}.
 This specification does not contain a means for interchanging this information.
 Information that must be supplied out-of-band include:
 
-\begin{itemize}
+\begin{itemize}[noitemsep]
     \item Which of the first~4 to 12~\textbf{fields} are standard \ac{BED} \textbf{fields} and which are custom \textbf{field}s.
-    \item The genome assembly that define \textsf{chrom}, \textsf{chromStart}, and \textsf{chromEnd}.
+    \item The genome assembly that defines \textsf{chrom}, \textsf{chromStart}, and \textsf{chromEnd}.
     \item The semantics of \textbf{fields} such as \textsf{score}, \textsf{itemRgb}, thick vs.~thin positions, and block vs.~non-block positions.
     \item The definitions of custom \textbf{field}s.
     \item Whether the \textbf{field separator} is a single tab character.
@@ -397,17 +455,17 @@ \section{Information supplied out-of-band}
 \section{\acs{UCSC} track files}
 
 Track files are files that contain additional information intended for a visualization tool such as the \ac{UCSC} Genome Browser.\footnote{Haeussler, Maximilian et al.
-  (2019) ``The \acl{UCSC} Genome Browser database: 2019 update.''
+  (2019) ``The \acs{UCSC} Genome Browser database: 2019 update.''
   \emph{Nucleic Acids Research} 47(D1):D853--D858.
   \url{https://doi.org/10.1093/nar/gky1095}}
 Track files contain browser lines and track lines that precede lines from a file format supported by the Genome Browser.\footnote{``Displaying your own annotations in the Genome Browser.'' \ac{UCSC} Genome Browser FAQ, \url{https://genome.ucsc.edu/goldenPath/help/customTrack.html\#lines}}
-Track files are not valid \ac{BED} \textbf{file}s --- valid \ac{BED} \textbf{file}s must not have any browser or track lines.
+Track files are not valid \ac{BED} \textbf{file}s---valid \ac{BED} \textbf{file}s must not have any browser or track lines.
 To distinguish between \ac{BED} \textbf{file}s and track files, track files should use the file extension~\texttt{.track}.
 
 \section{Acronyms}
 
 % using the optional argument to acronym to set the label width causes it to use the list environment instead of description, which means we can't set nosep easily
-\setlist[description]{labelwidth=\widthof{\textbf{GA4GH}},nosep}
+\setlist[description]{labelwidth=\widthof{\textbf{\acs{GA4GH}}},nosep}
 \begin{acronym}
   \acro{ASCII}{American Standard Code for Information Interchange}
   \acro{BED}{Browser Extensible Data}
@@ -421,8 +479,7 @@ \section{Acronyms}
 \section{Acknowledgments}
 
 We thank W.~James Kent and the \ac{UCSC} Genome Browser team for creating the \ac{BED} format.
-We thank W.~James Kent and Hiram Clawson~(\ac{UCSC}); Eric Roberts~(University Health Network); John Marshall~(University of Glasgow); Aaron R.~Quinlan and Brent S.~Pedersen~(University of Utah); Ting Wang~(Washington University in St.~Louis); Daniel Perrett and Simon Brent (Wellcome Sanger Institute); Jasper Saris (Erasmus Medical Center); Zhenyu Zhang (University of Chicago); Andrew Yates (\ac{EMBL}---European Bioinformatics Institute); and the \ac{GA4GH} File Formats Task Team for comments on this specification.
-
+We thank W.~James Kent and Hiram Clawson~(\ac{UCSC}); Eric Roberts~(University Health Network); John Marshall~(University of Glasgow); Aaron R.~Quinlan and Brent S.~Pedersen~(University of Utah); Ting Wang~(Washington University in St.~Louis); Daniel Perrett and Simon Brent~(Wellcome Sanger Institute); Jasper Saris~(Erasmus Medical Center); Zhenyu Zhang (University of Chicago); Andrew Yates~(\ac{EMBL}---European Bioinformatics Institute); Michael Schatz~(Johns Hopkins University); Igor Dolgalev (New York University); Colin Diesh~(University of California, Berkeley); Alex Reynolds~(Altius Institute for Biomedical Sciences); Junjun Zhang~(Ontario Institute for Cancer Research); and the \ac{GA4GH} File Formats Task Team for comments on this specification.
 
 \end{document}
 
diff --git a/Makefile b/Makefile
index 66873b3c6..2f3842220 100644
--- a/Makefile
+++ b/Makefile
@@ -56,6 +56,8 @@ NEW =
 diff/%.pdf: %.tex
 	BIBINPUTS=:.. TEXINPUTS=:..:../new latexdiff-vc --pdf --dir diff --force --git --only-changes --graphics-markup=none --ignore-warnings --revision $(OLD) $(if $(NEW),--revision $(NEW)) $<
 
+diff/BEDv1.pdf: BEDv1.tex
+	BIBINPUTS=:.. TEXINPUTS=:..:../new latexdiff-vc --config LATEX=lualatex --pdf --dir diff --force --git --only-changes --graphics-markup=none --ignore-warnings --revision $(OLD) $(if $(NEW),--revision $(NEW)) $<
 
 show-styles:
 	@sed -n '/\\usepackage/s/.*{\(.*\)}$$/\1/p' *.tex | sort | uniq -c