chapterTree.tex

\chapter{Tree}

\section{Binary Tree}
\subsection{Basic Operations}
\runinhead{Get parent ref.} To get a parent reference (implicitly), \textit{return the Node} of the current recursion function to its parent to maintain the path. Sample code:
\begin{python}
# delete minimum node in the BST
def delete_min(x: Node) -> Node:
    if not x.left:
        return x.right
    x.left = delete_min(x.left)
    return x
\end{python}

\runinhead{Max depth.} DFS solution
\begin{python}
def probe(self, cur, depth):
    if not root:
        return depth
    else:
        return max(
            self.probe(cur.left, depth+1), 
            self.probe(cur.right, depth+1),
      )
\end{python}

\runinhead{Min depth.} Definition of min depth, lowest depth of leaf node. 

Notice, that the additional checks are necessary of missing either right or left child.
\begin{python}
def probe(self, cur, depth):
    if not cur: 
        return depth
    elif cur.right and not cur.left: 
        return self.probe(cur.right, depth+1)
    elif cur.left and not cur.right: 
        return self.probe(cur.left, depth+1)
    else: 
        return min(
            self.probe(root.left, depth+1), 
            self.probe(root.right, depth+1),
        )
\end{python}

\runinhead{Height.} The height of a node is the number of edges from the node to the deepest leaf.

\begin{python}
def dfs(self, cur):
    if not cur:
        return -1  # leaves index start from 0

    height = 1 + max(
        self.dfs(cur.left), 
        self.dfs(cur.right),
    )
    
    # do something
     
    return height
\end{python}

Application: leaf by leaf traversal by height. 
\begin{python}
def dfs(self, cur, leaves):
    if not cur:
        return -1  # leaves index start from 0

    height = 1 + max(
        self.dfs(cur.left, leaves), 
        self.dfs(cur.right, leaves),
    )
    if height >= len(leaves):
        leaves.append([])  # grow

    leaves[height].append(cur.val)
    return height
\end{python}
\runinhead{Construct path from root to a target.} To search a node in binary tree (not necessarily BST), use dfs:
\begin{python}
def dfs(self, cur, target, path, found: List[bool]):
    # post function call check
    if not cur:
      return        
    if found[0]:
      return 

    path.append(cur)
    if cur == target:
        found[0] = True

    self.dfs(cur.left, target, path, found)
    self.dfs(cur.right, target, path, found)
    if not found[0]:
        path.pop()  # 1 pop() corresponds to 1 append()
\end{python}
The \pyinline{found} is a wrapper for boolean to keep it referenced by all calling stack. 

\runinhead{Lowest common ancestor.} In BST, the searching is straightforward. 
\begin{python}
def find_lca(self, cur, p, q):
    if p.val > cur.val and q.val > cur.val:
        return self.find_lca(cur.right, p, q)
    if p.val < cur.val and q.val < cur.val:
        return self.find_lca(cur.left, p, q)
    return cur
\end{python}

Method 1: In normal binary tree, construct the path from root to $node_1$ and $node_2$ respectively, and \textbf{diff} the two paths. Time complexity: $O(\lg n)$, space complexity: $O(\lg n)$. 

Method 2: If the parent pointer is provided, it is possible to reduce the space complexity to $O(1)$, by using two pointers: 
\begin{python}
def find_lca(n1, n2):
    if not n1 or not n2:
        return None 
        
    d1, d2 = depth(n1), depth(n2)
    if d2 < d1:
        return find_lca(n2, n1)  # swap
        
    # move to the same depth 
    for _ in range(d2-d1):
        n2 = n2.parent  

    while n1 and not n1 == n2:  
        n1 = n1.parent
        n2 = n2.parent
        
    return n1
\end{python}

Method 3: In normal bindary tree and O(1) space.
\begin{python}
def count(self, node, p, q):
    if not node:
        return 0

    lcount = self.count(node.left, p, q)
    rcount = self.count(node.right, p, q)
    mcount = 1 if node in (p, q) else 0
    ret = lcount + rcount + mcount
    # lcount == 1 and rcount == 1 # 
    # or lcount == 1 and mcount == 1 or ...
    if ret == 2:
        self.ans = node
    return ret
\end{python}

\runinhead{Find all paths.} Find all paths from root to leafs. 

For every currently visiting node, add itself to path; search left, search right and pop itself. Record current result when reaching the leaf.
\begin{python}
def dfs(self, cur, path, ret):
    if not cur:
        return

    path.append(cur)
    if not cur.left and not cur.right:
        ret.append("->".join(map(repr, path)))

    self.dfs(cur.left, path, ret)
    self.dfs(cur.right, path, ret)
    path.pop()  # 1 append 1 pop
\end{python}

\runinhead{Leftmost node.} Find the leftmost node. 
\begin{python}
def leftmost(self, cur, l):
    """
    :param l: offset from center 0, negative means left side. 
    """
    if not cur:
        return l
    return min(
        self.leftmost(cur.left, l-1), 
        self.leftmost(cur.right, l+1),
    )
\end{python}

Rightmost node can be similarly found.

\runinhead{Diameter of a tree (graph).} The diameter of a tree $\equiv$ longest path in the tree. Intuitily, we need to find one end of the edge, and then find the other end. 

Core clues:
\begin{enumerate}
\item \rih{Find one end of the edge.} Start from any vertex, bfs to reach the farthest leaf.
\item \rih{Find the other end.} Start from this leaf node, bfs to reach the other farthest leaf. 
\end{enumerate}
\begin{python}
_, _, last = self.bfs(0, V)
level, pi, last = self.bfs(last, V)

def bfs(self, s, V) -> int, List[Vertex], Vertex:
    # bfs
    visited = [False for _ in range(len(V))]
    # predecessor 
    pi = [-1 for _ in range(len(V))]
    last = s
    level = 0
    q = [s]
    while q:
        l = len(q)
        for i in range(l):
            cur = q[i]
            last = cur
            visited[cur] = True
            for nbr in V[cur]:
                if not visited[nbr]:
                    pi[nbr] = cur
                    q.append(nbr)

        q = q[l:]
        level += 1

    return level, pi, last
\end{python}

, where $V$ is the vertices of graph $G$.

\subsection{Morris Traversal} 
Traversal with O(1) space. \footnote{\href{http://www.cnblogs.com/AnnieKim/archive/2013/06/15/MorrisTraversal.html}{ref}}
Three ways of traversal: in-order, pre-order, post-order.
Time complexity $O(3n).$ Find \pyinline{pre} twice, traverse \pyinline{cur} once.
\begin{figure}[hbtp]
\centering
\subfloat{\includegraphics[height=1.2in]{morris_time}}
\caption{Morris traversal time complexity}
\label{fig:morrisTime}
\end{figure}

Core:
\begin{enumerate}
\item Threading from \textbf{in-order} predecessor to \pyinline{cur}.
\item In-order consumes the \pyinline{cur} when going right, pre-order when going left, post-order consumes the left subtree path when going right. 
\end{enumerate}
\subsubsection{In-order}
Given the current node \pyinline{cur}, we know the next child node. But how does its predecessor knows \pyinline{cur}? Assign the current node's in-order predecessor's right child to itself (threading). Two ptr \pyinline{cur}, \pyinline{pre}. 

Process:
\begin{enumerate}
\item If no left, \textit{consume} \pyinline{cur}, go right 
\item If left, find in-order predecessor \pyinline{pre}
\begin{enumerate}
\item If no thread (i.e. no \pyinline{pre} right child), assign it to \pyinline{cur}; go left
\item If thread, \textit{consume} \pyinline{cur}, go right. ($\equiv$ no left). 
\end{enumerate}
\end{enumerate}

\begin{figure*}[!htb]
\centering
\subfloat{\includegraphics[height=2.45in]{morris_inorder}}
\caption{Morris in-order traversal}
\label{fig:morrisInorder}
\end{figure*}

Code:
\begin{python}
def morris_inorder(self, root):
    cur = root
    while cur:
        if not cur.left:
            self.consume(cur)
            cur = cur.right
        else:  
            pre = cur.left
            while pre.right and pre.right != cur:
                pre = pre.right

            if not pre.right:
                pre.right = cur
                cur = cur.left
            else:
                pre.right = None
                self.consume(cur)  # when pop the thread
                cur = cur.right
\end{python}
\subsubsection{Pre-order}
Similar to in-order. Pre-order consume the current node when setting the thread rather than removing the thread as in in-order.

Process:
\begin{enumerate}
\item If no left, \textit{consume} \pyinline{cur}, go right 
\item If left, find in-order predecessor \pyinline{pre}
\begin{enumerate}
\item If no thread (i.e. no \pyinline{pre} right child), assign it to \pyinline{cur}; \textit{consume} \pyinline{cur}, go left
\item If thread, go right. ($\equiv$ no left, but no \textit{consume}, since consume before). 
\end{enumerate}
\end{enumerate}
Code:
\begin{python}
def morris_preorder(self, root):
    cur = root
    while cur:
        if not cur.left:
            self.consume(cur)
            cur = cur.right
        else:
            pre = cur.left
            while pre.right and pre.right != cur:
                pre = pre.right

            if not pre.right:
                pre.right = cur
                self.consume(cur)  # when set the thread
                cur = cur.left
            else:
                pre.right = None
                cur = cur.right
\end{python}
\subsubsection{Post-order}
More tedious but solvable. The process is also similar to in-order.

Process:
\begin{enumerate}
\item Set a temporary var \pyinline{dummy.left = root}
\item If no left, go right
\item If left, find the in-order predecessor \pyinline{pre} in left tree
\begin{enumerate}
\item If no thread, set \pyinline{right = cur} thread; go left.
\item If thread,  set \pyinline{right = None}, reversely \textit{consume} the path from \pyinline{cur.left} to \pyinline{pre}; go right.
\end{enumerate}
\end{enumerate}
\begin{figure*}[!htb]
\centering
\subfloat{\includegraphics[height=2.8in]{morris_postorder}}
\caption{Morris post-order traversal}
\label{fig:morrisInorder}
\end{figure*}
Code:
\begin{python}
def morris_postorder(self, root):
    dummy = TreeNode(0)
    dummy.left = root
    cur = dummy
    while cur:
        if not cur.left:
            cur = cur.right
        else:
            pre = cur.left
            while pre.right and pre.right != cur:
                pre = pre.right

            if not pre.right:
                pre.right = cur
                cur = cur.left
            else:
                pre.right = None
                self.consume_path(cur.left, pre)
                cur = cur.right

def _reverse(self, fr, to):
    """Like reversing linked list"""
    if fr == to: return
    cur = fr
    nxt = cur.right
    while cur and nxt and cur != to:
        nxt.right, cur, nxt = cur, nxt, nxt.right

def consume_path(self, fr, to):
    self._reverse(fr, to)

    cur = to
    self.consume(cur)
    while cur != fr:
        cur = cur.right
        self.consume(cur)

    self._reverse(to, fr)
\end{python}

\section{Binary Search Tree (BST)}
\runinhead{Array and BST.}Given either the \textbf{preorder} or \textbf{postorder} (but not inorder) traversal of a BST containing N distinct keys, it is possible to reconstruct the shape of the BST. 
\subsection{Property} $\forall$ node, the node value is larger than the largest value in its left subtree; and is smaller than the smallest value in the righhht subtree:
$$
\max(node.left) \leq node.val \leq \min(node.right)
$$

Leftmost node is the smallest node of the tree; rightmost node is the largest node of the tree.

Find such info takes $O(n\lg n)$ for all subtrees; and we can cache such info into the following data structure to achieve $O(n)$.

\begin{python}
class BSTInfo:
    def __init__(self, sz, lo, hi):
        self.sz = sz
        self.lo = lo
        self.hi = hi
\end{python}
\subsection{Rank}
\runinhead{Calculates rank.}
\begin{enumerate}
\item When inserting: 
  \begin{enumerate}
  \item insert to an existing node: \pyinline{node.cnt_this += 1}
  \item insert to left subtree: \pyinline{node.cnt_left += 1}
  \item insert to right subtree: do nothing. 
\end{enumerate}
\item When querying rank:
  \begin{enumerate}
  \item query equals current node: \pyinline{return node.cnt_left}
  \item query goes to \textbf{left} node: \pyinline{return rank(node.left, val)};
  \item query goes to \textbf{right} node: \pyinline{return node.cnt_left} \pyinline{+ node.cnt_this + rank(node.right, val)}
  \end{enumerate}
Notice that the \pyinline{rank} calculates a val's rank in a subtree.
\end{enumerate}

\runinhead{Count of smaller number before itself.} Given an array $A$. For each element $A_i$ in the array, count the number of element before this element $A_i$ is smaller than it and return count number array. Average $O(n \log n)$
\\
Clues:
\begin{enumerate}
\item Put $A[:i+1]$ into a BST; so as to count the rank of $A[i]$ in the BST
\end{enumerate}
Codes:
\begin{python}
class Node:
  def __init__(self, val):
    """Records the left subtree size"""
    self.val = val
    self.cnt_left = 0
    self.cnt_this = 0
    self.left, self.right = None, None


class BST:
  def __init__(self):
    self.root = None

  def insert(self, root, val):
    """
    :return: subtree's root after insertion
    """
    if not root:
      root = Node(val)

    if root.val == val:
      root.cnt_this += 1
    elif val < root.val:
      root.cnt_left += 1
      root.left = self.insert(root.left, val)
    else:
      root.right = self.insert(root.right, val)

    return root

  def rank(self, root, val):
    """
    Rank in the root's subtree
    :return: number of items smaller than val
    """
    if not root:
      return 0
    if root.val < val:
      return (root.cnt_this+root.cnt_left+
              self.rank(root.right, val))
    elif root.val == val:
      return root.cnt_left
    else:
      return self.rank(root.left, val)


class Solution:
  def countOfSmallerNumberII(self, A):
    tree = BST()
    ret = []
    for a in A:
      tree.root = tree.insert(tree.root, a)
      ret.append(tree.rank(tree.root, a))

    return ret
\end{python}
Notice: if worst case $O(n \log n)$ is required, need to use Red-Back Tree - Section \ref{rbtree}. However, there is a more elegant way using Segment Tree - Section \ref{segmentTreeInversionCount}.


\subsection{Range search}
\runinhead{1-d range count}
\begin{java}
int size(Key lo, Key hi) {
    if (contains(hi)) return rank(hi)-rank(lo)+1;
    else              return rank(hi)-rank(lo);
}
\end{java}

\runinhead{Closest value} Find the value in BST that is closet to the \pyinline{target}.
\\
Clues:
\begin{enumerate}
\item Find the value just $\leq$ the target.
\item Find the value just $\geq$ the target.
\end{enumerate}
\
\\
Code for finding either the lower value or higher value:
\begin{python}
def find(self, root, target, ret, lower=True):
  """ret: result container"""
  if not root: return

  if root.val == target:
    ret[0] = root.val
    return

  if root.val < target:
    if lower:
      ret[0] = max(ret[0], root.val)

    self.find(root.right, target, ret, lower)
  else:
    if not lower:
      ret[0] = min(ret[0], root.val)

    self.find(root.left, target, ret, lower)
\end{python}

\runinhead{Closet values} Find $k$ values in BST that are closet to the \pyinline{target}.
\\\\
Clues:
\begin{enumerate}
\item Find the predecessors $\triangleq \{node | node.value \leq target\}$. Store in the stack. 
\item Find the successors $\triangleq \{node | node.value \geq target\}$. Store in the stack.
\item Merge the predecessors and successors as in merge in MergeSort to get the $k$ values. 
\end{enumerate}
\
\\
Code for finding the predecessors:
\begin{python}
def predecessors(self, root, target, stk):
  if not root: return

  self.predecessors(root.left, target, stk)
  if root.val <= target:
    stk.append(root.val)
    self.predecessors(root.right, target, stk)
\end{python}


\section{Binary Index Tree (BIT)}\label{BIT}
\subsection{Introduction}
A Fenwick tree or binary indexed tree is a data structure that can efficiently update elements and calculate prefix sums in a table of numbers.

Compared to Segment Tree \ref{section:segmentTree}, BIT is shorter and more elegant. BIT can do most of things that Segment Tree can do and it is easier to code. BIT updates and queries $$i\rightarrow prefixSum$$ in $O(\log n)$ time; however, Segment Tree can but BIT cannot query $$prefixSum \rightarrow i$$
\subsection{Implementation}
Given an array $A$ of length $n $ starting from $1$. prefix sum $s[i]\triangleq A_1+...+A_i$. BIT uses binary to maintain the array of prefix sum for querying and updating. For $i$-th node in the BIT, 
$$
N[i]=A_{j+1}+...+A_i
$$
, where $j=i-lowbit(i)$, i.e. set $i$'s lowest bit 1 to 0. $lowbit(i)$ can be defined as \pyinline{return i & -i}, using 2's complement. Notice that the summation ends with $A_i$ since easier to \pyinline{set}.

For the range, we use $(j, i]$ here instead of $[j, i)$ since more elegant for \pyinline{get(i)} and \pyinline{set(i)}
\\\\
Clues:
\begin{enumerate}
\item Binary 
\item Low bit
\item BIT uses array index starting from \textbf{1}, because 0 doesn't have $lowbit$. 0 is the dummy root.
\end{enumerate}
\begin{figure}[hbtp]
\centering
\subfloat{\includegraphics[height=1.1in]{BITget}}
\caption{Binary Indexed Tree \textit{get} Operation}
\label{fig:LABEL}
\end{figure}

\begin{figure}[hbtp]
\centering
\subfloat{\includegraphics[height=1.05in]{BIT}}
\caption{Binary Indexed Tree \textit{set} Operation}
\label{fig:LABEL}
\end{figure}

Time complexity, longest update is along the leftmost branch, which takes $O(\log_2 n)$ (e.g. 1, 10, 100, 1000, 10000); longest query is along a branch starting with node with all 1's (e.g. 1111, 1110, 1100, 1000), which also takes $O(\log_2 n)$.

Code:
\begin{python}
class BIT:
    def __init__(self, n):
        """
        BIT uses index starting from 1
        0 is the dummy root 
        """
        self.N = [0 for _ in range(n+1)]

    def lowbit(self, i):
        return i & -i

    def get(self, i):
        ret = 0
        while i > 0:
            ret += self.N[i]
            i -= self.lowbit(i)

        return ret
        
    def set(self, i, val):
        while i < len(self.N):
            self.N[i] += val
            i += self.lowbit(i)
\end{python}


\section{Segment Tree}\label{section:segmentTree}
\subsection{Introduction}
Segment Tree is specially built for \textit{range queries}. 

The structure of Segment Tree is a binary tree which each node has two attributes start and end denote an segment/interval. 

Notice that by practice, the interval is normally $[start, end)$ but sometimes it can be $[start, end]$, which depends on the question definition. 

Structure:  
\begin{lstlisting}[columns=flexible]
# a Count Segment Tree
                     [0, 4, count=3]
                     /             \
          [0,2,count=1]             [2,4,count=2]
          /         \               /            \
   [0,1,count=1] [1,2,count=0] [2,3,count=1], [3,4,count=1]
\end{lstlisting}
Variants:
\begin{enumerate}
\item Sum Segment Tree.
\item Min/Max Segment Tree.
\item Count Segment Tree. 
\end{enumerate}

For a Maximum Segment Tree, which each node has an extra value max to store the maximum value in this node's interval.

\subsection{Operations}
Segment Tree does a decent job for range queries.
\\
Components in Segment Tree operations:
\begin{enumerate}
\item Build
\item Query 
\item Modify
\item Search 
\end{enumerate}
Notice:
\begin{enumerate}
\item Only build need to change the start and end recursively.
\item Pre-check is preferred in recursive calls.
\end{enumerate}
Code: Notice the code has abstracted out segment tree functions of sum, min/max or count, by abstracting the subtree combine function to \pyinline{lambda}.
\begin{python}
DEFAULT = 0
f = lambda x, y: x+y


class Node:
    def __init__(self, start, end, val):
        self.lo, self.hi, self.val = lo, hi, val
        self.left, self.right = None, None


class SegmentTree:
    def __init__(self, A):
        self.A = A
        self.root = self.build_tree(0, len(self.A))

    def build_tree(self, lo, hi):
        """
        Bottom-up build
        segment: [lo, hi)
        Either check lo==hi-1 or have root.right 
        only if have root.left
        """
        if lo >= hi:   return None
        if lo == hi-1: return Node(lo, hi, self.A[lo])

        left  = self.build_tree(lo, (lo+hi)/2)
        right = self.build_tree((lo+hi)/2, hi)

        val = DEFAULT
        if left:  val = f(val, left.val)
        if right: val = f(val, right.val)
        root = Node(lo, hi, val)
        root.left  = left
        root.right = right

        return root

    def query(self, root, lo, hi):
        """
        Post-checking
        :type root: Node
        """
        if not root:
            return DEFAULT

        if lo <= root.lo and hi >= root.hi:
            return root.val

        if lo >= root.hi or  hi <= root.hi:
            return DEFAULT

        l = self.query(root.left,  lo, hi)
        r = self.query(root.right, lo, hi)
        return f(l, r)

    def modify(self, root, idx, val):
        """
        :type root: Node
        """
        if not root or idx < root.lo or idx >= root.hi:
            return

        if idx == root.lo and idx == root.hi-1:
            root.val = val
            self.A[idx] = val
            return

        self.modify(root.left,  idx, val)
        self.modify(root.right, idx, val)

        val = DEFAULT
        if root.left:  val = f(val, root.left.val)
        if root.right: val = f(val, root.right.val)
        
        root.val = val
\end{python}
The above code abstracts out segment tree function using \pyinline{lambda}. For a concrete example, see Count Segment Tree \ref{inversionReconstruct}. 

\section{Trie}
\subsection{Basic}
Trie is aka radix tree, prefix tree. 
\begin{figure}[hbtp]
\centering
\subfloat{\includegraphics[scale=.30]{trie.jpg}}
\caption{Trie}
\label{fig:trie} 
\end{figure}
\runinhead{Notice:}
\begin{enumerate}
\item Children are stored in HashMap rather than ArrayList. 
\item \pyinline{self.word} to stores the word and indicates whether a word ends at the current
node. 
\end{enumerate}
Codes:
\begin{python}
class TrieNode:
    def __init__(self, char):
        self.char = char
        self.word = None
        self.children = {}  # map from char to TrieNode


class Trie:
    def __init__(self):
        self.root = TrieNode(None)

    def add(self, word):
        cur = self.root
        for c in word:
            if c not in cur.children:
                cur.children[c] = TrieNode(c)
            cur = cur.children[c]
            
        cur.word = word
\end{python}

\subsection{Advanced}
Storage of words in TrieNode: 
\begin{enumerate}
\item Implicitly store the current word in the trie with a mark of \pyinline{is_ended}. 
\item Store the current char. 
\item When insert new word, do not override the existing TrieNode. A flag to indicate
whether there is a word ending here.
\end{enumerate}
\newpage
Code:
\begin{python}
class TrieNode:
    def __init__(self):
        # Implicit storage
        self.ended = False
        self.children = {}


class Trie:
    def __init__(self):
        self.root = TrieNode()

    def insert(self, word):
        cur = self.root
        for w in word:
            if w not in cur.children:   # not override
                cur.children[w] = TrieNode()

            cur = cur.children[w]

        cur.ended = True

    def search(self, word):
        cur = self.root
        for w in word:
            if w in cur.children:
                cur = cur.children[w]
            else:
                return False

        if not cur.ended:  # not ended here
            return False

        return True

    def startsWith(self, prefix):
        cur = self.root
        for w in prefix:
            if w in cur.children:
                cur = cur.children[w]
            else:
                return False

        return True
\end{python}
\subsection{Simplified Trie}
Simplified trie with dict as TrieNode
\begin{python}
root = {}
ends = []
for word in set(words):
    cur = root
    for c in word:
        nxt = cur.get(c, {})
        cur[c] = nxt
        cur = nxt

    ends.append((cur, len(word)))
\end{python}
\subsection{The Most Simplified Trie}
\begin{python}
# constructor 
TrieNode = lambda: defaultdict(TrieNode) 

# or
class TrieNode:
    def __init__(self):
        self.children = defaultdict(TrieNode)
        self.attr = None  # some attr with default values
        self.word = None  # a word ends here, value or index
\end{python}
\subsection{Extensions}
\runinhead{Search for multiple words} Search for combination of words e.g. ``unitedstates''.  When one word ended, start the search again from the root. One trick is to add threads between tails and the root; thus enable the search for multi-word combinations. 
\begin{figure}[!hbt]
\centering
\subfloat{\includegraphics[width=\linewidth]{trie2.png}}
\caption{Trie with threads from ending point to root}
\label{fig:trie2}
\end{figure}
\subsection{Applications}
\begin{enumerate}
\item Word search in matrix.
\item Word look up in dictionary.
\end{enumerate}