[cvs] / ribopal / transducer.tex Repository:
ViewVC logotype

View of /ribopal/transducer.tex

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.110 - (download) (as text) (annotate)
Thu May 20 23:32:14 2010 UTC (3 months, 3 weeks ago) by oscar
Branch: MAIN
CVS Tags: HEAD
Changes since 1.109: +46 -15 lines
small changes in DP algorithms
\documentclass[10pt]{article}

% amsmath package, useful for mathematical formulas
\usepackage{amsmath}
% amssymb package, useful for mathematical symbols
\usepackage{amssymb}

% graphicx package, useful for including eps and pdf graphics
% include graphics with the command \includegraphics
\usepackage{graphicx}

% Color
\usepackage{color} 

% If-then-else
\usepackage{ifthen}

% parse trees
\input{parsetree.sty}

% algorithms
\usepackage[boxed,noend]{algorithm2e}

% Transducer-related latex command definitions
\newcommand\charat[2]{\mbox{symbol}(#1,#2)}

\newcommand\gappedalphabet[1]{(\Omega_{#1} \cup \{\epsilon\})}
\newcommand\gapsquared{\gappedalphabet{}^2}
\newcommand\gappedpair[2]{\gappedalphabet{#1} \times \gappedalphabet{#2}}

\newcommand\wtrans[4]{#1(#2 : [#3] : #4)}
\newcommand\transequiv{\cong}

\newcommand\compose{\circ}
\newcommand\identity{{\cal I}}

\newcommand\fork{\otimes}
\newcommand\idfork{\Upsilon}
\newcommand\forkn[1]{\idfork(#1)}
\newcommand\forkfun[2]{\forkn{#1, #2}}

\newcommand\States{\Phi}
\newcommand\statesof[1]{\States_{#1}}

\newcommand\Transitions{\tau}
\newcommand\transitionsof[1]{\Transitions_{#1}}

\newcommand\startstate{\phi_S}
\newcommand\laststate{\phi_E}
\newcommand\startstateof[1]{\phi_{S;#1}}
\newcommand\laststateof[1]{\phi_{E;#1}}

\newcommand\weight{{\cal W}}
\newcommand\weightfunof[1]{\weight_{#1}}
\newcommand\transweightfun[1]{\weightfunof{#1}^{\mbox{\small trans}}}
\newcommand\emitweightfun[1]{\weightfunof{#1}^{\mbox{\small emit}}}

\newcommand\sumoverpaths[1]{\transweightfun{#1}(\{\pi_{#1}\})}

\newcommand\transviawait[1]{\weightfunof{#1}^{\mbox{\small via-wait}}}
\newcommand\transtowait[1]{\weightfunof{#1}^{\mbox{\small to-wait}}}

\newcommand\numberofstates[1]{|\statesof{#1}|}
\newcommand\numberoftransitions[1]{|\transitionsof{#1}|}

\newcommand\statesoftype[1]{\States_{#1}}
\newcommand\statetype{\mbox{type}}

\newcommand\dup[1]{\left( \begin{array}{l} #1 \\ #1 \end{array} \right)}

\newcommand\numberofleaves{\kappa}
\newcommand\numberofinternalnodes{\numberofleaves - 1}
\newcommand\numberofnodes{2\numberofleaves - 1}
\newcounter{LeafIndex}
\newcommand\leafnode[1]{\numberofleaves \ifthenelse{\equal{#1}{1}}{}{\setcounter{LeafIndex}{#1} \addtocounter{LeafIndex}{-1} +\arabic{LeafIndex}}}

\newcommand\seqlen[1]{|#1|}

\newcommand\outputs{{\cal S}}
\newcommand\outputn[1]{\outputs_{#1}}
\newcommand\outseqlen[1]{\seqlen{\outputn{#1}}}

\newcommand\order[1]{{\cal O}(#1)}

% State- and type-sets
\newcommand\typeset[1]{{\cal T}_{\mbox{\small #1}}}
\newcommand\stateset[1]{\statesof{\mbox{\small #1}}}

% H_n state tuples, state-sets and type-sets
\newcommand\hstate{(\upsilon,b_l,e_l,b_r,e_r)}
\newcommand\hstatedest{(\upsilon',b'_l,e'_l,b'_r,e'_r)}

\newcommand\externalsuffix{ext}
\newcommand\internalsuffix{int}
\newcommand\leftsuffix{left-int}
\newcommand\rightsuffix{right-int}
\newcommand\waitsuffix{wait}

\newcommand\externalcascades{\stateset{\externalsuffix}}
\newcommand\internalcascades{\stateset{\internalsuffix}}
\newcommand\leftcascades{\stateset{\leftsuffix}}
\newcommand\rightcascades{\stateset{\rightsuffix}}
\newcommand\waitstates{\stateset{\waitsuffix}}

\newcommand\externaltypes{\typeset{\externalsuffix}}
\newcommand\internaltypes{\typeset{\internalsuffix}}
\newcommand\lefttypes{\typeset{\leftsuffix}}
\newcommand\righttypes{\typeset{\rightsuffix}}
\newcommand\waittypes{\typeset{\waitsuffix}}

% M_n state tuples, state-sets and type-sets
\newcommand\mstate{(\rho,\upsilon,b_l,e_l,b_r,e_r)}
\newcommand\mstatedest{(\rho',\upsilon',b'_l,e'_l,b'_r,e'_r)}

% Q_n state tuples, state-sets and type-sets
\newcommand\qstate{(\rho,\upsilon,b_l,b_r)}
\newcommand\qstatedest{(\rho',\upsilon',b'_l,b'_r)}

\newcommand\matchsuffix{match}
\newcommand\nullsuffix{null}
\newcommand\leftinsertsuffix{left-ins}
\newcommand\rightinsertsuffix{right-ins}
\newcommand\leftdeletesuffix{left-del}
\newcommand\rightdeletesuffix{right-del}
\newcommand\leftemitsuffix{left-emit}
\newcommand\rightemitsuffix{right-emit}
\newcommand\qwaitsuffix{wait}

\newcommand\matchstates{\stateset{\matchsuffix}}
\newcommand\nullstates{\stateset{\nullsuffix}}
\newcommand\leftinsertstates{\stateset{\leftinsertsuffix}}
\newcommand\rightinsertstates{\stateset{\rightinsertsuffix}}
\newcommand\leftdeletestates{\stateset{\leftdeletesuffix}}
\newcommand\rightdeletestates{\stateset{\rightdeletesuffix}}
\newcommand\leftemitstates{\stateset{\leftemitsuffix}}
\newcommand\rightemitstates{\stateset{\rightemitsuffix}}
\newcommand\qwaitstates{\stateset{\qwaitsuffix}}

\newcommand\matchtypes{\typeset{\matchsuffix}}
\newcommand\nulltypes{\typeset{\nullsuffix}}
\newcommand\leftinserttypes{\typeset{\leftinsertsuffix}}
\newcommand\rightinserttypes{\typeset{\rightinsertsuffix}}
\newcommand\leftdeletetypes{\typeset{\leftdeletesuffix}}
\newcommand\rightdeletetypes{\typeset{\rightdeletesuffix}}
\newcommand\leftemittypes{\typeset{\leftemitsuffix}}
\newcommand\rightemittypes{\typeset{\rightemitsuffix}}
\newcommand\qwaittypes{\typeset{\qwaitsuffix}}


% DP
\newcommand\envelope[2]{is\_in\_envelope(#1,#2)}

\newcommand\newTransName[1]{t_{#1}}

\newcommand\numStates[1]{N_{#1}}

\newcommand\leftFromI{i_l}
\newcommand\rightFromI{i_r}

\newcommand\rightToI{i'_r}
\newcommand\leftToI{i'_l}

\newcommand\rightProfTo{e'_r}
\newcommand\leftProfTo{e'_l}

\newcommand\rightProfFrom{e_r}
\newcommand\leftProfFrom{e_l}

\newcommand\mTo{m'}
\newcommand\mFrom{m}

\newcommand\qTo{q'}
\newcommand\qFrom{q}

\newcommand\emitProb{\mathcal{E}}
\newcommand\getprofiletype{get\_state\_type}
\newcommand\incomingLeftProfile[1]{\mbox{incoming\_left\_profile\_indices}(#1)}
\newcommand\incomingRightProfile[1]{\mbox{incoming\_right\_profile\_indices}(#1)}
\newcommand\incomingM[1]{\mbox{incoming\_match\_states}(#1)}
\newcommand\incomingL[1]{\mbox{incoming\_left\_emit\_states}(#1)}
\newcommand\incomingR[1]{\mbox{incoming\_right\_emit\_states}(#1)}

\newcommand\addToDPFunction{sum\_paths\_to}   % not sure if this is the best name, but better than fillZ I think - IH
\newcommand\addToDP[3]{\addToDPFunction(#1,#2,#3)} % the arguments are q', e'_l, e'_r

\newcommand\profTrans[1]{t_{#1}}
\newcommand\profiledelete[1]{\phi_D^{(#1)}}
\newcommand\profileunknown[1]{\phi_{\tau}^{(#1)}}
\newcommand\profilewait[1]{\phi_W^{(#1)}}
\newcommand\profileterminate{\profilewait{\mbox{\small end}}}

\newcommand\currentstate{m'}
\newcommand\newstate{m}
\newcommand\instates{states\_in}
\newcommand\inweights{weights\_in}
\newcommand\sample{sample}
\newcommand\stateindex[2]{index\_of_{#1}(#2)}

\newcommand\emptyarray{ ( )}



% Begin
\begin{document}



% Notes by IH
\newcommand\redpen[1]{{\bf \textcolor{red}{#1}} \textcolor{black}{}}


% Start of document

\tableofcontents

\section{Transducer formalism}
In this section we make some of our definitions more precise,
 including notation for state types, weights (i.e. probabilities),
 transducer composition, etc.

Notation relating to sequences (sequence length, sequence concatenation, etc.) is deferred to the end of the document,
 so as not to interrupt the flow.

We first review the letter transducer $T$,
 transduction weight $\wtrans{\weight}{x}{T}{y}$ and
 equivalence $T \transequiv T'$.

We then define two operations for combining transducers:
 composition ($T \compose U$) which unifies $T$'s output with $U$'s input,
and
 fork ($T \fork U$) which unifies $T$'s and $U$'s input.

We define our ``normal'' form for letter transducers,
 partitioning the states and transitions into types $\{S,M,D,I,N,W,E\}$ based on their input/output labeling.
This normal form is common in the bioinformatics literature (see e.g. Durbin et al)
 and forms the basis for our previous constructions of phylogenetic transducers (Holmes 2003; Bradley and Holmes, 2009).

We define exact-match and identity transducers, and give constructions of these.

We define our hierarchy of phylogenetic transducers, and give constructions and inference algorithms.

\subsection{Input-output automata}
{\em The letter transducer} is a tuple $T = (\Omega_I, \Omega_O, \States, \startstate, \laststate, \Transitions, \weight)$
where
 $\Omega_I$ is an input alphabet,
 $\Omega_O$ is an output alphabet,
$\States$ is a set of states,
$\startstate \in \States$ is the start state,
$\laststate \in \States$ is the end state,
$\Transitions \subseteq \States \times \gappedalphabet{I} \times \gappedalphabet{O} \times \States$ is the transition relation, and
$\weight:\Transitions \to [0,\infty)$ is the transition weight function.

{\em Transition paths:}
The transitions in $\Transitions$ correspond to the edges of a labeled multidigraph over states in $\States$.
Let $\Pi \subset \Transitions^\ast$ be the set of all labeled transition paths from $\startstate$ to $\laststate$.

{\em I/O sequences:}
Let
$S_I:\Pi \to \Omega_I^\ast$ and
$S_O:\Pi \to \Omega_O^\ast$
be functions returning the input and output sequences of a transition path,
obtained by concatenating the respective transition labels.

{\em Transduction weight:}
For a transition path $\pi \in \Pi$,
define the path weight $\weight(\pi)$ and
(for sequences $x \in \Omega_I^\ast, y \in \Omega_O^\ast$)
the transduction weight $\wtrans{\weight}{x}{T}{y}$

\begin{eqnarray*}
\weight(\pi) & = & \prod_{\Transitions \in \pi} \weight(\Transitions) \\
\wtrans{\weight}{x}{T}{y} & = & \sum_{\pi \in \Pi, S_I(\pi)=x, S_O(\pi)=y} \weight(\pi)
\end{eqnarray*}

{\em Equivalence:}
If for transducers $T,T'$ it is true that $\wtrans{\weight}{x}{T}{y}=\wtrans{\weight'}{x}{T'}{y}\ \forall x,y$ then the transducers are equivalent in weight, $T \transequiv T'$.

\subsection{State types}
{\em Types of state and transition:}
If there exists a state type function, $\statetype:\States \to {\cal T}$, mapping states to types in ${\cal T} = \{S,M,D,I,N,W,E\}$,
and functions $\transweightfun{}: \States^2 \to [0,\infty)$ and $\emitweightfun{}: \gappedpair{I}{O} \times \States \to [0,\infty)$,
such that
\begin{eqnarray*}
\States_U & = & \{ \phi: \phi\in\States, \statetype(\phi) \in U \subseteq {\cal T} \} \\
\statesoftype{S} & = & \{ \startstate \} \\
\statesoftype{E} & = & \{ \laststate \} \\
\States & \equiv & \statesoftype{SMDINWE} \\   % redundant
\Transitions_M & \subseteq & \statesoftype{W} \times \Omega_I \times \Omega_O \times \statesoftype{M} \\
\Transitions_D & \subseteq & \statesoftype{W} \times \Omega_I \times \{\epsilon\} \times \statesoftype{D} \\
\Transitions_I & \subseteq & \statesoftype{SMDIN} \times \{\epsilon\} \times \Omega_O \times \statesoftype{I} \\
\Transitions_N & \subseteq & \statesoftype{SMDIN} \times \{\epsilon\} \times \{\epsilon\} \times \statesoftype{N} \\
\Transitions_W & \subseteq & \statesoftype{SMDIN} \times \{\epsilon\} \times \{\epsilon\} \times \statesoftype{W} \\
\Transitions_E & \subseteq & \statesoftype{W} \times \{\epsilon\} \times \{\epsilon\} \times \statesoftype{E} \\
\Transitions & = & \Transitions_M \cup \Transitions_D \cup \Transitions_I \cup \Transitions_N \cup \Transitions_W \cup \Transitions_E \\
\weight(\phi_{\mbox{src}},\omega_{\mbox{in}},\omega_{\mbox{out}},\phi_{\mbox{dest}}) & \equiv & \transweightfun{}(\phi_{\mbox{src}},\phi_{\mbox{dest}}) \emitweightfun{}(\omega_{\mbox{in}},\omega_{\mbox{out}},\phi_{\mbox{dest}})
\end{eqnarray*}
then the transducer is in {\em (weak) normal form}.
If, additionally, $\statesoftype{N} = \emptyset$, then the transducer is in {\em strict normal form}.

{\em Interpretation:}
A normal-form transducer can be thought of as associating inputs and outputs with states, rather than transitions.
The state types are
 start ($S$) and end ($E$);
 wait ($W$), in which the transducer waits for input;
 match ($M$) and delete ($D$), which process input symbols;
 insert ($I$), which writes additional output symbols; and
 null ($N$), which has no associated input or output.
All transitions also fall into one of these types, via the destination states;
thus, $\Transitions_M$ is the set of transitions ending in a match state, etc.
The transition weight ($\weight$) factors into a term that is independent of the input/output label ($\transweightfun{}$)
and a term that is independent of the source state ($\emitweightfun{}$).

{\em Universality:}
For any weak-normal form transducer $T$ there exists an equivalent in strict-normal form which can be found by applying the state-marginalization algorithm to eliminate null states.
For any transducer, there is an equivalent letter transducer in weak normal form, and therefore, in strict normal form.

\subsection{Composition ($T\compose U$) unifies output of $T$ with input of $U$}

{\em Transducer composition:}
Given letter transducers
 $T = (\Omega_X, \Omega_Y, \States, \startstate, \laststate, \Transitions, \weight)$ and
 $U = (\Omega_Y, \Omega_Z, \States', \startstate', \laststate', \Transitions', \weight')$,
there exists a letter transducer $T\compose U = (\Omega_X, \Omega_Z, \States'' \ldots \weight'')$ 
such that $\forall x \in \Omega_X^\ast, z \in \Omega_Z^\ast$:
\[
\wtrans{\weight''}{x}{T\compose U}{z} = \sum_{y\in\Omega_Y^\ast} \wtrans{\weight}{x}{T}{y} \wtrans{\weight'}{y}{U}{z}
\]

{\em Example construction:}
Assume without loss of generality that $T$ and $U$ are in strict normal form.
Then
\begin{eqnarray*}
\States'' & \subseteq & \States \times \States' \\
\weight''((t,u),\omega_x,\omega_z,(t',u')) & = & \left\{ \begin{array}{ll}
\delta(t=t') \delta(\omega_x=\epsilon) \weight'(u,\epsilon,\omega_z,u') & \mbox{if $\statetype(u) \neq W$} \\
\weight(t,\omega_x,\epsilon,t') \delta(\omega_z=\epsilon) \delta(u=u') & \mbox{if $\statetype(u) = W,\statetype(t') \notin \{M,I\}$} \\
\displaystyle
\sum_{\omega_y \in \Omega_Y} \weight(t,\omega_x,\omega_y,t') \weight'(u,\omega_y,\omega_z,u') & \mbox{if $\statetype(u) = W,\statetype(t') \in \{M,I\}$} \\
0 & \mbox{otherwise}
\end{array} \right.
\end{eqnarray*}
The resulting transducer is in weak-normal form (it can be converted to a strict-normal form transducer by eliminating null states).

\subsection{Fork ($T\fork U$) unifies input of $T$ with input of $U$}

{\em Transducer fork:}
Given letter transducers
 $T = (\Omega_X, \Omega_T, \States, \startstate, \laststate, \Transitions, \weight)$ and
 $U = (\Omega_X, \Omega_U, \States', \startstate', \laststate', \Transitions', \weight')$,
there exists a letter transducer $T\fork U = (\Omega_X, \Omega_V, \States'' \ldots \weight'')$
where $\Omega_V \subseteq \gappedpair{T}{U}$
such that $\forall x \in \Omega_X^\ast, t \in \Omega_T^\ast, u \in \Omega_U^\ast$:
\[
\wtrans{\weight}{x}{T}{t} \wtrans{\weight'}{x}{U}{u} = \wtrans{\weight''}{x}{T\fork U}{(t,u)}
\]
where the term on the right is defined as follows
\[
\wtrans{\weight''}{x}{T\fork U}{(t,u)} = \sum_{v \in \Omega_V^\ast, S_1(v)=t, S_2(v)=u } \wtrans{\weight''}{x}{T\fork U}{v}
\]

{\em Example construction:}
Assume without loss of generality that $T$ and $U$ are in strict normal form.
Then
\begin{eqnarray*}
\States'' & \subseteq & \States \times \States' \\
\weight''((t,u),\omega_x,(\omega_y,\omega_z),(t',u')) & = & \left\{ \begin{array}{ll}
\delta(t=t') \delta(\omega_x=\omega_y=\epsilon) \weight'(u,\epsilon,\omega_z,u') & \mbox{if $\statetype(u) \neq W$} \\
\weight(t,\epsilon,\omega_x,t') \delta(\omega_x=\omega_z=\epsilon) \delta(u=u') & \mbox{if $\statetype(u) = W,\statetype(t) \neq W$} \\
\weight(t,\omega_x,\omega_y,t') \weight'(u,\omega_x,\omega_z,u') & \mbox{if $\statetype(t) = \statetype(u) = W$} \\
0 & \mbox{otherwise}
\end{array} \right.
\end{eqnarray*}
The resulting transducer is in weak-normal form (it can be converted to a strict-normal form transducer by eliminating null states).

\subsection{Identities}

{\em Identity:}
There exists a transducer $\identity=(\Omega,\Omega\ldots)$ that copies its input identically to its output.
An example construction (not in normal form) is
\begin{eqnarray*}
\identity & = & (\Omega,\Omega,\{\phi\},\phi,\phi,\transitionsof{\identity},1) \\
\transitionsof{\identity} & = & \left\{(\phi,\omega,\omega,\phi):\omega\in\Omega\right\}
\end{eqnarray*}

{\em Identity-fork:}
There exists a transducer $\idfork=(\Omega,\Omega^2\ldots)$ that duplicates its input in parallel.
That is, for input $x_1 x_2 x_3 \ldots$ it gives output $\dup{x_1}\dup{x_2}\dup{x_3}\ldots$.
An example construction (not in normal form) is
\begin{eqnarray*}
\idfork & = & (\Omega,\Omega^2,\{\phi\},\phi,\phi,\transitionsof{\idfork},1) \\
\transitionsof{\idfork} & = & \left\{\left(\phi,\omega,\dup{\omega},\phi\right):\omega\in\Omega\right\}
\end{eqnarray*}
It can be seen that $\idfork \transequiv \identity \fork \identity$.

Define $\forkfun{T}{U} \equiv T \fork U$.   % \equiv not \transequiv because we are defining
Thus, a fork is like a parallel composition, since this is a unification of $\idfork$'s two outputs with the inputs of $T$ and $U$.

We can illustrate this with a diagram:
\begin{parsetree}
 ( .$\idfork$. .$T$. .$U$. )
\end{parsetree}

We use the notation $\forkfun{T}{U}$ in several places.
In our phylogenetic transducer, for example,
\[
H_n = \forkfun{B_l \compose E_l}{B_r \compose E_r} = (B_l \compose E_l) \fork (B_r \compose E_r)
\]
and we write $H_n$-states as a tuple $\hstate$
where $\upsilon\in\{S,W,M,E\}$ denotes the state of a normal-form construction of $\idfork$.

\subsection{Exact-match transducers}

{\em Exact-match transducer:}
For $S \in \Omega^\ast$, there exists a transducer $\Delta(S) = (\Omega,\emptyset\ldots \weight)$
that accepts the specific sequence $S$ with weight one, but rejects all other input sequences
\[
\wtrans{\weight}{x}{\Delta(S)}{\epsilon} = \delta(x=S)
\]
Note that $\Delta(S)$ has a null output alphabet, and so its only possible output is the empty string.

In general, if $T=(\Omega_X,\Omega_Y \ldots \weight')$ is any transducer then $\forall x \in \Omega_X^\ast, y \in \Omega_Y^\ast$
\[
\wtrans{\weight'}{x}{T}{y} \equiv \wtrans{\weight}{x}{T \compose \Delta(y)}{\epsilon}
\]

An example construction (not in normal form) is
\begin{eqnarray*}
\Delta(S) & = & (\Omega,\emptyset,\mathbb{Z}_{\seqlen{S}+1},0,\seqlen{S},\transitionsof{\Delta},1) \\
\transitionsof{\Delta} & = & \left\{\left(n,\charat{S}{n+1},\epsilon,n+1\right):n \in \mathbb{Z}_{\seqlen{S}}\right)\}
\end{eqnarray*}
where $\mathbb{Z}_N$ is the set of integers modulo $N$,
 and $\charat{S}{k}$ is the $k$'th position of $S$ (for $1 \leq k \leq \seqlen{S}$).
Note that this construction has $\seqlen{S}+1$ states.

For later convenience it is useful to define the function
\begin{eqnarray*}
\profTrans{\Delta(S)}(i,j) & = & \transweightfun{\Delta(S)}(i,j) \\
& = & \delta(i+1 = j)
\end{eqnarray*}

% A construction in strict-normal form is
% \begin{eqnarray*}
% \Delta(S) & = & (\Omega,\emptyset,\statesof{\Delta},\startstate,\laststate,\transitionsof{\Delta},1) \\
% \statesof{\Delta} & = & \{ \startstate, \laststate \}\ \cup\ \{ \phi_W^{(n)}: n \in \mathbb{Z}_{\seqlen{S}+1} \}\ \cup\ \{ \phi_D^{(n+1)}: n \in \mathbb{Z}_{\seqlen{S}} \} \\
% \transitionsof{\Delta} & = & \{(\startstate,\epsilon,\epsilon,\phi_W^{(0)}),\ (\phi_W^{(\seqlen{S})},\epsilon,\epsilon,\laststate)\} \\
% & & \ \cup\ \{(\phi_W^{(n)},\charat{S}{n+1},\epsilon,\phi_D^{(n+1)}) : n \in \mathbb{Z}_{\seqlen{S}} \} \\
% & & \ \cup\ \{(\phi_D^{(n+1)},\epsilon,\epsilon,\phi_W^{(n+1)}) : n \in \mathbb{Z}_{\seqlen{S}} \}
% \end{eqnarray*}

\subsection{Generative and singlet transducers}

{\em Generative transducers:}
A transducer $T$ is generative if it has a null input alphabet, and so rejects any input except the empty string.
Then $T$ may be regarded as a state machine that generates an output, equivalent to a Hidden Markov Model.
Define the probability (weight) distribution over the output sequence
\[
P(x|T) \equiv \wtrans{\weight}{\epsilon}{T}{x}
\]

{\em Singlet transducers:}
If the output alphabet of $T$ is a simple alphabet $\Omega$, rather than (say) a compound alphabet $\gapsquared$,
we refer to $T$ as a {\em singlet} transducer.
(This is not a very precise distinction, but it does not need to be; it's sufficient to think of ``singlet transducers'' and ``generative transducers'' as the same thing.)

\subsection{Complexities}
\begin{eqnarray*}
\numberofstates{T \compose U} & \lesssim & \numberofstates{T} \numberofstates{U} \\
\numberofstates{T \fork U} & \lesssim & \numberofstates{T} \numberofstates{U} \\
\numberofstates{\Delta(S)} & \sim & \seqlen{S} + 1
\end{eqnarray*}

\redpen{What about the numbers of transitions?}

The complexity of computing $\wtrans{\weight}{x}{T}{y}$ is similar to the Forward algorithm:
the time complexity is $\order{\numberoftransitions{T} \seqlen{x} \seqlen{y}}$ and
the memory complexity is $\order{\numberofstates{T} \min\left(\seqlen{x},\seqlen{y}\right)}$.
Memory complexity rises to $\order{\numberofstates{T} \seqlen{x} \seqlen{y}}$ if a traceback is required.
Analogously to the Forward algorithm, there are checkpointing versions which trade memory complexity for time complexity.


\subsection{Chapman-Kolmogorov equation}

If $T_t$ is a transducer parameterized by a continuous time parameter $t$,
modeling the evolution of a sequence for time $t$ under a continuous-time Markov process,
then the Chapman-Kolmogorov equation can be expressed as a transducer equivalence
\[
T_t \compose T_u \transequiv T_{t+u}
\]

The TKF91 transducers, for example, have this property.
Furthermore, for TKF91, $T_{t+u}$ has the same number of states and transitions as $T_t$,
so this is a kind of self-similarity.

\section{Hierarchy of phylogenetic transducers}
\label{hierarchy}
Suppose we have an evolutionary model defined on a rooted binary phylogenetic tree,
and a set of $\numberofleaves$ observed sequences associated with the leaf nodes of the tree.
Since the tree is binary, each node $n$ has a single incoming branch with length $t_n$, and two child nodes denoted $C_n$. 

The nodes are numbered in preorder, with internal nodes $(1 \ldots \numberofinternalnodes)$ preceding leaf nodes $(\leafnode{1} \ldots \numberofnodes)$. Node $1$ is the root.

Let $\outputs = (\outputn{\leafnode{1}}, \outputn{\leafnode{2}} \ldots \outputn{\numberofnodes})$ denote the observed leaf-node sequences,
with $\outputn{n} \in \Omega^\ast\ \forall n$.

\subsection{Model structure}
\subsubsection{The original model}

If $n$ is a leaf node, define $F_n = \identity$.
Otherwise, let $(l,r)$ denote the left and right child nodes, and define
\[
F_n = \forkfun{B_l \compose F_l}{B_r \compose F_r}
\]

which we can represent as
\begin{parsetree}
 ( .$\idfork$. ( .$B_l$. .$F_l$. ) ( .$B_r$. .$F_r$. )  )
\end{parsetree}
\\

The complete, generative transducer is
$F_0 = R \compose F_1$
where $R$ is a singlet transducer modeling the distribution of ancestral sequences at the root node.

The output alphabet of $F_0$ is $\gappedalphabet{}^\numberofleaves$ where $\numberofleaves$ is the number of leaf sequences.
Letting $S_n:\Transitions^\ast \to \Omega^\ast$ denote the map from a transition path $\pi$ to the $n$'th output leaf sequence (with gaps removed),
we define the output distribution
\[
P(\outputs|F_0) = \wtrans{\weight}{\epsilon}{F_0}{\outputs} = \sum_{\pi: S_n(\pi)=\outputn{n} \forall n} \weight(\pi)
\]

Note that $\numberofstates{F_0} \simeq \prod_n^{\numberofnodes} \numberofstates{B_n}$ where $\numberofnodes$ is the number of nodes in the tree.
So the state space grows exponentially with the size of the tree---and this is before we have even introduced any sequences.
We seek to avoid this with our hierarchy of approximate models, which will have state spaces that are bounded in size.

First, however, we expand the state space even more, by introducing the observed sequences explicitly into the model.

\subsubsection{The evidence-expanded model}

Inference with stochastic grammars often uses a dynamic programming matrix (e.g. the Inside matrix)
to track the ways that a given evidential sequence can be produced by a given grammar.

For our purposes it is useful to introduce the evidence in a different way,
by transforming the model to incorporate the evidence directly.
We augment the state space so that the model is no longer capable of generating any sequences {\em except} the observed $\{\outputn{n}\}$,
by composing $F_0$ with exact-match transducers that will only accept the observed sequences.
This yields a model whose state space is very large and, in fact, is directly analogous to the Inside dynamic programming matrix.

If $n$ is a leaf node, define $G_n = \Delta(S_n)$.
The number of states is $\numberofstates{G_n} \sim \outseqlen{n}$.

Otherwise, let $(l,r)$ denote the left and right child nodes, and define
\[
G_n = \forkfun{B_l \compose G_l}{B_r \compose G_r}
\]

which we can represent as
\begin{parsetree}
 ( .$\idfork$. ( .$B_l$. .$G_l$. ) ( .$B_r$. .$G_r$. )  )
\end{parsetree}
\\

The complete evidence-expanded model is
$G_0 = R \compose G_1$.

The probability that the original model $F_0$ generates the evidential sequences $\outputs$
is identical to the probability that the evidence-expanded model $G_0$ generates the empty string
\[
P(\outputs | F_0) = \wtrans{\weight}{\epsilon}{F_0}{\outputs} = \wtrans{\weight}{\epsilon}{G_0}{\epsilon}
\]

Note the astronomical number of states in $G_0$
\[
\numberofstates{G_0} \simeq \left( \prod_{n=1}^{\numberofleaves} \outseqlen{n} \right) \left( \prod_{n=1}^{\numberofnodes} \numberofstates{B_n} \right)
\]
This is even worse than $F_0$; in fact, it is the same as the number of cells in the Inside matrix for computing $P(\outputs|F_0)$.
The good news is we are about to start constraining it.


\subsubsection{The constrained-expanded model}
\label{Constrained_expanded}
We now introduce a progressive series of approximating constraints to make inference under the model more tractable.

If $n$ is a leaf node, define $H_n = \Delta(S_n) \equiv G_n$.
The number of states is $\numberofstates{H_n} \simeq \outseqlen{n}$, just as with $G_n$.

Otherwise, let $(l,r)$ denote the left and right child nodes, and define
\[
H_n = \forkfun{B_l \compose E_l}{B_r \compose E_r}
\]

where $\statesof{E_n} \subseteq \statesof{H_n}$.

We can represent $H_n$ diagramatically as
\begin{parsetree}
 ( .$\idfork$. ( .$B_l$. .$E_l$. ) ( .$B_r$. .$E_r$. )  )
\end{parsetree}
\\

Transducer $E_n$, which is what we mean by the ``constrained-expanded model'', is effectively a profile of sequences that might plausibly appear at node $n$, given the observed descendants of that node.
The profile is constructed as follows.

The general idea is to generate a set of candidate sequences at node $n$, by sampling from the posterior distribution of such sequences {\bf given only the descendants of node $n$,}
ignoring (for the moment) the nodes outside the $n$-rooted subtree.
To do this, we need to introduce a prior distribution over the sequence at node $n$.
This prior is an approximation to replace the true (but as yet unknown) posterior distribution due to nodes outside the $n$-rooted subtree
(including $n$'s parent, and ancestors all the way back to the root, as well as siblings, cousins etc.)

A plausible choice for this prior, equivalent to assuming {\em stationarity} of the underlying evolutionary process, is the same prior that we use for the root node; that is, the singlet model $R$.
We define
\begin{eqnarray*}
M_n & = & R \compose H_n \\
& = & R \compose \forkfun{B_l \compose E_l}{B_r \compose E_r}
\end{eqnarray*}
to be the corresponding generative transducer.

We can represent $M_n$ diagramatically as
\begin{parsetree}
 ( .$R$. ( .$\idfork$. ( .$B_l$. .$E_l$. ) ( .$B_r$. .$E_r$. )  ) )
\end{parsetree}
\\

The transducer $Q_n = R \compose \forkfun{B_l}{B_r}$, which forms the comparison kernel of $M_n$, is also useful.
It can be represented as
\begin{parsetree}
 ( .$R$. ( .$\idfork$. .$B_l$. .$B_r$. ) )
\end{parsetree}
\\

\subsubsection{Component state tuples}
Suppose that $a \in \statesof{A}, b \in \statesof{B}, \upsilon \in \statesof{\idfork}$.
Our construction of composite transducers allows us to represent any state in $\forkfun{A}{B}$ as a tuple $(\upsilon, a,b)$.
Similarly, any state in $A \compose B$ can be represented as $(a,b)$.
Each state in $M_n$ can thus be written as a tuple $\mstate$ of component states, where
\begin{itemize}
\item $\rho$ is the state of the singlet transducer $R$
\item $\upsilon$ is the state of the identity-fork transducer $\idfork$
\item $b_l$ is the state of the left-branch transducer $B_l$
\item $e_l$ is the state of the left child profile transducer $E_l$
\item $b_r$ is the state of the right-branch transducer $B_l$
\item $e_r$ is the state of the right child profile transducer $E_r$
\end{itemize}
Similarly, each state in $H_n$ (and $E_n$) can be written as a tuple $\hstate$.

\subsubsection{Constructing $E_n$ from $H_n$}
The construction of $E_n$ as a sub-model of $H_n$ proceeds as follows:
\begin{enumerate}
\item sample a set of $K$ paths from $P(\pi | M_n) = \weight(\pi) / \wtrans{\weight}{\epsilon}{M_n}{\epsilon}$;
\item identify the set of $M_n$-states $\{\mstate\}$ used by the sampled paths;
\item strip off the leading $\rho$'s from these $M_n$-states to find the associated set of $H_n$-states $\{\hstate\}$;
\item the set of $H_n$-states so constructed is the subset of $E_n$'s states that have type $D$ (wait states must be added to place it in strict normal form).
\end{enumerate}

Here $K$ plays the role of a bounding parameter.
For the constrained-expanded transducer, $\numberofstates{E_n} \simeq KL$, where $L = \max_n \outseqlen{n}$.
Models $H_n$ and $M_n$, however, contain $\order{b^2 K^2 L^2}$ states,
where $b = \max_n \numberofstates{B_n}$,
as they are constructed by forking two $\order{bKL}$-state transducers ($B_l \compose E_l$ and $B_r \compose E_r$).

\subsection{Explicit construction of $Q_n$}

\begin{eqnarray*}
Q_n & = & R \compose \forkfun{B_l}{B_r} \\
& = & (\emptyset,\gapsquared,\statesof{Q_n},\startstateof{Q_n},\laststateof{Q_n},\transitionsof{Q_n},\weightfunof{Q_n}) \\
\startstateof{Q_n} & = & (\startstateof{R},\startstateof{\idfork},\startstateof{B_l},\startstateof{B_r}) \\
\laststateof{Q_n} & = & (\laststateof{R},\laststateof{\idfork},\laststateof{B_l},\laststateof{B_r})
\end{eqnarray*}

\subsubsection{States of $Q_n$}

Define $\statetype(\phi_1,\phi_2,\phi_3 \ldots) = (\statetype(\phi_1), \statetype(\phi_2), \statetype(\phi_3) \ldots)$.

Let $q=\qstate\in\statesof{Q_n}$.
We construct $\statesof{Q_n}$ from classes, adopting the convention that each class of states is defined by its associated types:
\[
\stateset{class} = \{ q: \statetype\qstate \in \typeset{class} \}
\]

The state typings are
\begin{eqnarray*}
\matchtypes & = & \{ (I,M,M,M) \}  \\
\rightdeletetypes & = & \{ (I,M,M,D) \}  \\
\leftdeletetypes & = & \{ (I,M,D,M) \}  \\
\nulltypes & = & \{ (I,M,D,D) \}  \\
\rightinserttypes & = & \{ (S,S,S,I),\ (I,M,M,I),\ (I,M,D,I) \}  \\
\leftinserttypes & = & \{ (S,S,I,W),\ (I,M,I,W) \}  \\
\qwaittypes & = & \{ (W,W,W,W) \} \\
\rightemittypes & = & \leftdeletetypes\ \cup\ \rightinserttypes \\
\leftemittypes & = & \leftinserttypes\ \cup\ \rightdeletetypes
\end{eqnarray*}

The state space of $Q_n$ is
\[
\statesof{Q_n} = \{ \startstateof{Q_n},\ \laststateof{Q_n} \}\ \cup\ \matchstates\ \cup\ \leftemitstates\ \cup\ \rightemitstates\ \cup\ \nullstates\ \cup\ \qwaitstates
\]

It is possible to calculate transition/emission weights of $Q_n$
by starting with the example constructions given for $T \compose U$ and $T \fork U$,
then eliminating states that are not in the above set.
This gives the results described in the following sections.

\subsubsection{Emission weights of $Q_n$}

Let $(\omega_l,\omega_r)\ \in \gapsquared$.

The emission weight function for $Q_n$ is
\[
\emitweightfun{Q_n}(\epsilon,(\omega_l,\omega_r),q) = \left\{
\begin{array}{ll}
\displaystyle
\sum_{\omega \in \Omega} \emitweightfun{R}(\epsilon,\omega,R)
 \emitweightfun{B_l}(\omega,\omega_l,b_l)
 \emitweightfun{B_r}(\omega,\omega_r,b_r)
 & \mbox{if $q \in \matchstates$} \\
\displaystyle
\sum_{\omega \in \Omega} \emitweightfun{R}(\epsilon,\omega,R)
 \emitweightfun{B_r}(\omega,\omega_r,b_r)
 & \mbox{if $q \in \leftdeletestates$} \\
\displaystyle
\sum_{\omega \in \Omega} \emitweightfun{R}(\epsilon,\omega,R)
 \emitweightfun{B_l}(\omega,\omega_l,b_l)
 & \mbox{if $q \in \rightdeletestates$} \\
 \emitweightfun{B_l}(\epsilon,\omega_l,b_l)
 & \mbox{if $q \in \leftinsertstates$} \\
 \emitweightfun{B_r}(\epsilon,\omega_r,b_r)
 & \mbox{if $q \in \rightinsertstates$} \\
1
 & \mbox{otherwise}
\end{array}
\right.
\]


\subsubsection{Transition weights of $Q_n$}

The transition weight between two states
$q=\qstate$ and
$q'=\qstatedest$
always takes the form
\[
\transweightfun{Q_n}(q,q') \equiv \sumoverpaths{R} . \sumoverpaths{\idfork} . \sumoverpaths{B_l} . \sumoverpaths{B_r}
\]
where $\sumoverpaths{T}$ represents a sum over a set of paths through component transducer $T$.
The allowed paths $\{\pi_T\}$ are constrained by the types of $q,q'$ as shown in Table~\ref{QTransitionTypes}.
\begin{table}
\[
\begin{array}{ll|cccc}
\statetype\qstate & \statetype\qstatedest & |\pi_R| & |\pi_{\idfork}| & |\pi_{B_l}| & |\pi_{B_r}| \\
\hline
(S,S,S,*) & (S,S,S,I) & 0 & 0 & 0 & 1 \\
          & (S,S,I,W) & 0 & 0 & 1 & 1 \\
          & (I,M,M,M) & 1 & 2 & 2 & 2 \\
          & (I,M,M,D) & 1 & 2 & 2 & 2 \\
          & (I,M,D,M) & 1 & 2 & 2 & 2 \\
          & (I,M,D,D) & 1 & 2 & 2 & 2 \\
          & (W,W,W,W) & 1 & 1 & 1 & 1 \\
\hline
(S,S,I,W) & (S,S,I,W) & 0 & 0 & 1 & 0 \\
          & (I,M,M,M) & 1 & 2 & 2 & 1 \\
          & (I,M,M,D) & 1 & 2 & 2 & 1 \\
          & (I,M,D,M) & 1 & 2 & 2 & 1 \\
          & (I,M,D,D) & 1 & 2 & 2 & 1 \\
          & (W,W,W,W) & 1 & 1 & 1 & 0 \\
\hline
(I,M,M,*) & (I,M,M,I) & 0 & 0 & 0 & 1 \\
          & (I,M,I,W) & 0 & 0 & 1 & 1 \\
          & (I,M,M,M) & 1 & 2 & 2 & 2 \\
          & (I,M,M,D) & 1 & 2 & 2 & 2 \\
          & (I,M,D,M) & 1 & 2 & 2 & 2 \\
          & (I,M,D,D) & 1 & 2 & 2 & 2 \\
          & (W,W,W,W) & 1 & 1 & 1 & 1 \\
\hline
(I,M,D,*) & (I,M,D,I) & 0 & 0 & 0 & 1 \\
          & (I,M,I,W) & 0 & 0 & 1 & 1 \\
          & (I,M,M,M) & 1 & 2 & 2 & 2 \\
          & (I,M,M,D) & 1 & 2 & 2 & 2 \\
          & (I,M,D,M) & 1 & 2 & 2 & 2 \\
          & (I,M,D,D) & 1 & 2 & 2 & 2 \\
          & (W,W,W,W) & 1 & 1 & 1 & 1 \\
\hline
(I,M,I,W) & (I,M,I,W) & 0 & 0 & 1 & 0 \\
          & (I,M,M,M) & 1 & 2 & 2 & 1 \\
          & (I,M,M,D) & 1 & 2 & 2 & 1 \\
          & (I,M,D,M) & 1 & 2 & 2 & 1 \\
          & (I,M,D,D) & 1 & 2 & 2 & 1 \\
          & (W,W,W,W) & 1 & 1 & 1 & 0 \\
\hline
(W,W,W,W) & (E,E,E,E) & 1 & 1 & 1 & 1 \\
\end{array}
\]
\caption{
\label{QTransitionTypes}
Transition types of $Q_n$.
}
\end{table}
Table~\ref{QTransitionTypes} uses the following conventions:
\begin{itemize}
\item A $0$ in any column means that the corresponding state must remain unchanged.
For example, if $|\pi_{B_l}|=0$ then
\[
\sumoverpaths{B_l} \equiv \delta(b_l=b'_l)
\]
\item A $1$ in any column means that the corresponding transducer makes a single transition.
For example, if $|\pi_{B_l}|=1$ then
\[
\sumoverpaths{B_l} \equiv \transweightfun{B_l}(b_l,b'_l)
\]
\item A $2$ in any column means that the corresponding transducer makes two transitions, via an intermediate state.
For example, if $|\pi_{B_l}|=2$ then
\[
\sumoverpaths{B_l} \equiv \sum_{b''_l \in \statesof{B_l}} \transweightfun{B_l}(b_l,b''_l) \transweightfun{B_l}(b''_l,b'_l)
\]
(Since the transducers are in strict normal form, and given the context in which the $2$'s appear,
it will always be the case that the intermediate state $b''_l$ has type $W$.)
\item An asterisk ($*$) in a type-tuple is interpreted as a wildcard; for example, $(S,S,S,*)$ corresponds to $\{ (S,S,S,S),\ (S,S,S,I) \}$.
\item If a transition does not appear in the above table, or if any of the $\transweightfun{}$'s are zero,
then $\nexists (h,\omega,\omega',h') \in \transitionsof{H_n}$.
\end{itemize}


\subsubsection{State types of $Q_n$}

\[
\statetype(q) = \left\{ \begin{array}{ll}
S & \mbox{if $q = \startstateof{Q_n}$} \\
E & \mbox{if $q = \laststateof{Q_n}$} \\
W & \mbox{if $q \in \qwaitstates$} \\
I & \mbox{if $q \in \matchstates\ \cup\ \leftemitstates\ \cup\ \rightemitstates$} \\
N & \mbox{if $q \in \nullstates$}
\end{array} \right.
\]

Note that $Q_n$ contains null states ($\nullstates$) corresponding to coincident deletions on branches $n \to l$ and $n \to r$.
These states have $\statetype\qstate=(I,M,D,D)$.
There are transition paths that go through these states, including paths that cycle indefinitely among these states.

We need to eliminate these states before constructing $M_n$.
Let $Q'_n \transequiv Q_n$ denote the transducer obtained from $Q_n$ by marginalizing $\nullstates$
\[
\statesof{Q'_n} = \{ \startstateof{Q_n},\ \laststateof{Q_n} \}\ \cup\ \matchstates\ \cup\ \leftemitstates\ \cup\ \rightemitstates\ \cup\ \qwaitstates
\]

% \redpen{Describe how to construct $\transweightfun{Q'_n}$ explicitly - i.e. using matrix inversion.}

\redpen{How to restore these states when constructing $E_n$? Ortheus samples them randomly, but (anecdotally via Ben) you need a lot of samples before you have any chance of guessing the right number, and in practice it makes little difference. I think it would be possible to leave them in as self-looping delete states in $E_n$, which obviously makes $E_n$ cyclic. If we do this, we might want to postpone the null-cycle elimination until after we have constructed $M_n$.}

\subsection{Explicit construction of $M_n$ using $Q'_n$, $E_l$ and $E_r$}

Refer to the previous section for definitions pertaining to $Q'_n$.

\begin{eqnarray*}
M_n & = & R \compose \forkfun{B_l \compose E_l}{B_r \compose E_r} \\
Q'_n & \transequiv & R \compose \forkfun{B_l}{B_r}
\end{eqnarray*}

\subsubsection{States of $M_n$}
The complete set of $M_n$-states is
\begin{eqnarray*}
\statesof{M_n} & = & \{ \startstateof{M_n},\ \laststateof{M_n} \} \\
& & \quad \cup\ \{ \mstate: \qstate \in \matchstates,\ \statetype(e_l)=\statetype(e_r)=D \} \\
& & \quad \cup\ \{ \mstate: \qstate \in \leftemitstates,\ \statetype(e_l)=D, \statetype(e_r)=W \} \\
& & \quad \cup\ \{ \mstate: \qstate \in \rightemitstates,\ \statetype(e_l)=W, \statetype(e_r)=D \} \\
& & \quad \cup\ \{ \mstate: \qstate \in \waitstates,\ \statetype(e_l)=\statetype(e_r)=W \}
\end{eqnarray*}

\subsubsection{Emission weights of $M_n$}
Let $m = \mstate$ be an $M_n$-state and $q = \qstate$ the subsumed $Q'_n$-state.

Similarly, let $m' = \mstatedest$ and $q' = \qstatedest$.

The emission weight function for $M_n$ is
\[
\emitweightfun{M_n}(\epsilon,\epsilon,m) = \left\{
\begin{array}{ll}
\displaystyle
\sum_{\omega_l \in \Omega}
\sum_{\omega_r \in \Omega}
 \emitweightfun{Q'_n}(\epsilon,(\omega_l,\omega_r),q)
. \emitweightfun{E_l}(\omega_l,\epsilon,e_l)
. \emitweightfun{E_r}(\omega_r,\epsilon,e_r)
 & \mbox{if $q \in \matchstates$} \\
\displaystyle
\sum_{\omega_l \in \Omega}
 \emitweightfun{Q'_n}(\epsilon,(\omega_l,\epsilon),q)
. \emitweightfun{E_l}(\omega_l,\epsilon,e_l)
 & \mbox{if $q \in \leftemitstates$} \\
\displaystyle
\sum_{\omega_r \in \Omega}
 \emitweightfun{Q'_n}(\epsilon,(\epsilon,\omega_r),q)
. \emitweightfun{E_r}(\omega_r,\epsilon,e_r)
 & \mbox{if $q \in \rightemitstates$} \\
1
 & \mbox{otherwise}
\end{array}
\right.
\]

\subsubsection{Transitions of $M_n$}

As before,
\begin{eqnarray*}
q & = & \qstate \\
m & = & \mstate \\
q' & = & \qstatedest \\
m' & = & \mstatedest
\end{eqnarray*}
An ``upper bound'' (i.e. superset) of the transition set of $M_n$ is as follows
\begin{eqnarray*}
\transitionsof{M_n} & \subseteq & \{ (m,\epsilon,\epsilon,m'): q'\in\matchstates,\statetype(q)\in\{S,I\},\statetype(e'_l,e'_r)=(D,D) \} \\
 & & \ \cup\ \{ (m,\epsilon,\epsilon,m'): q'\in\leftemitstates,\statetype(q)\in\{S,I\},\statetype(e'_l,e'_r)=(D,W) \} \\
 & & \ \cup\ \{ (m,\epsilon,\epsilon,m'): q'\in\rightemitstates,\statetype(q)\in\{S,I\},\statetype(e'_l,e'_r)=(W,D) \} \\
 & & \ \cup\ \{ (m,\epsilon,\epsilon,m'): q'\in\waitstates,\statetype(q)\in\{S,I\},\statetype(e'_l,e'_r)=(W,W) \} \\
 & & \ \cup\ \{ (m,\epsilon,\epsilon,m'): \statetype(q,e_l,e_r)=(W,W,W),\statetype(q',e'_l,e'_r)=(E,E,E) \}
\end{eqnarray*}
More precisely, $\transitionsof{M_n}$ contains the transitions in the above set for which the transition weight (defined in the next section) is nonzero.
(This ensures that the individual transition paths $q \to q'$, $e_l \to e'_l$ and $e_r \to e'_r$ exist with nonzero weight.)

\subsubsection{Transition weights of $M_n$}

Let $\transviawait{E_n}(e,e')$ be the weight of {\bf either} the direct transition $e \to e'$,
{\bf or} a double transition $e \to e'' \to e'$ summed over all intermediate states $e''$
\[
\transviawait{E_n}(e,e') = \left\{
\begin{array}{ll}
\displaystyle
\sum_{e'' \in \statesof{E_n}}
\transweightfun{E_n}(e,e'')
\transweightfun{E_n}(e'',e')
& \mbox{if $\statetype(e) \in \{S,D\}$} \\
\transweightfun{E_n}(e,e')
& \mbox{if $\statetype(e) = W$} \\
\end{array}
\right.
\]

Let $\transtowait{E_n}(e,e')$ be the weight of a transition (or non-transition) that leaves $E_n$ in a wait state
\[
\transtowait{E_n}(e,e') = \left\{
\begin{array}{ll}
\transweightfun{E_n}(e,e')
& \mbox{if $\statetype(e) \in \{S,D\},\ \statetype(e') = W$} \\
1
& \mbox{if $e=e',\ \statetype(e') = W$} \\
0
& \mbox{otherwise} \\
\end{array}
\right.
\]

The transition weight function for $M_n$ is
\[
\transweightfun{M_n}(m,m') = \transweightfun{Q'_n}(q,q') \times \left\{
\begin{array}{ll}
\transviawait{E_l}(e_l,e'_l) \transviawait{E_r}(e_r,e'_r)
 & \mbox{if $q' \in \matchstates $} \\
\transviawait{E_l}(e_l,e'_l) \transtowait{E_r}(e_r,e'_r)
 & \mbox{if $q' \in \leftemitstates$} \\
\transtowait{E_l}(e_l,e'_l) \transviawait{E_r}(e_r,e'_r)
 & \mbox{if $q' \in \rightemitstates$} \\
\transtowait{E_l}(e_l,e'_l) \transtowait{E_r}(e_r,e'_r)
 & \mbox{if $q' \in \waitstates$} \\
\transweightfun{E_l}(e_l,e'_l) \transweightfun{E_r}(e_r,e'_r)
 & \mbox{if $q' = \laststateof{Q'_n}$}
\end{array}
\right.
\]


\subsection{Explicit construction of $H_n$}

This construction is somewhat redundant, since we construct $M_n$ from $Q_n$, $E_l$ and $E_r$, rather than from $R \compose H_n$.
It is retained for comparison.

\begin{eqnarray*}
H_n & = & \forkfun{B_l \compose E_l}{B_r \compose E_r} \\
& = & (\Omega,\emptyset,\statesof{H_n},\startstateof{H_n},\laststateof{H_n},\transitionsof{H_n},\weightfunof{H_n}) \\
\end{eqnarray*}

Assume $B_l,B_r,E_l,E_r$ in strict-normal form.

\subsubsection{States of $H_n$}

Define $\statetype(\phi_1,\phi_2,\phi_3 \ldots) = (\statetype(\phi_1), \statetype(\phi_2), \statetype(\phi_3) \ldots)$.

Let $h=\hstate\in\statesof{H_n}$.
We construct $\statesof{H_n}$ from classes, adopting the convention that each class of states is defined by its associated types:
\[
\stateset{class} = \{ h: \statetype\hstate \in \typeset{class} \}
\]

% A {\em cascade} is a transition of a composite transducer $T \compose U$ representing the output of a symbol by $T$ and the simultaneous input of that symbol to $U$.

Define $\externalcascades \subset \statesof{H_n}$ to be the subset of $H_n$-states that follow {\em externally-driven cascades}
\begin{eqnarray*}
\externaltypes & = & \{ (M,M,D,M,D),\ (M,M,D,D,W), \\
& & \quad (M,D,W,M,D),\ (M,D,W,D,W) \}
\end{eqnarray*}

Define $\internalcascades \subset \statesof{H_n}$ to be the subset of $H_n$-states that follow {\em internal cascades}
\begin{eqnarray*}
\internaltypes & = & \lefttypes \cup \righttypes \\
\lefttypes & = & \{ (S,I,D,W,W),\ (M,I,D,W,W) \} \\
\righttypes & =  & \{ (S,S,S,I,D),\ (M,M,D,I,D),\ (M,D,W,I,D) \}
\end{eqnarray*}

Remaining states are the start, end, and wait states:
\begin{eqnarray*}
\startstateof{H_n} & = & (\startstateof{\idfork},\startstateof{B_l},\startstateof{E_l},\startstateof{B_r},\startstateof{E_r}) \\
\laststateof{H_n} & = & (\laststateof{\idfork},\laststateof{B_l},\laststateof{E_l},\laststateof{B_r},\laststateof{E_r}) \\
\waittypes & = & \{ (W,W,W,W,W) \}
\end{eqnarray*}
The complete set of $H_n$-states is
\[
\statesof{H_n} = \{ \startstateof{H_n},\ \laststateof{H_n} \}\ \cup\ \externalcascades\ \cup\ \internalcascades\ \cup\ \waitstates
\]

It is possible to calculate transition/emission weights of $H_n$
by starting with the example constructions given for $T \compose U$ and $T \fork U$,
then eliminating states that are not in the above set.
This gives the results described in the following sections.

\subsubsection{Emission weights of $H_n$}

Let $\omega,\omega' \in \gappedalphabet{}$.

Let $C_n(b_n,e_n)$ be the emission weight function for $B_n \compose E_n$ on a transition into composite state $(b_n,e_n)$ where $\statetype(b_n,e_n)=(I,D)$
\[
C_n(b_n,e_n) = \sum_{\omega \in \Omega} \emitweightfun{B_n}(\epsilon,\omega,b_n) \emitweightfun{E_n}(\omega,\epsilon,e_n)
\]

Let $D_n(\omega,b_n,e_n)$ be the emission weight function for $B_n \compose E_n$ on a transition into composite state $(b_n,e_n)$
 where $\statetype(b_n,e_n) \in \{(M,D),(D,W)\}$ with input symbol $\omega$ 
\[
D_n(\omega,b_n,e_n) = \left\{
\begin{array}{ll}
\displaystyle
\sum_{\omega' \in \Omega} \emitweightfun{B_n}(\omega,\omega',b_n) \emitweightfun{E_n}(\omega',\epsilon,e_n)
 & \mbox{if $\statetype(b_n,e_n)=(M,D)$} \\
\emitweightfun{B_n}(\omega,\epsilon,b_n)
 & \mbox{if $\statetype(b_n,e_n)=(D,W)$}
\end{array}
\right.
\]

The emission weight function for $H_n$ is
\[
\emitweightfun{H_n}(\omega,\epsilon,h) = \left\{
\begin{array}{ll}
D_l(\omega,b_l,e_l) D_r(\omega,b_r,e_r)
 & \mbox{if $h \in \externalcascades$} \\
C_l(b_l,e_l)
 & \mbox{if $h \in \leftcascades$} \\
C_r(b_r,e_r)
 & \mbox{if $h \in \rightcascades$} \\
1
 & \mbox{otherwise}
\end{array}
\right.
\]

\subsubsection{Transition weights of $H_n$}

The transition weight between two states
$h=\hstate$ and
$h'=\hstatedest$
always takes the form
\[
\transweightfun{H_n}(h,h') \equiv \sumoverpaths{\idfork} . \sumoverpaths{B_l} . \sumoverpaths{E_l} . \sumoverpaths{B_r} . \sumoverpaths{E_r}
\]
where the RHS terms again represent sums over paths, with the allowed paths depending on the types of $h,h'$ as shown in Table~\ref{HTransitionTypes}.
\begin{table}
\[
\begin{array}{ll|ccccc}
\statetype\hstate & \statetype\hstatedest & |\pi_{\idfork}| & |\pi_{B_l}| & |\pi_{E_l}| & |\pi_{B_r}| & |\pi_{E_r}| \\
\hline
(S,S,S,S,S) & (S,S,S,I,D) & 0 & 0 & 0 & 1 & 2 \\
            & (S,I,D,W,W) & 0 & 1 & 2 & 1 & 1 \\
            & (W,W,W,W,W) & 1 & 1 & 1 & 1 & 1 \\
\hline
(S,S,S,I,D) & (S,S,S,I,D) & 0 & 0 & 0 & 1 & 2 \\
            & (S,I,D,W,W) & 0 & 1 & 2 & 1 & 1 \\
            & (W,W,W,W,W) & 1 & 1 & 1 & 1 & 1 \\
\hline
(S,I,D,W,W) & (S,I,D,W,W) & 0 & 1 & 2 & 0 & 0 \\
            & (W,W,W,W,W) & 1 & 1 & 1 & 0 & 0 \\
\hline
(W,W,W,W,W) & (M,M,D,M,D) & 1 & 1 & 1 & 1 & 1 \\
            & (M,M,D,D,W) & 1 & 1 & 1 & 1 & 0 \\
            & (M,D,W,M,D) & 1 & 1 & 0 & 1 & 1 \\
            & (M,D,W,D,W) & 1 & 1 & 0 & 1 & 0 \\
            & (E,E,E,E,E) & 1 & 1 & 1 & 1 & 1 \\
\hline
(M,M,D,M,D) & (M,M,D,I,D) & 0 & 0 & 0 & 1 & 2 \\
            & (M,I,D,W,W) & 0 & 1 & 2 & 1 & 1 \\
            & (W,W,W,W,W) & 1 & 1 & 1 & 1 & 1 \\
\hline
(M,M,D,D,W) & (M,M,D,I,D) & 0 & 0 & 0 & 1 & 1 \\
            & (M,I,D,W,W) & 0 & 1 & 2 & 1 & 0 \\
            & (W,W,W,W,W) & 1 & 1 & 1 & 1 & 0 \\
\hline
(M,D,W,M,D) & (M,D,W,I,D) & 0 & 0 & 0 & 1 & 2 \\
            & (M,I,D,W,W) & 0 & 1 & 1 & 1 & 1 \\
            & (W,W,W,W,W) & 1 & 1 & 0 & 1 & 1 \\
\hline
(M,D,W,D,W) & (M,D,W,I,D) & 0 & 0 & 0 & 1 & 1 \\
            & (M,I,D,W,W) & 0 & 1 & 1 & 1 & 0 \\
            & (W,W,W,W,W) & 1 & 1 & 0 & 1 & 0 \\
\hline
(M,M,D,I,D) & (M,M,D,I,D) & 0 & 0 & 0 & 1 & 2 \\
            & (M,I,D,W,W) & 0 & 1 & 2 & 1 & 1 \\
            & (W,W,W,W,W) & 1 & 1 & 1 & 1 & 1 \\
\hline
(M,D,W,I,D) & (M,D,W,I,D) & 0 & 0 & 0 & 1 & 2 \\
            & (M,I,D,W,W) & 0 & 1 & 1 & 1 & 1 \\
            & (W,W,W,W,W) & 1 & 1 & 0 & 1 & 1 \\
\hline
(M,I,D,W,W) & (M,I,D,W,W) & 0 & 1 & 2 & 0 & 0 \\
            & (W,W,W,W,W) & 1 & 1 & 1 & 0 & 0 \\
\end{array}
\]
\caption{
\label{HTransitionTypes}
Transition types of $H_n$.
}
\end{table}
Table~\ref{HTransitionTypes} uses the same conventions as Table~\ref{QTransitionTypes}.

\subsubsection{State types of $H_n$}

\[
\statetype(h) = \left\{ \begin{array}{ll}
S & \mbox{if $h = \startstateof{H_n}$} \\
E & \mbox{if $h = \laststateof{H_n}$} \\
W & \mbox{if $h \in \waitstates$} \\
D & \mbox{if $h \in \externalcascades$} \\
N & \mbox{if $h \in \internalcascades$}
\end{array} \right.
\]

Since $H_n$ contains states of type $N$ (the internal cascades),
it is necessary to eliminate these states from $E_n$ (after sampling paths through $M_n$),
so as to guarantee that $E_n$ will be in strict normal form.



\subsection{Explicit construction of $M_n$ using $R$ and $H_n$}

This construction is somewhat redundant, since we construct $M_n$ from $Q_n$, $E_l$ and $E_r$, rather than from $R \compose H_n$.
It is retained for comparison.

The following construction uses the fact that $M_n = R \compose H_n$ so that we can compactly define $M_n$ by referring back to the previous construction of $H_n$.
In practice, it will be more efficient to precompute $Q_n = R \compose \forkfun{B_l}{B_r}$.

Refer to the previous section (``Explicit construction of $H_n$'') for definitions of
$\externalcascades,\internalcascades,\waitstates,\transweightfun{H_n}(h,h'),\emitweightfun{H_n}(\omega,\omega',h)$.

Assume that $R$ is in strict normal form.

\begin{eqnarray*}
M_n & = & R \compose H_n \\
& = & R \compose \forkfun{B_l \compose E_l}{B_r \compose E_r} \\
& = & (\emptyset,\emptyset,\statesof{M_n},\startstateof{M_n},\laststateof{M_n},\transitionsof{M_n},\weightfunof{M_n}) \\
\startstateof{M_n} & = & (\startstateof{R},\startstateof{\idfork},\startstateof{B_l},\startstateof{E_l},\startstateof{B_r},\startstateof{E_r}) \\
\laststateof{M_n} & = & (\laststateof{R},\laststateof{\idfork},\laststateof{B_l},\laststateof{E_l},\laststateof{B_r},\laststateof{E_r})
\end{eqnarray*}

\subsubsection{States of $M_n$}
The complete set of $M_n$-states is
\begin{eqnarray*}
\statesof{M_n} & = & \{ \startstateof{M_n},\ \laststateof{M_n} \} \\
& & \quad \cup\ \{ \mstate: \hstate \in \externalcascades,\ \statetype(\rho)=I \} \\
& & \quad \cup\ \{ \mstate: \hstate \in \waitstates,\ \statetype(\rho)=W \} \\
& & \quad \cup\ \{ \mstate: \hstate \in \internalcascades,\ \statetype(\rho)=\statetype(\upsilon)=S \} \\
& & \quad \cup\ \{ \mstate: \hstate \in \internalcascades,\ \statetype(\rho)=I,\ \statetype(\upsilon)=M \}
\end{eqnarray*}

\subsubsection{Emission weights of $M_n$}
Let $m = \mstate$ be an $M_n$-state and $h = \hstate$ the subsumed $H_n$-state.

Similarly, let $m' = \mstatedest$ and $h' = \hstatedest$.

The emission weight function for $M_n$ is
\[
\emitweightfun{M_n}(\epsilon,\epsilon,m) = \left\{
\begin{array}{ll}
\displaystyle
\sum_{\omega \in \Omega} \emitweightfun{R}(\epsilon,\omega,\rho) \emitweightfun{H_n}(\omega,\epsilon,h)
 & \mbox{if $h \in \externalcascades$} \\
\emitweightfun{H_n}(\epsilon,\epsilon,h)
 & \mbox{otherwise}
\end{array}
\right.
\]

\subsubsection{Transition weights of $M_n$}
The transition weight function for $M_n$ is
\[
\transweightfun{M_n}(m,m') = \left\{
\begin{array}{ll}
\displaystyle
\transweightfun{H_n}(h,h')
 & \mbox{if $h' \in \internalcascades$} \\
\displaystyle
\transweightfun{R}(\rho,\rho') \sum_{h'' \in \waitstates} \transweightfun{H_n}(h,h'') \transweightfun{H_n}(h'',h')
 & \mbox{if $h' \in \externalcascades$} \\
\transweightfun{R}(\rho,\rho') \transweightfun{H_n}(h,h')
 & \mbox{otherwise}
\end{array}
\right.
\]

If $E_l$ and $E_r$ are acyclic, then $H_n$ and $E_n$ will be acyclic too.
However, $M_n$ does contain cycles among states of type $(I,M,D,W,D,W)$.
These correspond to characters output by $R$ that are then deleted by both $B_l$ and $B_r$.
It is necessary to eliminate these states from $M_n$ by marginalization, and to then restore them probabilistically when sampling paths through $M_n$.

\section{Inference algorithms}

\subsection{Dynamic programming}

The recursion for $\wtrans{\weight}{\epsilon}{M_n}{\epsilon}$ is
\begin{eqnarray*}
\wtrans{\weight}{\epsilon}{M_n}{\epsilon} & = & Z(\laststate) \\
Z(m') & = & \sum_{m : (m,\epsilon,\epsilon,m') \in \Transitions} Z(m) \weightfunof{}(m,\epsilon,\epsilon,m')  \quad\quad \forall m' \neq \startstate \\
Z(\startstate) & = & 1
\end{eqnarray*}

The algorithm to fill $Z(m)$ has the general structure shown in Algorithm~\ref{ForwardTransducer}.
(Some optimization of this algorithm is desirable, since not all tuples $\mstate$ are states of $M_n$.
If $E_n$ is in strict-normal form its $W$- and $D$-states will occur in pairs
(c.f. the strict-normal version of the exact match transducer $\Delta(S)$).
These $(D,W)$ pairs are largely redundant: the choice between $D$ and $W$ is dictated by the parent $B_n$,
as can be seen from Table~\ref{HTransitionTypes} and the construction of $\statesof{H_n}$.)

\begin{algorithm}
  Initialize $Z(\startstate) \leftarrow 1$;
  \BlankLine
  \ForEach(\tcc*[f]{topologically-sorted}){$e_l \in \statesof{E_l}$} {
    \ForEach(\tcc*[f]{topologically-sorted}){$e_r \in \statesof{E_r}$} {
      \ForEach(\tcc*[f]{topologically-sorted}){$\qstate \in \statesof{Q_n}$} {
        Let $m = \mstate$;
        \BlankLine
        \If{$m \in \statesof{M_n}$}{
          Compute $Z(m)$;
        }
      }
    }
  }
  Return $Z(\laststate)$.
\caption{\label{ForwardTransducer}
The analog of the Forward algorithm for transducer $M_n$.
}
\end{algorithm}

For comparison, the Forward algorithm for computing the probability of two sequences $(S_l,S_r)$
being generated by a Pair Hidden Markov Model $(M)$ has the general structure shown in Algorithm~\ref{ForwardPairHMM}.

\begin{algorithm}
  Initialize cell $(0,0,\mbox{START})$;
  \BlankLine
  \ForEach(\tcc*[f]{ascending order}){$0 \leq i_l \leq \seqlen{S_l}$} {
    \ForEach(\tcc*[f]{ascending order}){$0 \leq i_r \leq \seqlen{S_r}$} {
      \ForEach(\tcc*[f]{topologically-sorted}){$\sigma \in M$} {
        Compute the sum-over-paths up to cell $(i_l,i_r,\sigma)$;
      }
    }
  }
  Return cell $(\seqlen{S_l},\seqlen{S_r},\mbox{END})$.
\caption{\label{ForwardPairHMM}
The Forward algorithm for a Pair HMM.
}
\end{algorithm}

The generative transducer $Q_n \equiv R \compose \forkfun{B_l}{B_r}$
in Algorithm~\ref{ForwardTransducer} is effectively identical to the Pair HMM in Algorithm~\ref{ForwardPairHMM}.






\subsection{Pseudocode for DP recursion}

We outline a more precise version of the Forward-like DP recursion in Algorithm~\ref{ForwardTransducerFull} and the associated Function $\addToDPFunction$. %~\ref{fillZ} 
Let $\getprofiletype(q,side)$ return the state type for the profile on $side$ which is consistent with $q$.  

\subsubsection{Transition sets}

Since  all transitions in the state spaces $Q'_n, E_l$, and $E_r$ are known, we can define the following sets :
\begin{eqnarray*}
\incomingLeftProfile{j} &=& \{i: \newTransName{l}(i,j) \neq 0 \} \\
\incomingRightProfile{j} &=& \{i: \newTransName{r}(i,j) \neq 0 \} \\
\incomingM{q'} &=& \{ q:q \in \matchstates, \transweightfun{Q'_n}(q,q') \neq 0 \}\\
\incomingL{q'} &=& \{ q:q \in \leftemitstates, \transweightfun{Q'_n}(q,q') \neq 0 \}\\
\incomingR{q'} &=& \{ q:q \in \rightemitstates, \transweightfun{Q'_n}(q,q') \neq 0 \}
\end{eqnarray*}


\begin{algorithm}
  Initialize $Z(\startstate) \leftarrow 1$;
  \BlankLine
  \ForEach{$1 \leq \rightToI \leq N_r$}{ 
    \ForEach{$\qTo \in \{ q:type(q) = (S,S,S,I)\}$}{ 
      Let $(\leftProfTo,\rightProfTo)  = (\startstate, \profiledelete{\rightToI})$; \\
      $\addToDP{\qTo}{\leftProfTo}{\rightProfTo}$;
    }
  }

  \ForEach{$1 \leq \leftToI \leq N_l$}{ 
    \ForEach{ $1 \leq \rightToI \leq N_r$}{ 
      \If{$\envelope{\leftToI}{\rightToI}$  }{
      % q' is a match state.  
      \ForEach{$\qTo \in \matchstates $} { 
        Let $(\leftProfTo,\rightProfTo)  = (\profiledelete{\leftToI}, \profiledelete{\rightToI})$; \\
        $\addToDP{\qTo}{\leftProfTo}{\rightProfTo}$;
        }

      % q' is a left-emit state.  
      \ForEach{$\qTo \in \leftemitstates $} { 
        Let $(\leftProfTo, \rightProfTo) = (\profiledelete{\leftToI}, \profilewait{\rightToI})$; \\
        $\addToDP{\qTo}{\leftProfTo}{\rightProfTo}$;
    }
      % q' is a right-emit state.  
      \ForEach{$\qTo \in \rightemitstates$} { 
        \If{$type(\qTo) == (S,S,S,I)$}{continue}
        Let $\tau = \getprofiletype(\qTo, left)$; \\
        $(\leftProfTo, \rightProfTo) = (\profileunknown{\leftToI}, \profiledelete{\rightToI})$; \\
        $\addToDP{\qTo}{\leftProfTo}{\rightProfTo}$;
      }
      % q' is a wait state.  
      \ForEach{$\qTo \in \waitstates $} { 
        Let $(\leftProfTo, \rightProfTo) = (\profilewait{\leftToI}, \profilewait{\rightToI})$; \\
        $\addToDP{\qTo}{\leftProfTo}{\rightProfTo}$;
      }
      }
    }
  }
  % left emit, right side is profileterminate
  \ForEach{$1 \leq \leftToI \leq N_l$}{ 
    \ForEach{$\qTo \in \leftemitstates $} { 
      Let $(\leftProfTo, \rightProfTo) = (\profiledelete{\leftToI},  \profileterminate )$; \\
      $\addToDP{\qTo}{\leftProfTo}{\rightProfTo}$;
    }
  }
  % right emit, left side is profileterminate
    \ForEach{ $1 \leq \rightToI \leq N_r$}{ 
      \ForEach{$\qTo \in \rightemitstates $} { 
      Let $(\leftProfTo, \rightProfTo) = (\profileterminate, \profiledelete{\rightToI})$; \\
      $\addToDP{\qTo}{\leftProfTo}{\rightProfTo}$;
    }
  }
      

  % q' is a wait state and both profiles are in the terminating-wait state
  \ForEach{$\qTo \in \waitstates $} { 
    Let $(\leftProfTo, \rightProfTo) = (\profileterminate, \profileterminate)$; \\
    $\addToDP{\qTo}{\leftProfTo}{\rightProfTo}$;
  }


  % q' is the end state.  
  Initialize $Z(\laststate) \leftarrow 0$; \\
  \ForEach{$\qFrom \in \waitstates $ } {
    Let $\mFrom = (\qFrom,\profileterminate, \profileterminate)$; \\
    $Z(\laststate) \leftarrow Z(\laststate) + Z(\mFrom) \transweightfun{Q'_n}(\qFrom,\laststateof{Q'_n})$;
  }

\caption{\label{ForwardTransducerFull}
The full version of the analog of the Forward algorithm for transducer $M_n$.
Function \addToDPFunction(\ldots) is defined separately.
}
\end{algorithm}

% TODO: incorporate, or at least think carefully about:         \redpen{It's probably a good idea to pre-calculate as much as you can towards computing $\emitProb$, for example pre-multiplying the absorption profile of $E_n$ with the emission profile of each state of $Q_n$ (ask me for explanation if this isn't clear).} \\



\begin{function}
\KwIn{$(\qTo, \leftProfTo, \rightProfTo)$.}
\KwResult{The cell in $Z$ for $m=(\qTo, \leftProfTo, \rightProfTo)$ is filled.}
\BlankLine
Let $\mTo = (\qTo,\leftProfTo, \rightProfTo)$; \\
% Let $(\leftToI, \rightToI) = (\stateindex{E_l}{\leftProfTo}, \stateindex{E_r}{\rightProfTo})$\\
Let $\emitProb = \emitweightfun{M_n}(\epsilon,\epsilon,\mTo)$; \\

Initialize $Z(\mTo) \leftarrow \transweightfun{Q'_n}(\startstate,\qTo) \profTrans{l}(0,\leftToI) \profTrans{r}(0,\rightToI) \emitProb$; \\
\ForEach{ $\leftFromI \in \incomingLeftProfile{\leftToI} $}{
  \ForEach{ $\rightFromI \in \incomingRightProfile{\rightToI} $} {
      Let $(\leftProfFrom, \rightProfFrom) = (\profiledelete{\leftFromI}, \profiledelete{\rightFromI})$; \\
    % incoming q is also a match state.  both profiles are D states
    \ForEach{ $\qFrom \in \incomingM{\qTo} $ } {
      
      Let $\mFrom = (\qFrom,\leftProfFrom, \rightProfFrom)$; \\
      $Z(\mTo) \leftarrow Z(\mTo) + Z(\mFrom) \transweightfun{Q'_n}(\qFrom,\qTo) \profTrans{l}(\leftFromI,\leftToI)
      \profTrans{r}(\rightFromI,\rightToI) \emitProb$
     }
    }
}

  
\ForEach{ $\leftFromI \in \incomingLeftProfile{\leftToI} $}{
  \ForEach{ $\qFrom \in \incomingL{\qTo} $ } {
  % incoming q is a left-insert state.  l,r profiles are D,W states,respectively. Note we've exited loop rightFromI 
  %Note we've exited loop rightFromI and initiated a new leftFromI
    Let $(\leftProfFrom, \rightProfFrom) = (\profiledelete{\leftFromI}, \profilewait{\rightToI})$; \\
    Let $\mFrom =( \qFrom,\leftProfFrom, \rightProfFrom)$; \\
    $Z(\mTo) \leftarrow Z(\mTo) + Z(\mFrom) \transweightfun{Q'_n}(\qFrom,\qTo) \profTrans{l}(\leftFromI,\leftToI)\emitProb$;
  }    
}


\ForEach{ $\rightFromI \in \incomingRightProfile{\rightToI} $} {
 % incoming q is a right-insert state.  l,r profiles are W,D states,respectively. 
  %Note we've exited loop leftFromI and initiated a new rightFromI
  \ForEach{ $\qFrom \in \incomingR{\qTo} $ } {
    Let $\tau = \getprofiletype(\qFrom, left)$; \\
    Let $(\leftProfFrom, \rightProfFrom) = (\profileunknown{\leftToI}, \profiledelete{\rightFromI})$; \\
    Let $\mFrom = (\qFrom,\leftProfFrom, \rightProfFrom)$; \\
    $Z(\mTo) \leftarrow Z(\mTo) + Z(\mFrom) \transweightfun{Q'_n}(\qFrom,\qTo) \profTrans{r}(\leftFromI,\leftToI)\emitProb$;
  }
}


\label{fillZ}
  
\caption{\addToDPFunction()   used by Algorithm~\ref{ForwardTransducer}. }% $fillZ(\toQ,\leftProfTo, \rightProfTo$), called in } 

\end{function}

\subsubsection{Alignment envelopes}



Note that states $e \in \statesof{E_n}$ of the constrained-expanded model, as with states $g \in \statesof{G_n}$ of the expanded model,
can be associated with a vector of subsequence co-ordinates
(one subsequence co-ordinate for each leaf-sequence in the clade descended from node $n$).
For example, in our non-normal construction of $\Delta(S)$, the state $\phi \in \mathbb{Z}_{|S|+1}$ is itself the co-ordinate.
The co-ordinate information associated with $e_l$ and $e_r$ can, therefore, be used to define some sort of {\em alignment envelope}, as in (Holmes 2005).
For example, we could exclude $(e_l,e_r)$ pairs if they result in alignment cutpoints that are too far from the main diagonal of the sequence co-ordinate hypercube.  


Let the function $\envelope{i_l}{i_r}$ return true if the state-index pair $(i_l,i_r)$ is allowed by the alignment envelope, and false otherwise.
Further, assume that $Z(m)$ is a sparse container that returns 0 if $m=\mstate$ is not in the envelope.

\subsubsection{Traceback}


Sampling a path from $P(\pi|M_n)$ is analogous to stochastic traceback through the Forward matrix.  The basic traceback algorithm is presented in Algorithm~\ref{traceback.short}, and a more precise version is presented in Algorithm~\ref{traceback.full}.  

Let the function $\sample(set, weights)$ input two equal-length vectors and return a randomly-chosen element of $set$, such that  $set_i$ is sampled
with probability $\frac{weights_i}{sum(weights)}$.


\begin{algorithm}
\KwIn{Z}
\KwOut{A path  $\pi$ through $M_n$ sampled proportional to $\transweightfun{M_n}(\pi)$.}
\BlankLine
Initialize $\pi \leftarrow  (\laststateof{M_n})$ \\
Initialize $\currentstate \leftarrow \laststateof{M_n}$ \\
\BlankLine
\While { $\currentstate \neq \startstateof{M_n}$ } {
Let $K = \{ k \in M_n: \transweightfun{M_n}(k,\currentstate) \neq 0 \} $ \\
With probability $\frac{Z(\newstate)\transweightfun{M_n}(\newstate, \currentstate)}
{\sum_{k \in K} Z(k)\transweightfun{M_n}(k, \currentstate)}$, prepend $\newstate$ to $\pi$.\\
Set $\currentstate \leftarrow \newstate$
}
\KwRet{$\pi$}

\label{traceback.short}
\caption{Stochastic traceback for sampling paths through transducer $M_n$.} 
\end{algorithm}




\begin{algorithm}
\KwIn{Z}
\KwOut{A path  $\pi$ through $M_n$ sampled proportional to $\transweightfun{M_n}(\pi)$.}
\BlankLine
Initialize $\pi \leftarrow  (\laststateof{M_n})$ \\
Initialize $\currentstate \leftarrow \laststateof{M_n}$ \\
\BlankLine
\While { $\currentstate \neq \startstateof{M_n}$ } {
Initialize $\instates \leftarrow \emptyarray $ \\
Initialize $\inweights \leftarrow \emptyarray $ \\
Set $ (\qTo, \leftProfTo, \rightProfTo) \leftarrow \mTo $\\
% Set $ (\leftToI, \rightToI)  \leftarrow ( \stateindex{E_l}{\leftProfTo}, \stateindex{E_r}{\leftProfTo})$


\If {$\currentstate = \laststateof{M_n}$}{
\ForEach { $\qFrom \in \waitstates$ } {
  Add $(\qTo, \profileterminate, \profileterminate)$ to $\instates$ \\
  Add $Z((\qFrom, \profileterminate, \profileterminate))\transweightfun{Q'_n}(\qFrom, \laststateof{Q'_n})$ to $\inweights$\\
}
}
\Else{
  \If {$\transweightfun{Q'_n}(\startstateof{Q'_n}, qTo) \profTrans{l}(0, \leftToI)\profTrans{r}(0, \rightToI) \neq0$}{
    Add $(\startstateof{Q'_n}, \startstate, \startstate)$ to $\instates$ \\
    Add $\transweightfun{Q'_n}(\startstateof{Q'_n}, qTo) \profTrans{l}(0, \leftToI) \profTrans{r}(0, \rightToI)$ to $\inweights$\\
  }
  \ForEach{ $\leftFromI \in \incomingLeftProfile{\leftToI} $}{
    \ForEach{ $\rightFromI \in \incomingRightProfile{\rightToI} $} {
      Let $(\leftProfFrom, \rightProfFrom) = (\profiledelete{\leftFromI}, \profiledelete{\rightFromI})$; \\
      % q is a match state.  both profiles are D states                                                           
      \ForEach{ $\qFrom \in \incomingM{\qTo} $ } {
        
        Add $(\qFrom,\leftProfFrom, \rightProfFrom)$ to $\instates$; \\
        Add $Z((\qFrom, \leftProfFrom, \rightProfFrom))\transweightfun{Q'_n}(\qFrom,\qTo) \profTrans{l}(\leftFromI,\leftToI)\profTrans{r}(\rightFromI,\rightToI)$ to $\inweights$; \\  
      }
    }
  }
  

\ForEach{ $\leftFromI \in \incomingLeftProfile{\leftToI} $}{
    Let $(\leftProfFrom, \rightProfFrom) = (\profiledelete{\leftFromI}, \profilewait{\rightToI})$; \\
    \ForEach{ $\qFrom \in \incomingL{\qTo} $ } {
  % incoming q is a left-insert state.  l,r profiles are D,W states,respectively. Note we've exited loop rightFromI     
   Add $(\qFrom,\leftProfFrom, \rightProfFrom)$ to $\instates$; \\
   Add $Z((\qFrom, \leftProfFrom, \rightProfFrom))\transweightfun{Q'_n}(\qFrom,\qTo) \profTrans{l}(\leftFromI,\leftToI)$ to $\inweights$; \\  
   }
}

\ForEach{ $\rightFromI \in \incomingRightProfile{\rightToI} $} {
  Let $\tau = \getprofiletype(\qTo, left)$; \\
  Let $(\leftProfFrom, \rightProfFrom) = (\profileunknown{\leftToI}, \profiledelete{\rightFromI})$; \\
  % incoming q is a right-insert state.  l,r profiles are W,unknown states,respectively.           
    \ForEach{ $\qFrom \in \incomingR{\qTo} $ } {
    Add $(\qFrom,\leftProfFrom, \rightProfFrom)$ to $\instates$; \\
    Add $Z((\qFrom, \leftProfFrom, \rightProfFrom))\transweightfun{Q'_n}(\qFrom,\qTo) \profTrans{r}(\rightFromI,\rightToI)$ to $\inweights$; \\  
    }
  }
}
\BlankLine
Set $ \newstate \leftarrow \sample(\instates, \inweights)$\\
Prepend $\newstate$ to $\pi$\\
Set $\currentstate \leftarrow \newstate$\\
} %end of while loop


\KwRet{$\pi$}

\label{traceback.full}
\caption{Pseudocode for stochastic traceback for sampling paths through transducer $M_n$.} 
\end{algorithm}







\subsection{Explicit construction of profile $E_n$ from $M_n$, following DP}

Having sampled a set of paths through $M_n$, profile $E_n$ is constructed by applying a series of transformations.

Refer to the previous sections for definitions pertaining to $M_n$ and $E_n$.

\subsubsection{Transformation $M_n \to M'_n$: sampled paths}

Let $\Pi' \subseteq \Pi_{M'_n}$ be a set of complete paths through $M_n$,
corresponding to $K$ random samples from $P(\pi|M_n)$.

The state space of $M'_n$ is the union of all states used by the paths in  $\Pi_{M'_n}$

\[ 
\statesof{M'_n} = \bigcup_{\pi \in \Pi'} \bigcup_{i=1}^{|\pi|} \pi_i  
\]
where $\pi_i$ is the $i^{th}$ state in a path $\pi \in (\transitionsof{M_n})^\ast$.

The emission and transition weight functions for $M'_n$ are the same as those of $M_n$:

\begin{eqnarray*}
\transweightfun{M'_n}(m,m') & = & \transweightfun{M_n}(m,m') \\
\emitweightfun{M'_n}(\epsilon,\epsilon,m') & = & \emitweightfun{M_n}(\epsilon,\epsilon,m')
\end{eqnarray*}


\subsubsection{Transformation $M'_n \to E''_n$: stripping out the prior}

Let $E''_n$ be a the transducer constructed via removing the $R$ states from the states of $M'_n$.  

\[
\statesof{E''_n} = \{ \hstate : \exists \mstate \in M'_n \}
\]

The emission and transition weight functions for $E''_n$ are the same as those of $H_n$:

\begin{eqnarray*}
\transweightfun{E''_n}(e,e') & = & \transweightfun{H_n}(e,e') \\
\emitweightfun{E''_n}(\epsilon,\epsilon,e') & = & \emitweightfun{H_n}(\epsilon,\epsilon,e')
\end{eqnarray*}


\subsubsection{Transformation $E''_n \to E'_n$: eliminating null states}
Let $E'_n$ be the transducer derived from $E''_n$ by marginalizing its null states.  

The state space of $E'_n$ is the set of non-null states in $E''_n$:

\[
\statesof{E'_n} = \{ e : e \in E''_n, type(e) \in \{S,E,D,W\} \}
\]

The transition weight function is the same as that of $H_n$ (and also $E''_n$) with paths through null states marginalized.
Let 
%this is similar to the \sumoverpaths definition, but not quite...so I introduced the \pi_transducer(state1, state1) set


\begin{eqnarray*}
\pi_{ {E''_n}(e,e')} &  =  & \{  \pi: \pi_1 =e, \pi_{|\pi|}=e', type(\pi_i) = N\ \forall\ i: 2 \leq i < |\pi| \} \\
 \transweightfun{E'_n}(e,e') & = & \sum_{\pi \in \pi_{E''_n(e,e') }} \transweightfun{H_n}(\pi) 
\end{eqnarray*}

The transition weight function resulting from summing over null states in $E''_n$ can be done with a preorder traversal of the state graph, outlined in Algorithm~\ref{removeNullStates}.   The $stack$ data structure has operations $stack.push(e)$, which adds $e$ to the top of the stack, and $stack.pop()$, which removes and returns the top element of the stack.  The $weights$ container maps states in $\statesof{E''_n}$ to real-valued weights.  

\newcommand\newTransHash{t_{new}}
\begin{algorithm}
\KwIn{$\statesof{E''_n}, \transweightfun{E''_n}$}
\KwOut{$\transweightfun{E'_n}$}
Let $\statesof{E'_n} = \{ e : e \in E''_n, type(e) \in \{S,E,D,W\} \}$\\
Initialize $\newTransHash(e,e') = 0$ $ \forall (e,e') \in \statesof{E'_n}$\\
\ForEach { $source\_state \in \statesof{E''_n}$}{
Initialize $stack = [source\_state]$\\
Initialize $weights[source\_state] = 0$\\


\BlankLine

\While{$stack \neq [] $}{

Set $e=stack.pop()$\\
\ForEach {$e' \in \{e' \in \statesof{E''_n}: \transweightfun{E''_n}(e,e') \neq 0\}$ }{

\If{ $type(e') \neq N$}{ 
  $\newTransHash(source, e') += weights[e]\cdot  \transweightfun{E''_n}(e,e')$\\
}
\Else{
$stack.push(e')$\\
$weights[e'] = weight[e] \cdot \transweightfun{E''_n}(e,e')$\\
}

}  %end e' loop
}  %end while
}  %end source_state loop
\KwRet{ $\transweightfun{E'_n}(e,e') \equiv \newTransHash(e,e')$ $\forall (e,e') \in \statesof{E'_n}$}
\label{removeNullStates}
\caption{Pseudocode for transforming the transition weight function of $E''_n$ into that of $E'_n$ via summing over null state paths.  } 
\end{algorithm}


Besides the states $\{ \startstate, \laststate, \profileterminate \}$, the remaining states in $E'_n$ are of type $D$, which we now index in ascending topological order: 

\[
\statesof{E'_n} = \{ \startstate, \laststate , \profileterminate \} \cup  \{\profiledelete{n} : 1 \leq n \leq \numStates{n} \}
\]

where $ 1\leq i,j\leq \numStates{n}$,

\begin{eqnarray*}
\newTransName{n}(i,j) & \equiv & \transweightfun{E'_n}(\profiledelete{i},\profiledelete{j}) \\
\newTransName{n}(0,j) & \equiv & \transweightfun{E'_n}(\startstate,\profiledelete{j})  \\
\newTransName{n}(i,\numStates{n}+1) & \equiv & \transweightfun{E'_n}(\profiledelete{i}, \profileterminate)  \\
% Commented out this last line as it seems redundant and a bit confusing - IH 4/26/2010
% \newTransName{n}(\numStates{n}+1, \numStates{n}+2) & \equiv & \transweightfun{E'_n}(\profileterminate, \laststate) \equiv 1  \\
\end{eqnarray*}




\subsubsection{Transformation $E'_n \to E_n$: adding wait states}


Let $E_n$ be the transducer derived from transforming $E'_n$ into strict normal form.  Since $N$ states were removed in the transformation from $E''_n \rightarrow E'_n$, we need only add $W$ states before each $D$ state:

\[
\statesof{E_n} = \{ \startstate, \laststate , \profileterminate \} \cup  \{\profiledelete{n} : 1 \leq n \leq N \}
\cup  \{\profilewait{n} : 1 \leq n \leq N \}
\]

% We also define the reverse (degenerate) mapping of states to indices:
% 
% \[ 
% \stateindex{E_n}{e} =  \left\{
% \begin{array}{ll}
% \displaystyle
% 0  & \mbox{if $e = \startstate$ }\\
% i & \mbox{if $e \in \{ \profiledelete{i},  \profilewait{i} \} $} \\
% N_n+1  & \mbox{if $e = \profileterminate$ }\\
% N_n+2  & \mbox{if $e = \laststate$ }\\
% \end{array}
% \right.
% \]

 




Note the following correspondences between the previously-defined  $\transweightfun{E_n}(e,e')$ and new notation. 

\begin{eqnarray*}
\transweightfun{E_n}(\profiledelete{i},\profilewait{j}) & = & \newTransName{n}(i,j) \\
\transweightfun{E_n}(\profilewait{j},\profiledelete{j}) & = & 1 \\
\transviawait{E_n}(\profiledelete{i},\profiledelete{j}) & = & \newTransName{n}(i,j) \\
\transtowait{E_n}(\profiledelete{i},\profilewait{j}) & = & \newTransName{n}(i,j) \\
\transtowait{E_n}(\profilewait{i},\profilewait{j}) & = & \delta(i=j) \\
\end{eqnarray*}




\section{Appendix: additional notation}
This section defines commonplace notation used earlier in the document.
\subsection{Sequences and alignments}
{\em Sequence notation:}
Let $\oplus$ denote sequence concatenation and $\Omega^\ast$ the set of sequences over $\Omega$, including the empty sequence $\epsilon$, and $\Omega$ itself.

{\em Gapped-pair alignment:}
Let $a,b,c\ldots \in \gappedpair{1}{2}$ represent single columns of a gapped pairwise alignment;
then $(a \oplus b \oplus c \oplus \ldots) \in \left(\gappedpair{1}{2}\right)^\ast$ is a gapped pairwise alignment
(in the form of a sequence of alignment columns).

{\em Homomorphism from alignment to sequences:}
The functions $S_k:\left(\gappedpair{1}{2}\right)^\ast \to \Omega_k^\ast$ return the individual sequences of a pairwise alignment, with gaps removed
\[
S_k(x) = \bigoplus_{(\omega_1,\omega_2) \in x} \omega_k, \quad k \in \{1,2\}
\]

{\em Input and output sequences:}
For a transition path $\pi \in (\States \times \gappedalphabet{I} \times \gappedalphabet{O} \times \States)^\ast$,
define the input and output sequences
$S_I:\Pi \to \Omega_I^\ast$ and
$S_O:\Pi \to \Omega_O^\ast$
\begin{eqnarray*}
S_I(\pi) & = & \bigoplus_{(\phi_1,\omega_I,\omega_O,\phi_2) \in \pi} \omega_I \\
S_O(\pi) & = & \bigoplus_{(\phi_1,\omega_I,\omega_O,\phi_2) \in \pi} \omega_O
\end{eqnarray*}

\end{document}

Questions? Mail ihh at fruitfly dot org
ViewVC Help
Powered by ViewVC 1.0.3