% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dmm.R
\name{fitdmm}
\alias{fitdmm}
\title{Point by point estimates of a k-th order drifting Markov Model}
\usage{
fitdmm(
  sequences,
  order,
  degree,
  states,
  init.estim = c("mle", "freq", "prod", "stationary", "unif"),
  fit.method = c("sum"),
  ncpu = 2
)
}
\arguments{
\item{sequences}{A list of character vector(s) representing one (several) sequence(s)}

\item{order}{Order of the Markov chain}

\item{degree}{Degree of the polynomials (e.g., linear drifting if \code{degree}=1, etc.)}

\item{states}{Vector of the state space, of length s > 1}

\item{init.estim}{Default="mle". Method used to estimate the initial law.
If \code{init.estim} = "mle", then the classical Maximum Likelihood Estimator
is used. If \code{init.estim} = "freq", then the initial distribution
is estimated by taking the frequencies of the words of length k for all
sequences. If \code{init.estim} = "prod", then the initial distribution is estimated by using
the product of the frequencies of each letter (for all the sequences) in
the word of length k. If \code{init.estim} = "stationary", then the initial distribution is
estimated by using the stationary law of the point of support transition
matrices of each letter. If \code{init.estim} = "unif",
then the initial probability of each letter is estimated by using \eqn{\frac{1}{s}}.
Alternatively, \code{init.estim} can be a customisable vector of length \eqn{|E|^k}. See Details for the formulas.}

\item{fit.method}{If \code{sequences} is a list of several character vectors of the same length,
the usual LSE over the sample paths is proposed when \code{fit.method}="sum" (a list of a single character vector
is its special case).}

\item{ncpu}{Default=2. Represents the number of cores used to parallelize computation. If \code{ncpu}=-1, then it uses all available cores.}
}
\value{
An object of class \code{dmm}
}
\description{
Estimation of d+1 points of support transition matrices and \eqn{|E|^{k}} initial law of a k-th
  order drifting Markov Model starting from one or several sequences.
}
\details{
The \link[drimmR]{fitdmm} function creates a drifting Markov model object \code{dmm}.

Let \eqn{E=\{1,\ldots, s\}}, \eqn{s < \infty}, be the finite state space of a random system
whose time evolution is governed by a discrete-time stochastic process with values in \eqn{E}.
A sequence \eqn{X_0, X_1, \ldots, X_n} with state space \eqn{E= \{1, 2, \ldots, s\}} is said to be a
linear drifting Markov chain (of order 1) of length \eqn{n} between the Markov transition matrices
\eqn{\Pi_0} and \eqn{\Pi_1} if the distribution of \eqn{X_t}, \eqn{t = 1, \ldots, n}, is defined by
\eqn{P(X_t=v \mid X_{t-1} = u, X_{t-2}, \ldots ) = \Pi_{\frac{t}{n}}(u, v), \; u, v \in E}, where
\eqn{\Pi_{\frac{t}{n}}(u, v) = ( 1 - \frac{t}{n}) \Pi_0(u, v) + \frac{t}{n} \Pi_1(u, v), \; u, v \in E}.
The linear drifting Markov model of order \eqn{1} can be generalized to a polynomial drifting Markov model of
order \eqn{k} and degree \eqn{d}. Let \eqn{\Pi_{\frac{i}{d}} = (\Pi_{\frac{i}{d}}(u_1, \dots, u_k, v))_{u_1, \dots, u_k,v \in E}},
\eqn{i = 0, \ldots, d}, be \eqn{d+1} Markov transition matrices (of order \eqn{k}) over a state space \eqn{E}.


The estimation of DMMs is carried out for 4 different types of data :
\describe{
   \item{One can observe one sample path :}{It is denoted by \eqn{H(m,n):= (X_0,X_1, \ldots,X_{m})},
    where m denotes the length of the sample path and \eqn{n} the length of the drifting Markov chain.
    Two cases can be considered: \enumerate{
    \item m=n (a complete sample path),
    \item m < n (an incomplete sample path).}}
    \item{One can also observe \eqn{H} i.i.d. sample paths :}{It is denoted by \eqn{H_i(m_i,n_i), i=1, \ldots, H}.
     Two cases are considered : \enumerate{
    \item \eqn{m_i=n_i=n \forall i=1, \ldots, H} (complete sample paths of drifting Markov chains of the same length),
    \item \eqn{n_i=n  \forall i=1, \ldots, H} (incomplete sample paths of drifting Markov chains of the same length).
    In this case, a usual LSE over the sample paths is used.}}
 }


 The initial distribution of a k-th order drifting Markov Model is defined as
 \eqn{\mu_i = P(X_1 = i)}. The initial distribution of the first k letters is freely
 customisable by the user, but five methods are proposed for its
 estimation :
 \describe{
   \item{Estimation based on the Maximum Likelihood Estimator:}{
     The Maximum Likelihood Estimator for the initial distribution. The
     formula is: \eqn{\widehat{\mu_i} = \frac{Nstart_i}{L}}, where
     \eqn{Nstart_i} is the number of occurrences of the word \eqn{i} (of
     length \eqn{k}) at the beginning of each sequence and \eqn{L} is the
     number of sequences. This estimator is reliable when the number of
     sequences \eqn{L} is high.}
   \item{Estimation based on the frequency:}{The initial distribution is
     estimated by taking the frequencies of the words of length k for all
     sequences. The formula is \eqn{\widehat{\mu_i} = \frac{N_i}{N}}, where
     \eqn{N_i} is the number of occurrences of the word \eqn{i} (of length \eqn{k})
     in the sequences and \eqn{N} is the sum of the lengths of the sequences.}
   \item{Estimation based on the product of the frequencies of each state:}{
     The initial distribution is estimated by using the product of the
     frequencies of each state (for all the sequences) in the word of length
     \eqn{k}.}
      \item{Estimation based on the stationary law of point of support
      transition matrix for a word of length k :}{
     The initial distribution is estimated using \eqn{\mu(\Pi_{\frac{k-1}{n}})}.}
      \item{Estimation based on the uniform law :}{
      \eqn{\frac{1}{s}}}
 }
}
\examples{
data(lambda, package = "drimmR")
states <- c("a","c","g","t")
order <- 1
degree <- 1
fitdmm(lambda,order,degree,states, init.estim = "freq",fit.method="sum")
}
\references{
\insertRef{BaVe2018}{drimmR}
\insertRef{Ver08}{drimmR}
}
\author{
Geoffray Brelurut, Alexandre Seiller
}
