% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/seededlda.R
\name{textmodel_seededlda}
\alias{textmodel_seededlda}
\title{Semisupervised Latent Dirichlet allocation}
\usage{
textmodel_seededlda(
  x,
  dictionary,
  valuetype = c("glob", "regex", "fixed"),
  case_insensitive = TRUE,
  residual = 0,
  weight = 0.01,
  uniform = TRUE,
  max_iter = 2000,
  alpha = 0.5,
  beta = 0.1,
  gamma = 0,
  ...,
  verbose = quanteda_options("verbose")
)
}
\arguments{
\item{x}{the dfm on which the model will be fit.}

\item{dictionary}{a \code{\link[quanteda:dictionary]{quanteda::dictionary()}} with seed words that define
topics.}

\item{valuetype}{see \link[quanteda:valuetype]{quanteda::valuetype}}

\item{case_insensitive}{see \link[quanteda:valuetype]{quanteda::valuetype}}

\item{residual}{the number of undefined topics. They are named "other" by
default, but the name can be changed via \code{base::options(slda_residual_name)}.}

\item{weight}{determines the size of pseudo counts given to matched seed
words.}

\item{uniform}{if \code{FALSE}, adjusts the weights of seed words to make their
total amount equal across topics.}

\item{max_iter}{the maximum number of iterations in Gibbs sampling.}

\item{alpha}{the value to smooth topic-document distribution.}

\item{beta}{the value to smooth topic-word distribution.}

\item{gamma}{a parameter to determine change of topics between sentences or
paragraphs. When \code{gamma > 0}, Gibbs sampling of topics for the current
document is affected by the previous document's topics.}

\item{...}{passed to \link[quanteda:dfm_trim]{quanteda::dfm_trim} to restrict seed words based on
their term or document frequency. This is useful when glob patterns in the
dictionary match too many words.}

\item{verbose}{logical; if \code{TRUE} print diagnostic information during
fitting.}
}
\description{
Implements semisupervised Latent Dirichlet allocation
(Seeded LDA). \code{textmodel_seededlda()} allows users to specify
topics using a seed word dictionary. Users can run Seeded Sequential LDA by
setting \code{gamma > 0}.
}
\examples{
\donttest{
require(seededlda)
require(quanteda)

# Tokenize the first 500 movie reviews, dropping punctuation, symbols and numbers.
corp <- head(data_corpus_moviereviews, 500)
toks <- tokens(corp, remove_punct = TRUE, remove_symbols = TRUE, remove_numbers = TRUE)

# Build the document-feature matrix: remove English stopwords and very short
# features, then keep terms in the top decile of term frequency that occur in
# at most one tenth of the documents.
dfmt <- dfm(toks) \%>\%
    dfm_remove(stopwords('en'), min_nchar = 2) \%>\%
    dfm_trim(min_termfreq = 0.90, termfreq_type = "quantile",
             max_docfreq = 0.1, docfreq_type = "prop")

# Seed words that define each topic; entries ending in "*" are glob patterns.
dict <- dictionary(list(people = c("family", "couple", "kids"),
                        space = c("alien", "planet", "space"),
                        monster = c("monster*", "ghost*", "zombie*"),
                        war = c("war", "soldier*", "tanks"),
                        crime = c("crime*", "murder", "killer")))

# residual = 1 adds one undefined topic alongside the seeded ones;
# min_termfreq = 10 is passed on to dfm_trim() to restrict the matched seed words.
slda <- textmodel_seededlda(dfmt, dict, residual = 1, min_termfreq = 10)
terms(slda)
topics(slda)

}
}
\references{
Lu, Bin et al. (2011). "Multi-aspect Sentiment Analysis with
Topic Models". doi:10.5555/2117693.2119585. \emph{Proceedings of the 2011 IEEE
11th International Conference on Data Mining Workshops}.

Watanabe, Kohei & Zhou, Yuan (2020). "Theory-Driven Analysis of Large
Corpora: Semisupervised Topic Classification of the UN Speeches".
doi:10.1177/0894439320907027. \emph{Social Science Computer Review}.

Watanabe, Kohei & Baturo, Alexander (forthcoming). "Seeded Sequential LDA:
A Semi-supervised Algorithm for Topic-specific Analysis of Sentences".
\emph{Social Science Computer Review}.
}
