\name{guess_substr_info}
\alias{guess_substr_info}
\title{Look Up Translations for All Substrings of a Sumerian Text}
\description{
Converts a Sumerian text string into cuneiform tokens, generates all contiguous substrings, and looks up the most frequent translation for each substring in one or more dictionaries, which are searched in order.
}
\usage{
guess_substr_info(x, dic, mapping = NULL)
}
\arguments{
  \item{x}{A character string of length 1 containing Sumerian text (transliteration, sign names, or cuneiform characters). May contain brackets as used by \code{\link{skeleton}}.}
  \item{dic}{A dictionary, a list of dictionaries, or a character vector of file paths to dictionary files. If file paths are given, each file is loaded with \code{\link{read_dictionary}}. Dictionaries are tried in order: the first dictionary that contains a translation for a given substring wins.}
  \item{mapping}{A data frame containing the sign mapping table with columns \code{syllables}, \code{name}, and \code{cuneiform}. If \code{NULL} (the default), the package's internal mapping file \file{etcsl_mapping.txt} is loaded.}
}
\details{
The function performs the following steps:

\enumerate{
  \item If \code{dic} is a character vector of file paths, the dictionaries are loaded with \code{\link{read_dictionary}}. If \code{dic} is a single dictionary (a data frame), it is wrapped in a list.
  \item The input string \code{x} is converted to cuneiform with \code{\link{as.cuneiform}} and split into individual tokens with \code{\link{split_sumerian}}.
  \item A data frame of all contiguous substrings is created with \code{\link{init_substr_info}}.
  \item A \code{sign_name} column is added by converting each substring expression with \code{\link{as.sign_name}}.
  \item For each substring, the dictionaries are searched in order. The most frequent translation (highest \code{count} among rows with \code{row_type == "trans."}) from the first dictionary that contains a match is used to fill in the \code{type} and \code{translation} columns.
}
}
\value{
A data frame with one row per substring and the following columns:
  \item{start}{Integer. The token position of the first token in the substring (1-based).}
  \item{n_tokens}{Integer. The number of tokens in the substring.}
  \item{expr}{Character. The concatenated cuneiform tokens of the substring.}
  \item{type}{Character. The grammatical type of the most frequent translation (e.g. \code{"S"}, \code{"V"}), or \code{""} if no translation was found.}
  \item{translation}{Character. The most frequent translation from the dictionaries, or \code{""} if no translation was found.}
  \item{sign_name}{Character. The sign name representation of the substring.}

The rows are ordered as in \code{\link{init_substr_info}} (by \code{n_tokens} descending, then \code{start} ascending), so that row indices can be computed with \code{\link{substr_position}}.
}
\seealso{
\code{\link{init_substr_info}} for creating the substring data frame,
\code{\link{substr_position}} for computing row indices,
\code{\link{read_dictionary}} for loading dictionaries,
\code{\link{look_up}} for interactive dictionary lookup,
\code{\link{skeleton}} for creating translation templates.
}
\examples{
# Load the built-in dictionary
dic <- read_dictionary()

# Look up translations for all substrings
x <- "lugal kur-ra-ke4"
df <- guess_substr_info(x, dic)

# Show rows that have a translation
df[df$translation != "", ]

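# Pass the dictionary as a file path instead of a loaded dictionary.
# Several paths can be given as a character vector, ordered by
# reliability (the first dictionary with a match wins).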
file1 <- system.file("extdata", "sumer-dictionary.txt", package = "sumer")
df <- guess_substr_info(x, file1)

}
\keyword{utilities}
\keyword{character}
