% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/rm_citation.R
\name{rm_citation}
\alias{rm_citation}
\title{Remove/Replace/Extract Citations}
\usage{
rm_citation(text.var, trim = !extract, clean = TRUE,
  pattern = "@rm_citation", replacement = "", extract = FALSE,
  dictionary = getOption("regex.library"), ...)
}
\arguments{
\item{text.var}{The text variable.}

\item{trim}{logical.  If \code{TRUE} removes leading and trailing white
spaces.}

\item{clean}{logical.  If \code{TRUE} extra white spaces and escaped
characters will be removed.}

\item{pattern}{A character string containing a regular expression (or
character string for \code{fixed = TRUE}) to be matched in the given
character vector (see \bold{Details} for additional information).  Default,
\code{@rm_citation} uses the \code{rm_citation} regex from the regular
expression dictionary from the \code{dictionary} argument.}

\item{replacement}{Replacement for matched \code{pattern}.}

\item{extract}{logical.  If \code{TRUE} the citations are extracted into a
list of vectors.}

\item{dictionary}{A dictionary of canned regular expressions to search within
if \code{pattern} begins with \code{"@rm_"}.}

\item{\dots}{Ignored.}
}
\value{
Returns a character string with citations removed.  If
\code{extract = TRUE} the citations are instead returned as a list of
vectors.
}
\description{
Remove/replace/extract APA6 style citations from a string.
}
\details{
The default regular expression used by \code{rm_citation} finds
in-text and parenthetical citations.  This behavior can be altered by using a
secondary regular expression from the \code{\link[qdapRegex]{regex_usa}}
data (or other dictionary) via \code{pattern = "@rm_citation2"} (in-text
citations only) or \code{pattern = "@rm_citation3"} (parenthetical citations
only).  See \bold{Examples} for example usage.
}
\examples{
## All citations: sample sentences mixing in-text and parenthetical forms
x <- c(
    "Hello World (V. Raptor, 1986) bye",
    "Narcissism is not dead (Rinker, 2014)",
    "The R Core Team (2014) has many members.",
    paste("Bunn (2005) said, \\"As for elegance, R is refined, tasteful, and",
        "beautiful. When I grow up, I want to marry R.\\""),
    "It is wrong to blame ANY tool for our own shortcomings (Baer, 2005).",
    "Wickham's (in press) Tidy Data should be out soon.",
    "Rinker's (n.d.) dissertation not so much.",
    "I always consult xkcd comics for guidance (Foo, 2012; Bar, 2014).",
    "Uwe Ligges (2007) says, \\"RAM is cheap and thinking hurts\\""
)

## Remove the citations (default), extract them, or swap in a placeholder
rm_citation(x)
rm_citation(x, extract = TRUE)
rm_citation(x, replacement = "[CITATION HERE]")

\dontrun{
## Tabulate how often each extracted citation occurs
citation_counts <- sort(table(unlist(rm_citation(x, extract = TRUE))))
qdapTools::vect2df(citation_counts, "citation", "count")
}

## In-text citations only
rm_citation(x, pattern = "@rm_citation2", extract = TRUE)

## Parenthetical citations only
rm_citation(x, pattern = "@rm_citation3", extract = TRUE)

\dontrun{
## Mining Citation
## Attach the helper packages before any of their functions are called
## (url_dl() and read_docx() below are provided by qdapTools).
library(qdapTools); library(ggplot2); library(qdap)

## Download the sample document into the working directory and read it in
url_dl("http://umlreading.weebly.com/uploads/2/5/2/5/25253346/whole_language_timeline-updated.docx")

(txt <- read_docx("whole_language_timeline-updated.docx"))

txt <- rm_non_ascii(txt)

## Split the text at "References"; the first element is used below as the
## body text to mine for citations
parts <- split_vector(txt, split = "References", include = TRUE, regex=TRUE)

parts[[1]]

## Whole body collapsed into a single string
rm_citation(unbag(parts[[1]]), extract=TRUE)[[1]]

## By line
rm_citation(parts[[1]], extract=TRUE)

## Frequency (most frequent first; columns reordered to citation, freq)
left_just(cites <- list2df(sort(table(rm_citation(unbag(parts[[1]]),
    extract=TRUE)), decreasing = TRUE), "freq", "citation")[2:1])

## Distribution of citations (find locations and then plot)
## Each citation is located as a literal (fixed) string in the collapsed text;
## the start/end positions are padded by 5 characters on either side.
cite_locs <- do.call(rbind, lapply(cites[[1]], function(x){
    m <- gregexpr(x, unbag(parts[[1]]), fixed=TRUE)
    data.frame(
        citation=x,
        start = m[[1]] -5,
        end =  m[[1]] + 5 + attributes(m[[1]])[["match.length"]]
    )
}))

## One horizontal segment per occurrence, positioned by character offset
ggplot(cite_locs) +
    geom_segment(aes(x=start, xend=end, y=citation, yend=citation), size=3,
        color="yellow") +
    xlab("Duration") +
    scale_x_continuous(expand = c(0,0),
        limits = c(0, nchar(unbag(parts[[1]])) + 25)) +
    theme_grey() +
    theme(
        panel.grid.major=element_line(color="grey20"),
        panel.grid.minor=element_line(color="grey20"),
        plot.background = element_rect(fill="black"),
        panel.background = element_rect(fill="black"),
        panel.border = element_rect(colour = "grey50", fill=NA, size=1),
        axis.text=element_text(color="grey50"),
        axis.title=element_text(color="grey50")
    )
}
}
\seealso{
\code{\link[base]{gsub}},
\code{\link[stringi]{stri_extract_all_regex}}

Other rm_.functions: \code{\link{as_numeric}},
  \code{\link{as_numeric2}}, \code{\link{rm_number}};
  \code{\link{as_time}}, \code{\link{as_time2}},
  \code{\link{rm_time}}, \code{\link{rm_transcript_time}};
  \code{\link{rm_abbreviation}}; \code{\link{rm_angle}},
  \code{\link{rm_bracket}},
  \code{\link{rm_bracket_multiple}},
  \code{\link{rm_curly}}, \code{\link{rm_round}},
  \code{\link{rm_square}}; \code{\link{rm_between}},
  \code{\link{rm_between_multiple}};
  \code{\link{rm_caps_phrase}}; \code{\link{rm_caps}};
  \code{\link{rm_citation_tex}};
  \code{\link{rm_city_state_zip}};
  \code{\link{rm_city_state}}; \code{\link{rm_date}};
  \code{\link{rm_default}}; \code{\link{rm_dollar}};
  \code{\link{rm_email}}; \code{\link{rm_emoticon}};
  \code{\link{rm_endmark}}; \code{\link{rm_hash}};
  \code{\link{rm_nchar_words}}; \code{\link{rm_non_ascii}};
  \code{\link{rm_non_words}}; \code{\link{rm_percent}};
  \code{\link{rm_phone}}; \code{\link{rm_postal_code}};
  \code{\link{rm_repeated_characters}};
  \code{\link{rm_repeated_phrases}};
  \code{\link{rm_repeated_words}}; \code{\link{rm_tag}};
  \code{\link{rm_title_name}};
  \code{\link{rm_twitter_url}}, \code{\link{rm_url}};
  \code{\link{rm_white}}, \code{\link{rm_white_bracket}},
  \code{\link{rm_white_colon}},
  \code{\link{rm_white_comma}},
  \code{\link{rm_white_endmark}},
  \code{\link{rm_white_lead}},
  \code{\link{rm_white_lead_trail}},
  \code{\link{rm_white_multiple}},
  \code{\link{rm_white_punctuation}},
  \code{\link{rm_white_trail}}; \code{\link{rm_zip}}
}
\keyword{citation}

