% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kanjistat-package.R
\docType{data}
\name{kanjidata}
\alias{kanjidata}
\alias{kbase}
\alias{kmorph}
\title{Data on kanji}
\format{
kbase is a tibble with 13,108 rows and 13 variables:
\describe{
\item{kanji}{the kanji}
\item{unicode}{the Unicode codepoint}
\item{strokes}{the number of strokes}
\item{class}{one of four classes: "kyouiku", "jouyou", "jinmeiyou" or "hyougai"}
\item{grade}{a number from 1-11, basically a finer version of class, same as in KANJIDIC2,
except that we assigned an 11 for all hyougaiji (rather than an NA value)}
\item{kanken}{at what level the kanji appears in the Nihon Kanji Nouryoku Kentei (Kanken)}
\item{jlpt}{at what level the kanji appears in the Japanese Language Proficiency Test
(Nihongo Nouryoku Shiken)}
\item{wanikani}{at what level the kanji is learned on the kanji learning website Wanikani}
\item{frank}{the frequency rank (1 = most frequent) "based on several averages (Wikipedia,
novels, newspapers, ...)"}
\item{frank_news}{the frequency rank (1 = most frequent) based on newspaper data (2501
most frequent kanji over four years in the Mainichi Shimbun)}
\item{read_on}{a single ON reading in katakana}
\item{read_kun}{a single kun reading in hiragana}
\item{mean}{a single English meaning of the kanji}
}

kmorph is a tibble with 13,108 rows and 15 variables:
\describe{
\item{kanji}{the kanji}
\item{strokes}{the number of strokes}
\item{radical}{the traditional (Kangxi) radical used for indexing kanji (one of 214)}
\item{radvar}{the variant of the radical if it is different, otherwise \code{NA}}
\item{nelson_c}{the Nelson radical if it differs from the traditional one, otherwise \code{NA}}
\item{idc}{ideographic description character (plus sometimes a number or a letter) describing
the shape of the kanji}
\item{components}{visible components of the kanji; originally from KRADFILE}
\item{skip}{the kanji's SKIP code}
\item{mean}{a single English meaning of the kanji (same as in kbase)}
}
}
\source{
Most of the data is directly from the KANJIDIC2 file.
\url{https://www.edrdg.org/wiki/index.php/KANJIDIC_Project}\cr
Variables \code{jlpt}, \code{frank}, \code{idc}, \code{components} were taken from the Kanjium database.
\url{https://github.com/mifunetoshiro/kanjium}\cr
Variable \code{components} is originally from RADKFILE/KRADFILE.
\url{https://www.edrdg.org/}

The use of this data is covered in each case by a Creative Commons BY-SA 4.0 License.
See the package's LICENSE file for details and copyright holders.

Variable \code{class} is derived from \code{grade}.\cr
Variable \code{kanken} was compiled based on the Wikipedia description of the test levels (as of September 2022).
}
\usage{
kbase

kmorph
}
\description{
The tibbles kbase and kmorph provide basic and morphologic information, respectively, for
all kanji contained in the KANJIDIC2 file (see below).
}
\details{
The single ON and kun readings and the single meaning are for easy identification
of the more difficult kanji. They are the first entries in the KANJIDIC2 file, which may not
always be the most important ones. For full readings/meanings use the function \code{\link{lookup}}
or consult a dictionary.
}
\keyword{datasets}
