% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/extractInfo.R
\name{extractInfo}
\alias{extractInfo}
\title{Extract information from raw data}
\usage{
extractInfo(trainpath, test)
}
\arguments{
\item{trainpath}{Path to the training dataset file.}

\item{test}{Test dataset: a list containing \code{fam}, \code{bed} and \code{bim}, generated from PLINK files (\code{plink2R::read_plink} is recommended).}
}
\value{
A list including the processed training data (\code{train}) and the testing data (\code{bed}, \code{bim}, \code{fam}).
}
\description{
The first step of the algorithm: it cleans the dataset and extracts information from the raw data.
(Please note that there are some requirements for the training and testing datasets; see Details.)
}
\details{
The raw training data should be a file with 8 columns, in this order:
CHROM, POS, A1, A2, OR, P, SNP, N.
The CHROM column should contain only numbers from 1 to 22. The SNP column
contains the rsID.

The \code{test} object can be generated with \code{read_plink("test_plink_file")}.
That is, the raw testing data can be PLINK binary files (bfiles) read into R with plink2R.

\code{test} is a list including \code{fam} (6 columns with information on samples), \code{bim} (6 columns with information on SNPs) and \code{bed} (genotypes coded as 0, 1, 2).
}
\references{
Song, S., Jiang, W., Hou, L. and Zhao, H. Leveraging effect size distributions to improve polygenic risk scores derived from genome-wide association studies. \emph{Submitted}.
}
\seealso{
\url{https://github.com/gabraham/plink2R}
}
\author{
Shuang Song, Wei Jiang, Lin Hou and Hongyu Zhao
}
