## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, echo = FALSE------------------------------------------------------
library(BinaryDosage)

## ----files--------------------------------------------------------------------
vcf_file   <- system.file("extdata", "set1a.vcf.gz", package = "BinaryDosage")
bd4_file   <- system.file("extdata", "vcf1a.bdose",  package = "BinaryDosage")
bdose_file <- file.path(tempdir(), "set1a.bdose")

## ----convert, eval = TRUE, echo = TRUE, message = FALSE, warning = FALSE------
if (requireNamespace("vcfppR", quietly = TRUE)) {
  vcftobd(vcffile = vcf_file, bdose_file = bdose_file)
} else {
  updatebd(bdfiles = bd4_file, bdose_file = bdose_file)
}

## ----convert_region, eval = F, echo = T---------------------------------------
# vcftobd(vcffile    = vcf_file,
#          bdose_file = bdose_file,
#          region     = "1:10000-15000")

## ----load_info, eval = TRUE, echo = T, message = F, warning = F---------------
bd5 <- getbdinfo(bdose_file)

## ----meta, eval = TRUE, echo = T----------------------------------------------
cat("Class:          ", class(bd5), "\n")
cat("Uses family ID: ", bd5$usesfid, "\n")
cat("One chromosome: ", bd5$onechr, "\n")
cat("SNP ID format:  ", bd5$snpidformat, "\n")
cat("Format:         ", bd5$additionalinfo$format, "\n")
cat("Header size:    ", bd5$additionalinfo$headersize, "bytes\n")

## ----samples, eval = TRUE, echo = T-------------------------------------------
cat("Number of samples:", nrow(bd5$samples), "\n")
knitr::kable(head(bd5$samples, 5), caption = "First 5 samples")

## ----snps, eval = TRUE, echo = T----------------------------------------------
cat("Number of SNPs:", nrow(bd5$snps), "\n")
knitr::kable(bd5$snps, caption = "SNP table")

## ----indices, eval = TRUE, echo = T-------------------------------------------
cat("Indices (bytes):", bd5$indices, "\n")

## ----snp_by_index, eval = TRUE, echo = T, message = F, warning = F------------
snp1 <- getbd5snp(bd5info = bd5, snp = 1L)

snp1_df <- data.frame(
  SampleID = bd5$samples$sid,
  Dosage   = round(snp1$dosage, 4),
  P_00     = round(snp1$p0,     4),
  P_01     = round(snp1$p1,     4),
  P_11     = round(snp1$p2,     4)
)

knitr::kable(head(snp1_df, 10),
             caption = paste("SNP", bd5$snps$snpid[1], "— first 10 subjects"))

## ----snp_by_id, eval = TRUE, echo = T, message = F, warning = F---------------
snp_id <- "1:12000:T:C"
snp3   <- getbd5snp(bd5info = bd5, snp = snp_id)

snp3_df <- data.frame(
  SampleID = bd5$samples$sid,
  Dosage   = round(snp3$dosage, 4),
  P_00     = round(snp3$p0,     4),
  P_01     = round(snp3$p1,     4),
  P_11     = round(snp3$p2,     4)
)

knitr::kable(head(snp3_df, 10),
             caption = paste("SNP", snp_id, "— first 10 subjects"))

## ----apply_bdapply, eval = TRUE, echo = T, message = F, warning = F-----------
getaaf <- function(dosage, p0, p1, p2) mean(dosage, na.rm = TRUE) / 2

aaf_list  <- bdapply(bdinfo = bd5, func = getaaf)
aaf_table <- data.frame(snpid = bd5$snps$snpid,
                        aaf   = round(unlist(aaf_list), 4))

knitr::kable(aaf_table, caption = "Alternate allele frequencies via bdapply")

## ----apply_con, eval = TRUE, echo = T, message = F, warning = F---------------
n_snps <- nrow(bd5$snps)
n_samp <- nrow(bd5$samples)
dosage <- numeric(n_samp)
p0     <- numeric(n_samp)
p1     <- numeric(n_samp)
p2     <- numeric(n_samp)

con <- openbd5con(bd5)
aaf <- numeric(n_snps)
for (i in seq_len(n_snps)) {
  getbd5snp_con(bd5info = bd5, snp = i,
                dosage = dosage, p0 = p0, p1 = p1, p2 = p2,
                bd5con = con)
  aaf[i] <- mean(dosage, na.rm = TRUE) / 2
}
closebd5con(con)

knitr::kable(data.frame(snpid = bd5$snps$snpid, aaf = round(aaf, 4)),
             caption = "Alternate allele frequencies via getbd5snp_con")

## ----cleanup, include = FALSE, eval = TRUE------------------------------------
unlink(c(bdose_file, paste0(bdose_file, ".bdi")))

