#####load libraries#######
library(hapFabia)
library(fabia)

#####define segments, overlap, filename #######
shiftSize <- 5000
segmentSize <- 10000
fileName="KPGP1000_chr1" # without type

#####convert from .vcf to _mat.txt: step 2. above#######
vcftoFABIA(fileName=fileName)
system("mv KPGP1000_chr1_mat.txt KPGP1000_chr1_matP.txt")
system("cp KPGP1000_chr1_matG.txt KPGP1000_chr1_mat.txt")

#####split/ generate segments: step 3. above#######
split_sparse_matrix(fileName=fileName,segmentSize=segmentSize,shiftSize=shiftSize,annotation=TRUE)

#####compute how many segments we have#######
ina <- as.numeric(readLines(paste(fileName,"_mat.txt",sep=""),n=2))
samplesN <- ina[1]
snps <- ina[2]
save(samplesN, snps, file = paste(fileName, "_All", ".Rda", sep = ""))
over <- segmentSize%/%shiftSize
N1 <- snps%/%shiftSize
endRunA <- (N1-over+2)
 
#####analyze each segment#######
iterateSegments(startRun=1,endRun=endRunA,shift=shiftSize,segmentSize=segmentSize,fileName=fileName,samples=0,upperBP=0.05,p=10,iter=80,alpha=0.03,cyc=50,IBDlength=50,Lt = 0.1,Zt = 0.2,thresCount=1e-5,minSNPsFactor=3/4,pMAF=0.03,haplotypes=FALSE)

###NOTE: haplotypes=FALSE#####

#####identify duplicates#######
identifyDuplicates(fileName=fileName,startRun=1,endRun=endRunA,shift=shiftSize,segmentSize=segmentSize)

#####analyze results; parallel#######
anaRes <- analyzeIBDs(fileName=fileName,startRun=1,endRun=endRunA,shift=shiftSize,segmentSize=segmentSize)
print("Number haplotypes:")
print(anaRes$noIBD)
print("Haplotye length:")
print(anaRes$avibdLengthS)
print("Number of individuals possessing a haplotye:")
print(anaRes$avnoSampS)
print("Number of tagSNVs of a haplotye:")
print(anaRes$avnoSnpsS)
print("MAF of tagSNVs of a haplotye:")
print(anaRes$avnoFreqS)
print("MAF within the group of tagSNVs of a haplotye:")
print(anaRes$avnoGroupFreqS)
print("Number of changes between major and minor allele frequency:")
print(anaRes$avnosnpChangeS)
print("Number of tagSNVs per individual:")
print(anaRes$avnosnpsPerSampleS)
print("Number of individuals having a tagSNV:")
print(anaRes$avnosamplePerSnpS)

#####load result for segment 50#######
posAll <- 150 # (50-1)*5000 = 245000: segment 245000 to 255000
start <- (posAll-1)*shiftSize
end <- start + segmentSize
pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="")
load(file=paste(fileName,pRange,"_resAnno",".Rda",sep=""))
IBD <- resHapFabia$mergedIBD # $

#####plot result for segment 50#######
#plotIBDs(IBD,filename=paste(fileName,pRange,"_mat",sep=""))
   ##attention: filename without type ".txt"

#####plot specific haplotype (the first) in segment 50#######
plotOneIBD(IBD,filename=paste(fileName,pRange,"_mat",sep=""),ibdC=11)
   ##attention: filename without type ".txt"