#####load libraries####### library(hapFabia) library(fabia) #####define segments, overlap, filename ####### shiftSize <- 5000 segmentSize <- 10000 fileName="KPGP1000_chr1" # without type #####convert from .vcf to _mat.txt: step 2. above####### vcftoFABIA(fileName=fileName) system("mv KPGP1000_chr1_mat.txt KPGP1000_chr1_matP.txt") system("cp KPGP1000_chr1_matG.txt KPGP1000_chr1_mat.txt") #####split/ generate segments: step 3. above####### split_sparse_matrix(fileName=fileName,segmentSize=segmentSize,shiftSize=shiftSize,annotation=TRUE) #####compute how many segments we have####### ina <- as.numeric(readLines(paste(fileName,"_mat.txt",sep=""),n=2)) samplesN <- ina[1] snps <- ina[2] save(samplesN, snps, file = paste(fileName, "_All", ".Rda", sep = "")) over <- segmentSize%/%shiftSize N1 <- snps%/%shiftSize endRunA <- (N1-over+2) #####analyze each segment####### iterateSegments(startRun=1,endRun=endRunA,shift=shiftSize,segmentSize=segmentSize,fileName=fileName,samples=0,upperBP=0.05,p=10,iter=80,alpha=0.03,cyc=50,IBDlength=50,Lt = 0.1,Zt = 0.2,thresCount=1e-5,minSNPsFactor=3/4,pMAF=0.03,haplotypes=FALSE) ###NOTE: haplotypes=FALSE##### #####identify duplicates####### identifyDuplicates(fileName=fileName,startRun=1,endRun=endRunA,shift=shiftSize,segmentSize=segmentSize) #####analyze results; parallel####### anaRes <- analyzeIBDs(fileName=fileName,startRun=1,endRun=endRunA,shift=shiftSize,segmentSize=segmentSize) print("Number haplotypes:") print(anaRes$noIBD) print("Haplotye length:") print(anaRes$avibdLengthS) print("Number of individuals possessing a haplotye:") print(anaRes$avnoSampS) print("Number of tagSNVs of a haplotye:") print(anaRes$avnoSnpsS) print("MAF of tagSNVs of a haplotye:") print(anaRes$avnoFreqS) print("MAF within the group of tagSNVs of a haplotye:") print(anaRes$avnoGroupFreqS) print("Number of changes between major and minor allele frequency:") print(anaRes$avnosnpChangeS) print("Number of tagSNVs per individual:") print(anaRes$avnosnpsPerSampleS) print("Number of individuals having a tagSNV:") print(anaRes$avnosamplePerSnpS) #####load result for segment 50####### posAll <- 150 # (50-1)*5000 = 245000: segment 245000 to 255000 start <- (posAll-1)*shiftSize end <- start + segmentSize pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="") load(file=paste(fileName,pRange,"_resAnno",".Rda",sep="")) IBD <- resHapFabia$mergedIBD # $ #####plot result for segment 50####### #plotIBDs(IBD,filename=paste(fileName,pRange,"_mat",sep="")) ##attention: filename without type ".txt" #####plot specific haplotype (the first) in segment 50####### plotOneIBD(IBD,filename=paste(fileName,pRange,"_mat",sep=""),ibdC=11) ##attention: filename without type ".txt"