load("dups.Rda") load("analyzeResult.Rda") load(file="individuals.Rda") library(hapFabia) library(fabia) #source("prepareAncientGenome.R") #source("plotAncientGenomes.R") startRun <- 1 endRun <- 633 runIndex="" lengthAll <- 3167651 shift <- 5000 intervallAll <- 10000 over <- intervallAll%/%shift N1 <- lengthAll%/%shift endRunAll <- (N1-over+2) # 639 sampB <- c(1,2,3,5,10,20) snpB <- c(8,9,10,12,15,20,30,50) LB <- c(8,50,500,2000,5000,10000,20000,50000,100000,200000,500000,1000000) # > table(indi[,3]) # # AFR AMR ASN EUR # 492 362 572 758 # # AMR = PUR CLM MXL pops <- c("AFR","AMR","ASN","EUR") recode <- c( "AFR", # (1,0,0,0) --> 1 "AMR", # (0,1,0,0) --> 2 "AFR/AMR", # (1,1,0,0) --> 3 "ASN", # (0,0,1,0) --> 4 "AFR/ASN", # (1,0,1,0) --> 5 "AMR/ASN", # (0,1,1,0) --> 6 "AFR/AMR/ASN", # (1,1,1,0) --> 7 "EUR", # (0,0,0,1) --> 8 "AFR/EUR", # (1,0,0,1) --> 9 "AMR/EUR", # (0,1,0,1) --> 10 "AFR/AMR/EUR", # (1,1,0,1) --> 11 "ASN/EUR", # (0,0,1,1) --> 12 "AFR/ASN/EUR", # (1,0,1,1) --> 13 "AMR/ASN/EUR", # (0,1,1,1) --> 14 "AFR/AMR/ASN/EUR" # (1,1,1,1) --> 15 ) allC <- rep(0,15) names(allC) <- recode lsB <- length(sampB) lnB <- length(snpB) lLB <- length(LB) msB <- matrix(rep(0,lsB*15),nrow=lsB,ncol=15) mnB <- matrix(rep(0,lnB*15),nrow=lnB,ncol=15) mLB <- matrix(rep(0,lLB*15),nrow=lLB,ncol=15) colnames(msB) <- recode rownames(msB) <- as.character(sampB) colnames(mnB) <- recode rownames(mnB) <- as.character(snpB) colnames(mLB) <- recode rownames(mLB) <- as.character(LB) allCount <- 0 noSnps <- c() noSamp <- c() ibdLength <- c() ibdFreq <- c() snpL <- c() for (posAll in startRun:endRun) { start <- (posAll-1)*shift end <- start + intervallAll if (end > lengthAll) { end <- lengthAll } pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="") load(file=paste(fileName,pRange,"_resAnno",".Rda",sep="")) IBDannot <- resHapFabia$mergedIBD noIBD <- length(IBDannot) if (noIBD>0) { for (ibdC in 1:noIBD) { allCount <- allCount + 1 if (!dups[allCount]) { noSa <- IBDannot[[ibdC]][[6]] noSamp <- c(noSamp,noSa) noSn <- IBDannot[[ibdC]][[7]] noSnps <- c(noSnps,noSn) ibdL <- IBDannot[[ibdC]][[5]] ibdLength <- c(ibdLength,ibdL) ibdF <- IBDannot[[ibdC]][[18]] ibdFM <- median(ibdF) ibdFreq <- c(ibdFreq,ibdFM) snps <- IBDannot[[ibdC]][[15]] snpL <- c(snpL,snps) samples <- IBDannot[[ibdC]][[8]] tt <- table(c(pops,indi[samples,3])) tt <- tt - 1 aT <- tt>0 aI <- as.integer(aT) codeT <- aI[1]+aI[2]*2+aI[3]*4+aI[4]*8 allC[codeT] <- allC[codeT]+1 if (noSa<(sampB[2]+1)) { sw <- 1 } else { if (noSa<(sampB[3]+1)) { sw <- 2 } else { if (noSa<(sampB[4]+1)) { sw <- 3 } else { if (noSa<(sampB[5]+1)) { sw <- 4 } else { if (noSa<(sampB[6]+1)) { sw <- 5 } else { sw <- 6 } } } } } if (noSn<(snpB[2]+1)) { sn <- 1 } else { if (noSn<(snpB[3]+1)) { sn <- 2 } else { if (noSn<(snpB[4]+1)) { sn <- 3 } else { if (noSn<(snpB[5]+1)) { sn <- 4 } else { if (noSn<(snpB[6]+1)) { sn <- 5 } else { if (noSn<(snpB[7]+1)) { sn <- 6 } else { if (noSn<(snpB[8]+1)) { sn <- 7 } else { sn <- 8 } } } } } } } if (ibdL<(LB[2]+1)) { sL <- 1 } else { if (ibdL<(LB[3]+1)) { sL <- 2 } else { if (ibdL<(LB[4]+1)) { sL <- 3 } else { if (ibdL<(LB[5]+1)) { sL <- 4 } else { if (ibdL<(LB[6]+1)) { sL <- 5 } else { if (ibdL<(LB[7]+1)) { sL <- 6 } else { if (ibdL<(LB[8]+1)) { sL <- 7 } else { if (ibdL<(LB[9]+1)) { sL <- 8 } else { if (ibdL<(LB[10]+1)) { sL <- 9 } else { if (ibdL<(LB[11]+1)) { sL <- 10 } else { if (ibdL<(LB[12]+1)) { sL <- 11 } else { sL <- 12 } } } } } } } } } } } msB[sw,codeT] <-msB[sw,codeT] + 1 mnB[sn,codeT] <-mnB[sn,codeT] + 1 mLB[sL,codeT] <-mLB[sL,codeT] + 1 } } } } save(ibdLength,noSamp,noSnps,allC,msB,mnB,mLB,file="resultsA1.Rda") sBB <- sampB+0.5 endB <- max(noSamp)+0.5 breaksA <- c(sBB,endB) sBB <- snpB+0.5 endB <- max(noSnps)+0.5 breaksB <- c(sBB,endB) sBB <- LB+0.5 endB <- max(ibdLength)+0.5 breaksC <- c(sBB,endB) hSa <- hist(noSamp,breaks=breaksA,plot=FALSE,freq=TRUE,right=TRUE) print(hSa) hSn <- hist(noSnps,breaks=breaksB,plot=FALSE,freq=TRUE,right=TRUE) print(hSn) hL <- hist(ibdLength,breaks=breaksC,plot=FALSE,freq=TRUE,right=TRUE) print(hL) print(allC) for (i in 1:lsB) { print(rownames(msB)[i]) print(msB[i,]) } for (i in 1:lnB) { print(rownames(mnB)[i]) print(mnB[i,]) } for (i in 1:lLB) { print(rownames(mLB)[i]) print(mLB[i,]) } breaksIF <- c(0,0.001,0.002,0.005,0.01,0.02,0.03,0.04,0.05,0.95,0.96,0.97,0.98,0.995,0.998,0.999,1.0) iF <- hist(ibdFreq,breaks=breaksIF,plot=FALSE,freq=TRUE,right=TRUE) print(iF) snpsA <- unique(snpL) length(snpsA)