startRun <- 0 endRun <- 639 sampB <- c(1,2,3,5,10,20) snpB <- c(8,9,10,12,15,20,30,50) LB <- c(8,50,500,2000,5000,10000,20000,50000,100000,200000,500000,1000000) # get dups and un load(file=paste("resDups.Rda",sep="")) # > table(indi_EUeurop[,2]) # # CEU FIN GBR IBS TSI # 170 186 178 28 196 pops <- c("CEU","FIN","GBR","IBS","TSI") recode <- c( "CEU", # (1,0,0,0,0) --> 1 "FIN", # (0,1,0,0,0) --> 2 "CEU/FIN", # (1,1,0,0,0) --> 3 "GBR", # (0,0,1,0,0) --> 4 "CEU/GBR", # (1,0,1,0,0) --> 5 "FIN/GBR", # (0,1,1,0,0) --> 6 "CEU/FIN/GBR", # (1,1,1,0,0) --> 7 "IBS", # (0,0,0,1,0) --> 8 "CEU/IBS", # (1,0,0,1,0) --> 9 "FIN/IBS", # (0,1,0,1,0) --> 10 "CEU/FIN/IBS", # (1,1,0,1,0) --> 11 "GBR/IBS", # (0,0,1,1,0) --> 12 "CEU/GBR/IBS", # (1,0,1,1,0) --> 13 "FIN/GBR/IBS", # (0,1,1,1,0) --> 14 "CEU/FIN/GBR/IBS", # (1,1,1,1,0) --> 15 "TSI", # (0,0,0,0,1) --> 16 "CEU/TSI", # (1,0,0,0,1) --> 17 "FIN/TSI", # (0,1,0,0,1) --> 18 "CEU/FIN/TSI", # (1,1,0,0,1) --> 19 "GBR/TSI", # (0,0,1,0,1) --> 20 "CEU/GBR/TSI", # (1,0,1,0,1) --> 21 "FIN/GBR/TSI", # (0,1,1,0,1) --> 22 "CEU/FIN/GBR/TSI", # (1,1,1,0,1) --> 23 "IBS/TSI", # (0,0,0,1,1) --> 24 "CEU/IBS/TSI", # (1,0,0,1,1) --> 25 "FIN/IBS/TSI", # (0,1,0,1,1) --> 26 "CEU/FIN/IBS/TSI", # (1,1,0,1,1) --> 27 "GBR/IBS/TSI", # (0,0,1,1,1) --> 28 "CEU/GBR/IBS/TSI", # (1,0,1,1,1) --> 29 "FIN/GBR/IBS/TSI", # (0,1,1,1,1) --> 30 "CEU/FIN/GBR/IBS/TSI" # (1,1,1,1,1) --> 31 ) allC <- rep(0,31) names(allC) <- recode lsB <- length(sampB) lnB <- length(snpB) lLB <- length(LB) msB <- matrix(rep(0,lsB*31),nrow=lsB,ncol=31) mnB <- matrix(rep(0,lnB*31),nrow=lnB,ncol=31) mLB <- matrix(rep(0,lLB*31),nrow=lLB,ncol=31) colnames(msB) <- recode rownames(msB) <- as.character(sampB) colnames(mnB) <- recode rownames(mnB) <- as.character(snpB) colnames(mLB) <- recode rownames(mLB) <- as.character(LB) lengthAll <- 3201157 shift <- 5000 intervallAll <- 10000 over <- intervallAll%/%shift N1 <- lengthAll%/%shift endRunAll <- (N1-over+1) # 639 allCount <- 0 noSnps <- c() noSamp <- c() ibdLength <- c() ibdFreq <- c() ibdFreqEU <- c() load(file="../individuals.Rda") EUeuropeans <- c(which(indi[,2]=="CEU"),which(indi[,2]=="FIN"),which(indi[,2]=="GBR"),which(indi[,2]=="IBS"),which(indi[,2]=="TSI")) EUeurop <- as.integer(sort.int(as.integer(unique(EUeuropeans)))) indi_EUeurop <- indi[EUeurop,] for (posAll in (startRun+1):(endRun+1)) { start <- (posAll-1)*shift end <- start + intervallAll if (end > lengthAll) { end <- lengthAll } pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="") load(file=paste("resIBD_chr1",pRange,"annot.Rda",sep="")) noIBD <- length(IBDannot) if (noIBD>0) { for (ibdC in 1:noIBD) { allCount <- allCount + 1 if (!dups[allCount]) { noSa <- IBDannot[[ibdC]][[6]] noSamp <- c(noSamp,noSa) noSn <- IBDannot[[ibdC]][[7]] noSnps <- c(noSnps,noSn) ibdL <- IBDannot[[ibdC]][[5]] ibdLength <- c(ibdLength,ibdL) ibdF <- IBDannot[[ibdC]][[18]] ibdFEU <- IBDannot[[ibdC]][[19]] ibdFM <- median(ibdF) ibdFEUM <- median(ibdFEU) ibdFreq <- c(ibdFreq,ibdFM) ibdFreqEU <- c(ibdFreqEU,ibdFEUM) samples <- IBDannot[[ibdC]][[8]] tt <- table(c(pops,indi_EUeurop[samples,2])) tt <- tt - 1 aT <- tt>0 aI <- as.integer(aT) codeT <- aI[1]+aI[2]*2+aI[3]*4+aI[4]*8+aI[5]*16 allC[codeT] <- allC[codeT]+1 if (noSa<(sampB[2]+1)) { sw <- 1 } else { if (noSa<(sampB[3]+1)) { sw <- 2 } else { if (noSa<(sampB[4]+1)) { sw <- 3 } else { if (noSa<(sampB[5]+1)) { sw <- 4 } else { if (noSa<(sampB[6]+1)) { sw <- 5 } else { sw <- 6 } } } } } if (noSn<(snpB[2]+1)) { sn <- 1 } else { if (noSn<(snpB[3]+1)) { sn <- 2 } else { if (noSn<(snpB[4]+1)) { sn <- 3 } else { if (noSn<(snpB[5]+1)) { sn <- 4 } else { if (noSn<(snpB[6]+1)) { sn <- 5 } else { if (noSn<(snpB[7]+1)) { sn <- 6 } else { if (noSn<(snpB[8]+1)) { sn <- 7 } else { sn <- 8 } } } } } } } if (ibdL<(LB[2]+1)) { sL <- 1 } else { if (ibdL<(LB[3]+1)) { sL <- 2 } else { if (ibdL<(LB[4]+1)) { sL <- 3 } else { if (ibdL<(LB[5]+1)) { sL <- 4 } else { if (ibdL<(LB[6]+1)) { sL <- 5 } else { if (ibdL<(LB[7]+1)) { sL <- 6 } else { if (ibdL<(LB[8]+1)) { sL <- 7 } else { if (ibdL<(LB[9]+1)) { sL <- 8 } else { if (ibdL<(LB[10]+1)) { sL <- 9 } else { if (ibdL<(LB[11]+1)) { sL <- 10 } else { if (ibdL<(LB[12]+1)) { sL <- 11 } else { sL <- 12 } } } } } } } } } } } msB[sw,codeT] <-msB[sw,codeT] + 1 mnB[sn,codeT] <-mnB[sn,codeT] + 1 mLB[sL,codeT] <-mLB[sL,codeT] + 1 } } } } save(ibdLength,noSamp,noSnps,allC,msB,mnB,mLB,file="resultsA1.Rda") sBB <- sampB+0.5 endB <- max(noSamp)+0.5 breaksA <- c(sBB,endB) sBB <- snpB+0.5 endB <- max(noSnps)+0.5 breaksB <- c(sBB,endB) sBB <- LB+0.5 endB <- max(ibdLength)+0.5 breaksC <- c(sBB,endB) hSa <- hist(noSamp,breaks=breaksA,plot=FALSE,freq=TRUE,right=TRUE) print(hSa) hSn <- hist(noSnps,breaks=breaksB,plot=FALSE,freq=TRUE,right=TRUE) print(hSn) hL <- hist(ibdLength,breaks=breaksC,plot=FALSE,freq=TRUE,right=TRUE) print(hL) print(allC) for (i in 1:lsB) { print(rownames(msB)[i]) print(msB[i,]) } for (i in 1:lnB) { print(rownames(mnB)[i]) print(mnB[i,]) } for (i in 1:lLB) { print(rownames(mLB)[i]) print(mLB[i,]) } iF <- hist(ibdFreq,breaks=16,plot=FALSE,freq=TRUE,right=TRUE) print(iF) iFEU <- hist(ibdFreqEU,breaks=16,plot=FALSE,freq=TRUE,right=TRUE) print(iFEU)