load(file=paste("DenisovaMat.Rda",sep="")) load(file=paste("NeanderMat.Rda",sep="")) load(file=paste("AncestMat.Rda",sep="")) load(file=paste("PopulationMat.Rda",sep="")) format(colSums(DenisovaMat),scientific=FALSE) format(colSums(NeanderMat),scientific=FALSE) format(colSums(AncestMat),scientific=FALSE) source("/seppdata/sepp/linkage/release/scripts/funct.R") AFR <- which(PopulationMat[,6]>0.2) #147574 AMR <- which(PopulationMat[,7]>0.2) ASN <- which(PopulationMat[,8]>0.2) #7472 EUR <- which(PopulationMat[,9]>0.2) #10688 AFRd <- which(PopulationMat[,6]>0.5) #141262 AMRd <- which(PopulationMat[,7]>0.5) ASNd <- which(PopulationMat[,8]>0.5) #5161 EURd <- which(PopulationMat[,9]>0.5) #5915 oAFR <- which(PopulationMat[,6]>0.9999) #141262 oAMR <- which(PopulationMat[,7]>0.9999) oASN <- which(PopulationMat[,8]>0.9999) #5161 oEUR <- which(PopulationMat[,9]>0.9999) #5915 LLLAFR <- DenisovaMat[AFR,21] LLLAMR <- DenisovaMat[AMR,21] LLLASN <- DenisovaMat[ASN,21] LLLEUR <- DenisovaMat[EUR,21] LLLAFRd <- DenisovaMat[AFRd,21] LLLAMRd <- DenisovaMat[AMRd,21] LLLASNd <- DenisovaMat[ASNd,21] LLLEURd <- DenisovaMat[EURd,21] LLLoAFR <- DenisovaMat[oAFR,21] LLLoAMR <- DenisovaMat[oAMR,21] LLLoASN <- DenisovaMat[oASN,21] LLLoEUR <- DenisovaMat[oEUR,21] summary(LLLoAFR) summary(LLLoASN) summary(LLLoEUR) summary(LLLoAMR) length(LLLoAFR) length(LLLoASN) length(LLLoAMR) length(LLLoEUR) > summary(LLLoAFR) Min. 1st Qu. Median Mean 3rd Qu. Max. 56 19360 24380 25300 29120 954000 > summary(LLLoASN) Min. 1st Qu. Median Mean 3rd Qu. Max. 1525 18970 24760 27270 29790 544700 > summary(LLLoEUR) Min. 1st Qu. Median Mean 3rd Qu. Max. 1068 18500 24260 27810 29620 566700 > summary(LLLoAMR) Min. 1st Qu. Median Mean 3rd Qu. Max. 2571 19130 23800 25420 28450 308900 > length(LLLoAFR) [1] 93197 > length(LLLoASN) [1] 2522 > length(LLLoAMR) [1] 981 > length(LLLoEUR) [1] 1191 DLLLAFR <- density(LLLoAFR,bw=700,from=0,to=80000) DLLLAMR <- density(LLLoAMR,bw=700,from=0,to=80000) x11() DLLLEUR <- plot(density(LLLoEUR,bw=700,from=0,to=80000)) DLLLASN <- points(density(LLLoASN,bw=700,from=0,to=80000),type="l") library(ggplot2) library(doBy) name="IBDlengthEuropeanAsian" pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(LLLoEUR) l2 <- length(LLLoASN) df <- data.frame(IBD_Length = factor( c(rep("European",l1),rep("Asian",l2)) ), proportion = c(LLLoEUR,LLLoASN)) cdf <- summaryBy(data=df, proportion ~ IBD_Length, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=IBD_Length)) + geom_density(alpha=.3,adjust=0.3) + geom_vline(data=cdf, aes(xintercept=c(25800,24300), colour=IBD_Length), linetype="dashed", size=0.5) + geom_vline(data=cdf, aes(xintercept=c(25800,29000), colour=IBD_Length), linetype="dashed", size=0.5) + geom_vline(data=cdf, aes(xintercept=c(25800,16500), colour=IBD_Length), linetype="dashed", size=0.5) + opts(title = expression("IBD Length Distributions: European vs. Asian")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05,6e-05)) + # scale_y_continuous(limits=c(0,4e-04)) + scale_x_continuous(limits=c(0,80000)) + opts(legend.position=c(.8, .5))+ opts(legend.background = theme_rect(fill="gray90", size=0.3,linetype="solid"))+ coord_cartesian(ylim = c(0,6e-05),xlim=c(0,80000)) + ylab("Density") + xlab("IBD Length") dev.off() NAFR <- which(PopulationMat[,6]<0.0001) NAMR <- which(PopulationMat[,7]<0.0001) NASN <- which(PopulationMat[,8]<0.0001) NEUR <- which(PopulationMat[,9]<0.0001) JAFR <- which(PopulationMat[,6]>0.0001) JAMR <- which(PopulationMat[,7]>0.0001) JASN <- which(PopulationMat[,8]>0.0001) JEUR <- which(PopulationMat[,9]>0.0001) ASNEUR <- intersect(intersect(NAFR,NAMR),intersect(JASN,JEUR)) LLLASNEUR <- DenisovaMat[ASNEUR,21] summary(LLLASNEUR) > summary(LLLASNEUR) Min. 1st Qu. Median Mean 3rd Qu. Max. 3423 21580 28310 33100 36410 409400 ASNAFR <- intersect(intersect(NEUR,NAMR),intersect(JASN,JAFR)) LLLASNAFR <- DenisovaMat[ASNAFR,21] summary(LLLASNAFR) > summary(LLLASNAFR) Min. 1st Qu. Median Mean 3rd Qu. Max. 2600 19250 26290 28420 31400 340500 x11() DLLLASNAFR <- plot(density(LLLASNAFR,bw=700,from=0,to=80000)) DLLLASN <- points(density(LLLoASN,bw=700,from=0,to=80000),type="l") name="IBDlengthAsianAFRASN" #pdf(paste(name,".pdf",sep=""),width=8) xfig(paste(name,".fig",sep=""),width=8) l1 <- length(LLLASNAFR) l2 <- length(LLLoASN) df <- data.frame(IBD_Length = factor( c(rep("Asian/African",l1),rep("Asian",l2)) ), proportion = c(LLLASNAFR,LLLoASN)) cdf <- summaryBy(data=df, proportion ~ IBD_Length, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=IBD_Length)) + geom_density(alpha=.3,adjust=0.3) + geom_vline(data=cdf, aes(xintercept=c(25800,27800), colour=IBD_Length), linetype="dashed", size=0.5) + geom_vline(data=cdf, aes(xintercept=c(16500,10000), colour=IBD_Length), linetype="dashed", size=0.5) + opts(title = expression("IBD Length Distributions: Asian vs. Asian/African")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05,5e-05)) + # scale_y_continuous(limits=c(0,4e-04)) + scale_x_continuous(limits=c(0,80000)) + opts(legend.position=c(.8, .5))+ opts(legend.background = theme_rect(fill="gray90", size=0.3,linetype="solid"))+ coord_cartesian(ylim = c(0,6e-05),xlim=c(0,80000)) + ylab("Density") + xlab("IBD Length") dev.off() EURAFR <- intersect(intersect(NASN,NAMR),intersect(JEUR,JAFR)) LLLEURAFR <- DenisovaMat[EURAFR,21] summary(LLLEURAFR) > summary(LLLEURAFR) Min. 1st Qu. Median Mean 3rd Qu. Max. 973 19490 25540 27750 31360 559300 x11() DLLLEURAFR <- plot(density(LLLEURAFR,bw=700,from=0,to=80000)) DLLLEUR <- points(density(LLLoEUR,bw=700,from=0,to=80000),type="l") name="IBDlengthEuropeanAFREUR" pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(LLLEURAFR) l2 <- length(LLLoEUR) df <- data.frame(IBD_Length = factor( c(rep("European/African",l1),rep("European",l2)) ), proportion = c(LLLEURAFR,LLLoEUR)) cdf <- summaryBy(data=df, proportion ~ IBD_Length, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=IBD_Length)) + geom_density(alpha=.3,adjust=0.3) + geom_vline(data=cdf, aes(xintercept=c(25000,27800), colour=IBD_Length), linetype="dashed", size=0.5) + geom_vline(data=cdf, aes(xintercept=c(29000,10000), colour=IBD_Length), linetype="dashed", size=0.5) + geom_vline(data=cdf, aes(xintercept=c(16500,27800), colour=IBD_Length), linetype="dashed", size=0.5) + opts(title = expression("IBD Length Distributions: European vs. European/African")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05,6e-05)) + # scale_y_continuous(limits=c(0,4e-04)) + scale_x_continuous(limits=c(0,80000)) + opts(legend.position=c(.8, .5))+ opts(legend.background = theme_rect(fill="gray90", size=0.3,linetype="solid"))+ coord_cartesian(ylim = c(0,6e-05),xlim=c(0,80000)) + ylab("Density") + xlab("IBD Length") dev.off() AMRAFR <- intersect(intersect(NASN,NEUR),intersect(JAMR,JAFR)) LLLAMRAFR <- DenisovaMat[AMRAFR,21] summary(LLLAMRAFR) > summary(LLLAMRAFR) Min. 1st Qu. Median Mean 3rd Qu. Max. 33 424 640 10070 21530 515000 AMREUR <- intersect(intersect(NASN,NAFR),intersect(JAMR,JEUR)) LLLAMREUR <- DenisovaMat[AMREUR,21] summary(LLLAMREUR) > summary(LLLAMREUR) Min. 1st Qu. Median Mean 3rd Qu. Max. 96 618 18660 16930 26080 235400 AMRASN <- intersect(intersect(NEUR,NAFR),intersect(JAMR,JASN)) LLLAMRASN <- DenisovaMat[AMRASN,21] summary(LLLAMRASN) > summary(LLLAMRASN) Min. 1st Qu. Median Mean 3rd Qu. Max. 170.0 463.8 15130.0 15360.0 25880.0 165900.0 x11() plot(density(DenisovaMat[,22],from=0,to=15)) hist(DenisovaMat[,22],breaks=100,ylim=c(0,10000),xlim=c(0,100),plot=TRUE) hist(LLL,breaks=500,ylim=c(0,5500),xlim=c(0,80000),plot=TRUE,col="blue",main="Hi plot(DenisovaMat[,21],DenisovaMat[,22],ylim=c(0,100),xlim=c(0,500)) aa <- which(DenisovaMat[,21]<500) ab <- which(DenisovaMat[,22]>100) ac <- intersect(aa,ab) DenisovaMat[ac[30:40],] allCount1 allCount ibdC posAll #SNPs #Nea #Ref #Alt #New NeaR RefR [1,] 19030 25458 46 69 76 76 54 21 1 1 0.7105263 [2,] 19253 25771 1 70 87 87 65 21 1 1 0.7471264 [3,] 19550 26165 3 71 47 47 44 2 1 1 0.9361702 [4,] 19655 26294 132 71 160 160 63 97 0 1 0.3937500 [5,] 20360 27224 307 73 43 43 43 0 0 1 1.0000000 [6,] 20464 27378 18 74 38 38 34 4 0 1 0.8947368 [7,] 20589 27531 171 74 59 59 55 4 0 1 0.9322034 [8,] 20749 27731 3 75 38 38 34 4 0 1 0.8947368 [9,] 21402 28590 52 77 101 101 59 40 2 1 0.5841584 [10,] 21461 28674 136 77 21 21 21 0 0 1 1.0000000 [11,] 21553 28795 28 78 66 66 49 16 1 1 0.7424242 posAll <- 69 ibdC <- 46 shift=5000 intervallAll <- 10000 lengthAll <- 3201157 library(fabia) start <- (posAll-1)*shift end <- start + intervallAll if (end > lengthAll) { end <- lengthAll } pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="") load(file=paste("resIBD_chr1",pRange,".Rda",sep="")) plotoneIBD(mergedIBD,paste("../ALL.chr1.merged_beagle_mach.20101123.snps_indels_svs.genotypes",pRange,"_mat",sep=""),ibdC) name="denisS2" pdf(paste(name,".pdf",sep=""),width=12) plotoneIBD(mergedIBD,paste("../ALL.chr1.merged_beagle_mach.20101123.snps_indels_svs.genotypes",pRange,"_mat",sep=""),ibdC) dev.off() xfig(paste(name,".fig",sep=""),width=12) plotoneIBD(mergedIBD,paste("../ALL.chr1.merged_beagle_mach.20101123.snps_indels_svs.genotypes",pRange,"_mat",sep=""),ibdC) dev.off()