load(file=paste("DenisovaMat.Rda",sep="")) load(file=paste("NeanderMat.Rda",sep="")) load(file=paste("AncestMat.Rda",sep="")) load(file=paste("PopulationMat.Rda",sep="")) format(colSums(DenisovaMat),scientific=FALSE) format(colSums(NeanderMat),scientific=FALSE) format(colSums(AncestMat),scientific=FALSE) library(ggplot2) library(doBy) source("funct.R") oAFR <- which(PopulationMat[,6]>0.9999) oAMR <- which(PopulationMat[,7]>0.9999) oASN <- which(PopulationMat[,8]>0.9999) oEUR <- which(PopulationMat[,9]>0.9999) LLLoAFR <- DenisovaMat[oAFR,22] LLLoAFRZ <- which(LLLoAFR>10) LLLoAFR <- LLLoAFR[LLLoAFRZ] LLLoAMR <- DenisovaMat[oAMR,22] LLLoAMRZ <- which(LLLoAMR>10) LLLoAMR <- LLLoAMR[LLLoAMRZ] LLLoASN <- DenisovaMat[oASN,22] LLLoASNZ <- which(LLLoASN>10) LLLoASN <- LLLoASN[LLLoASNZ] LLLoEUR <- DenisovaMat[oEUR,22] LLLoEURZ <- which(LLLoEUR>10) LLoEURL <- LLLoEUR[LLLoEURZ] summary(LLLoAFR) summary(LLLoASN) summary(LLLoEUR) summary(LLLoAMR) length(LLLoAFR) length(LLLoASN) length(LLLoAMR) length(LLLoEUR) # > summary(LLLoAFR) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 34 18260 23380 24040 28200 651100 # > summary(LLLoASN) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1525 17840 23780 26270 28980 544700 # > summary(LLLoEUR) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1068 18330 23980 27580 29510 566700 # > summary(LLLoAMR) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 2571 19130 23760 25360 28430 308900 # > length(LLLoAFR) # [1] 92913 # > length(LLLoASN) # [1] 2519 # > length(LLLoAMR) # [1] 981 # > length(LLLoEUR) # [1] 1191 DLLLoAFR <- density(LLLoAFR,bw=1400,from=0,to=60000) DLLLoAMR <- density(LLLoAMR,bw=1400,from=0,to=60000) DLLLoEUR <- density(LLLoEUR,bw=1400,from=0,to=60000) DLLLoASN <- density(LLLoASN,bw=1400,from=0,to=60000) name="IBDlengthEuropeanAsian" #pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(LLLoEUR) l2 <- length(LLLoASN) df <- data.frame(Population = factor( c(rep("European",l1),rep("Asian",l2)) ), proportion = c(LLLoEUR,LLLoASN)) cdf <- summaryBy(data=df, proportion ~ Population, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=Population)) + geom_density(alpha=.3,adjust=0.7) + geom_vline(data=cdf, aes(xintercept=c(25800,24300), colour=Population), linetype="dashed", size=0.8) + geom_vline(data=cdf, aes(xintercept=c(25800,29000), colour=Population), linetype="dashed", size=0.8) + geom_vline(data=cdf, aes(xintercept=c(25800,17000), colour=Population), linetype="dashed", size=0.8) + opts(title = expression("IBD Length Distributions: European vs. Asian")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05,6e-05)) + # scale_y_continuous(limits=c(0,4e-04)) + scale_x_continuous(limits=c(0,60000)) + opts(legend.position=c(.8, .5))+ opts(legend.background = theme_rect(fill="gray90", size=0.3,linetype="solid"))+ coord_cartesian(ylim = c(0,5.5e-05),xlim=c(0,60000)) + ylab("Density") + xlab("IBD Length") dev.off() NAFR <- which(PopulationMat[,6]<0.0001) NAMR <- which(PopulationMat[,7]<0.0001) NASN <- which(PopulationMat[,8]<0.0001) NEUR <- which(PopulationMat[,9]<0.0001) JAFR <- which(PopulationMat[,6]>0.0001) JAMR <- which(PopulationMat[,7]>0.0001) JASN <- which(PopulationMat[,8]>0.0001) JEUR <- which(PopulationMat[,9]>0.0001) JASNEUR <- intersect(JASN,JEUR) ASNEUR <- intersect(intersect(NAFR,NAMR),intersect(JASN,JEUR)) LLLASNEUR <- DenisovaMat[ASNEUR,22] LLLASNEURZ <- which(LLLASNEUR>10) LLLASNEUR <- LLLASNEUR[LLLASNEURZ] LLLJASNEUR <- DenisovaMat[JASNEUR,22] LLLJASNEURZ <- which(LLLJASNEUR>10) LLLJASNEUR <- LLLJASNEUR[LLLJASNEURZ] name="IBDlengthAsianEuropean-Asian" #pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(LLLJASNEUR) l2 <- length(LLLoASN) df <- data.frame(Population = factor( c(rep("European/Asian",l1),rep("Asian",l2)) ), proportion = c(LLLJASNEUR,LLLoASN)) cdf <- summaryBy(data=df, proportion ~ Population, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=Population)) + geom_density(alpha=.3,adjust=0.7) + geom_vline(data=cdf, aes(xintercept=c(25800,23600), colour=Population), linetype="dashed", size=0.8) + opts(title = expression("IBD Length Distributions: European/Asian vs. Asian")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05)) + # scale_y_continuous(limits=c(0,4e-04)) + scale_x_continuous(limits=c(0,60000)) + opts(legend.position=c(.8, .5))+ opts(legend.background = theme_rect(fill="gray90", size=0.3,linetype="solid"))+ coord_cartesian(ylim = c(0,5.3e-05),xlim=c(0,60000)) + ylab("Density") + xlab("IBD Length") dev.off() name="IBDlengthAsianEuropean-AsianNoAfrican" #pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(LLLASNEUR) l2 <- length(LLLoASN) df <- data.frame(Population = factor( c(rep("European/Asian",l1),rep("Asian",l2)) ), proportion = c(LLLASNEUR,LLLoASN)) cdf <- summaryBy(data=df, proportion ~ Population, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=Population)) + geom_density(alpha=.3,adjust=0.7) + geom_vline(data=cdf, aes(xintercept=c(25800,23600), colour=Population), linetype="dashed", size=0.8) + opts(title = expression("IBD Length Distributions: European/Asian vs. Asian")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05)) + # scale_y_continuous(limits=c(0,4e-04)) + scale_x_continuous(limits=c(0,60000)) + opts(legend.position=c(.8, .5))+ opts(legend.background = theme_rect(fill="gray90", size=0.3,linetype="solid"))+ coord_cartesian(ylim = c(0,5.3e-05),xlim=c(0,60000)) + ylab("Density") + xlab("IBD Length") dev.off() ASNEUR <- intersect(intersect(NAFR,NAMR),intersect(JASN,JEUR)) LLLASNEUR <- DenisovaMat[ASNEUR,22] LLLASNEURZ <- which(LLLASNEUR>10) LLLASNEUR <- LLLASNEUR[LLLASNEURZ] name="IBDlengthAsianEuropean-European" #pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(LLLJASNEUR) l2 <- length(LLLoEUR) df <- data.frame(Population = factor( c(rep("European/Asian",l1),rep("European",l2)) ), proportion = c(LLLJASNEUR,LLLoEUR)) cdf <- summaryBy(data=df, proportion ~ Population, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=Population)) + geom_density(alpha=.3,adjust=0.7) + geom_vline(data=cdf, aes(xintercept=c(24100,23600), colour=Population), linetype="dashed", size=0.8) + opts(title = expression("IBD Length Distributions: European/Asian vs. European")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05)) + # scale_y_continuous(limits=c(0,4e-04)) + scale_x_continuous(limits=c(0,60000)) + opts(legend.position=c(.8, .5))+ opts(legend.background = theme_rect(fill="gray90", size=0.3,linetype="solid"))+ coord_cartesian(ylim = c(0,5.3e-05),xlim=c(0,60000)) + ylab("Density") + xlab("IBD Length") dev.off() name="IBDlengthAsianEuropean-EuropeanNoAfricans" #pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(LLLASNEUR) l2 <- length(LLLoEUR) df <- data.frame(Population = factor( c(rep("European/Asian",l1),rep("European",l2)) ), proportion = c(LLLASNEUR,LLLoEUR)) cdf <- summaryBy(data=df, proportion ~ Population, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=Population)) + geom_density(alpha=.3,adjust=0.7) + geom_vline(data=cdf, aes(xintercept=c(24100,50000), colour=Population), linetype="dashed", size=0.8) + opts(title = expression("IBD Length Distributions: European/Asian vs. European (No AFR)")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05)) + # scale_y_continuous(limits=c(0,4e-04)) + scale_x_continuous(limits=c(0,60000)) + opts(legend.position=c(.8, .5))+ opts(legend.background = theme_rect(fill="gray90", size=0.3,linetype="solid"))+ coord_cartesian(ylim = c(0,5.3e-05),xlim=c(0,60000)) + ylab("Density") + xlab("IBD Length") dev.off() AFRASNEUR <- intersect(JAFR,JASNEUR) LLLAFRASNEUR <- DenisovaMat[AFRASNEUR,22] LLLAFRASNEURZ <- which(LLLAFRASNEUR>10) LLLAFRASNEUR <- LLLAFRASNEUR[LLLAFRASNEURZ] name="IBDlengthAfricanAsianEuropean-European" #pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(LLLAFRASNEUR) l2 <- length(LLLoEUR) df <- data.frame(Population = factor( c(rep("European/Asian/African",l1),rep("European",l2)) ), proportion = c(LLLAFRASNEUR,LLLoEUR)) cdf <- summaryBy(data=df, proportion ~ Population, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=Population)) + geom_density(alpha=.3,adjust=0.7) + geom_vline(data=cdf, aes(xintercept=c(24100,5000), colour=Population), linetype="dashed", size=0.8) + opts(title = expression("IBD Length Distributions: European/Asian/African vs. European")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05)) + # scale_y_continuous(limits=c(0,4e-04)) + scale_x_continuous(limits=c(0,60000)) + opts(legend.position=c(.8, .5))+ opts(legend.background = theme_rect(fill="gray90", size=0.3,linetype="solid"))+ coord_cartesian(ylim = c(0,5.3e-05),xlim=c(0,60000)) + ylab("Density") + xlab("IBD Length") dev.off() name="IBDlengthAfricanAsianEuropean-Asian" #pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(LLLAFRASNEUR) l2 <- length(LLLoASN) df <- data.frame(Population = factor( c(rep("European/Asian/African",l1),rep("Asian",l2)) ), proportion = c(LLLAFRASNEUR,LLLoASN)) cdf <- summaryBy(data=df, proportion ~ Population, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=Population)) + geom_density(alpha=.3,adjust=0.7) + geom_vline(data=cdf, aes(xintercept=c(25800,5000), colour=Population), linetype="dashed", size=0.8) + opts(title = expression("IBD Length Distributions: European/Asian/African vs. Asian")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05)) + # scale_y_continuous(limits=c(0,4e-04)) + scale_x_continuous(limits=c(0,60000)) + opts(legend.position=c(.8, .5))+ opts(legend.background = theme_rect(fill="gray90", size=0.3,linetype="solid"))+ coord_cartesian(ylim = c(0,5.3e-05),xlim=c(0,60000)) + ylab("Density") + xlab("IBD Length") dev.off() summary(LLLASNEUR) # > summary(LLLASNEUR) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1415 18570 25610 29800 33800 409400 ASNAFR <- intersect(intersect(NEUR,NAMR),intersect(JASN,JAFR)) LLLASNAFR <- DenisovaMat[ASNAFR,22] LLLASNAFRZ <- which(LLLASNAFR>10) LLLASNAFR <- LLLASNAFR[LLLASNAFRZ] summary(LLLASNAFR) # > summary(LLLASNAFR) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 854 14480 21740 23570 27360 337800 DLLLASNAFR <- density(LLLASNAFR,bw=1400,from=0,to=60000) DLLLASN <- density(LLLoASN,bw=1400,from=0,to=60000) name="IBDlengthAsianAFRASN" #pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(LLLASNAFR) l2 <- length(LLLoASN) df <- data.frame(Population = factor( c(rep("Asian/African",l1),rep("Asian",l2)) ), proportion = c(LLLASNAFR,LLLoASN)) cdf <- summaryBy(data=df, proportion ~ Population, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=Population)) + geom_density(alpha=.3,adjust=0.7) + geom_vline(data=cdf, aes(xintercept=c(25800,5000), colour=Population), linetype="dashed", size=0.8) + geom_vline(data=cdf, aes(xintercept=c(25800,22200), colour=Population), linetype="dashed", size=0.8) + opts(title = expression("IBD Length Distributions: Asian vs. Asian/African")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05,5e-05)) + # scale_y_continuous(limits=c(0,4e-04)) + scale_x_continuous(limits=c(0,60000)) + opts(legend.position=c(.8, .5))+ opts(legend.background = theme_rect(fill="gray90", size=0.3,linetype="solid"))+ coord_cartesian(ylim = c(0,5.2e-05),xlim=c(0,60000)) + ylab("Density") + xlab("IBD Length") dev.off() EURAFR <- intersect(intersect(NASN,NAMR),intersect(JEUR,JAFR)) LLLEURAFR <- DenisovaMat[EURAFR,22] LLLEURAFRZ <- which(LLLEURAFR>10) LLLEURAFR <- LLLEURAFR[LLLEURAFRZ] summary(LLLEURAFR) # > summary(LLLEURAFR) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 973 16660 22550 24320 28470 559300 DLLLEURAFR <- density(LLLEURAFR,bw=1400,from=0,to=60000) DLLLEUR <- density(LLLoEUR,bw=1400,from=0,to=60000) name="IBDlengthEuropeanAFREUR" #pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(LLLEURAFR) l2 <- length(LLLoEUR) df <- data.frame(Population = factor( c(rep("European/African",l1),rep("European",l2)) ), proportion = c(LLLEURAFR,LLLoEUR)) cdf <- summaryBy(data=df, proportion ~ Population, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=Population)) + geom_density(alpha=.3,adjust=0.7) + geom_vline(data=cdf, aes(xintercept=c(24200,22000), colour=Population), linetype="dashed", size=0.8) + geom_vline(data=cdf, aes(xintercept=c(28900,5000), colour=Population), linetype="dashed", size=0.8) + geom_vline(data=cdf, aes(xintercept=c(16500,22000), colour=Population), linetype="dashed", size=0.8) + opts(title = expression("IBD Length Distributions: European vs. European/African")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05)) + # scale_y_continuous(limits=c(0,4e-04)) + scale_x_continuous(limits=c(0,60000)) + opts(legend.position=c(.8, .5))+ opts(legend.background = theme_rect(fill="gray90", size=0.3,linetype="solid"))+ coord_cartesian(ylim = c(0,5.3e-05),xlim=c(0,60000)) + ylab("Density") + xlab("IBD Length") dev.off() AMRAFR <- intersect(intersect(NASN,NEUR),intersect(JAMR,JAFR)) LLLAMRAFR <- DenisovaMat[AMRAFR,22] LLLAMRAFRZ <- which(LLLAMRAFR>10) LLLAMRAFR <- LLLAMRAFR[LLLAMRAFRZ] summary(LLLAMRAFR) # > summary(LLLAMRAFR) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 151 17340 23310 23460 28670 515000 AMREUR <- intersect(intersect(NASN,NAFR),intersect(JAMR,JEUR)) LLLAMREUR <- DenisovaMat[AMREUR,22] LLLAMREURZ <- which(LLLAMREUR>10) LLLAMREUR <- LLLAMREUR[LLLAMREURZ] summary(LLLAMREUR) # > summary(LLLAMREUR) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 956 18410 24160 47490 29490 21250000 AMRASN <- intersect(intersect(NEUR,NAFR),intersect(JAMR,JASN)) LLLAMRASN <- DenisovaMat[AMRASN,22] LLLAMRASNZ <- which(LLLAMRASN>10) LLLAMRASN <- LLLAMRASN[LLLAMRASNZ] summary(LLLAMRASN) # > summary(LLLAMRASN) # Min. 1st Qu. Median Mean 3rd Qu. Max. # 1256 18570 23770 25030 28650 165900