######################################################################################### # Plots histograms of length of IBD segments on the autosomes, # densities of lengths of IBD segments on the autosomes that match the Neandertal or # Denisovan genome and are shared by a particular population or groups of populations ######################################################################################### library(ggplot2) library(doBy) source("1000G_pop_functions.R") load(file = paste("DenisovaMatGNew.Rda", sep = "")) load(file = paste("NeanderMatGNew.Rda", sep = "")) load(file = paste("AncDenisovaMatGNew.Rda", sep = "")) load(file = paste("AncNeanderMatGNew.Rda", sep = "")) load(file = paste("PopulationMatG.Rda", sep = "")) real1 <- which(NeanderMatG[,"IBDlengthCorrected"] > 10) real2 <- which(NeanderMatG[,"#SNVs"] > 7) #real2 <- which(NeanderMatG[,"#SNVs"] > 19) real <- intersect(real1, real2) DenisovaMatG <- DenisovaMatG[real,] NeanderMatG <- NeanderMatG[real,] AncDenisovaMatG <- AncDenisovaMatG[real,] AncNeanderMatG <- AncNeanderMatG[real,] PopulationMatG <- PopulationMatG[real,] de1 <- which(DenisovaMatG[,"AltR"] > 0.15) de2 <- which(DenisovaMatG[,"ancientRatio"] > 0.3) de <- intersect(de1, de2) ne1 <- which(NeanderMatG[,"AltR"] > 0.15) ne2 <- which(NeanderMatG[,"ancientRatio"] > 0.3) ne <- intersect(ne1, ne2) load(file = paste("DenisovaMatG_wChrNew.Rda",sep = "")) DenisovaMatG <- DenisovaMatG[real,] ##### Ancestor extraction #### nanca2 <- which(AncNeanderMatG[,"ancestorRatio"]>=0.3) danca2 <- which(AncDenisovaMatG[,"ancestorRatio"]>=0.3) dnanca2 <- intersect(nanca2,danca2) ade <- intersect(de,danca2) ane <- intersect(ne,nanca2) ##### Ancestor correction #### nanca <- which(AncNeanderMatG[,"ancestorRatio"] < 0.3) danca <- which(AncDenisovaMatG[,"ancestorRatio"] < 0.3) de <- intersect(de, danca) ne <- intersect(ne, nanca) ###### Populations ###### # population is to 1/5 observed AFR <- which(PopulationMatG[,"AFR"] > 0.2) EAS <- which(PopulationMatG[,"EAS"] > 0.2) SAS <- which(PopulationMatG[,"SAS"] > 0.2) EUR <- which(PopulationMatG[,"EUR"] > 0.2) # only this population: exclusively getoPops(PopulationMatG) # not this population: not observed getNPops(PopulationMatG) # at least once this population: observed getJPops(PopulationMatG) ######### Subpopulations ############ rm(NeanderMatG) rm(DenisovaMatG) rm(AncNeanderMatG) rm(AncDenisovaMatG) load("SubPopMatG.Rda") SubPopMatG <- SubPopMatG[real,] # population is to 1/5 observed ACB <- which(SubPopMatG[,"ACBr"] > 0.2) ASW <- which(SubPopMatG[,"ASWr"] > 0.2) BEB <- which(SubPopMatG[,"BEBr"] > 0.2) CDX <- which(SubPopMatG[,"CDXr"] > 0.2) CEU <- which(SubPopMatG[,"CEUr"] > 0.2) CHB <- which(SubPopMatG[,"CHBr"] > 0.2) CHS <- which(SubPopMatG[,"CHSr"] > 0.2) CLM <- which(SubPopMatG[,"CLMr"] > 0.2) ESN <- which(SubPopMatG[,"ESNr"] > 0.2) FIN <- which(SubPopMatG[,"FINr"] > 0.2) GBR <- which(SubPopMatG[,"GBRr"] > 0.2) GIH <- which(SubPopMatG[,"GIHr"] > 0.2) GWD <- which(SubPopMatG[,"GWDr"] > 0.2) IBS <- which(SubPopMatG[,"IBSr"] > 0.2) ITU <- which(SubPopMatG[,"ITUr"] > 0.2) JPT <- which(SubPopMatG[,"JPTr"] > 0.2) KHV <- which(SubPopMatG[,"KHVr"] > 0.2) LWK <- which(SubPopMatG[,"LWKr"] > 0.2) MSL <- which(SubPopMatG[,"MSLr"] > 0.2) MXL <- which(SubPopMatG[,"MXLr"] > 0.2) PEL <- which(SubPopMatG[,"PELr"] > 0.2) PJL <- which(SubPopMatG[,"PJLr"] > 0.2) PUR <- which(SubPopMatG[,"PURr"] > 0.2) STU <- which(SubPopMatG[,"STUr"] > 0.2) TSI <- which(SubPopMatG[,"TSIr"] > 0.2) YRI <- which(SubPopMatG[,"YRIr"] > 0.2) # only this population: exclusively getoSubPops(SubPopMatG) # not this population: not observed getNSubPops(SubPopMatG) # neither observed in EAS, SAS nor EUR NEASEUR <- intersect(NEAS, NEUR) NSASEASEUR <- intersect(NSAS, NEASEUR) # at least once this subpopulation: observed getJSubPops(SubPopMatG) # at least once in AFR without ASW and ACB jafr <- sort(unique(c(JESN, JGWD, JLWK, JMSL, JYRI))) oafr <- intersect(jafr, NSASEASEUR) # only AFR without considering AMR, ASW and ACB oAFR <- oafr ##### get lengths #### rm(SubPopMatG) rm(PopulationMatG) load(file = paste("DenisovaMatGNew.Rda", sep = "")) load(file = paste("NeanderMatGNew.Rda", sep = "")) DenisovaMatG <- DenisovaMatG[real,] NeanderMatG <- NeanderMatG[real,] neL <- NeanderMatG[ne,"ancientLength"] deL <- DenisovaMatG[de,"ancientLength"] neLZ <- which(neL > 10) neL <- neL[neLZ] ne <- ne[neLZ] deLZ <- which(deL > 10) deL <- deL[deLZ] de <- de[deLZ] not_neL <- NeanderMatG[-ne,"IBDlengthCorrected"] not_neLZ <- which(not_neL > 10) not_neL <- not_neL[not_neLZ] not_deL <- DenisovaMatG[-de,"IBDlengthCorrected"] not_deLZ <- which(not_deL > 10) not_deL <- not_deL[not_deLZ] LLL <- DenisovaMatG[,"IBDlengthCorrected"] ################### Denisova ############################### DdeL <- density(deL, bw = 1000, from = 0, to = 30000) deEAS <- intersect(EAS, de) deEASL <- DenisovaMatG[deEAS,"ancientLength"] DdeEASL <- density(deEASL, bw = 1000, from = 0, to = 30000) deSAS <- intersect(SAS, de) deSASL <- DenisovaMatG[deSAS,"ancientLength"] DdeSASL <- density(deSASL, bw = 1000, from = 0, to = 30000) deAFR <- intersect(AFR, de) deAFRL <- DenisovaMatG[deAFR,"ancientLength"] DdeAFRL <- density(deAFRL, bw = 1000, from = 0, to = 30000) deEUR <- intersect(EUR, de) deEURL <- DenisovaMatG[deEUR,"ancientLength"] DdeEURL <- density(deEURL, bw = 1000, from = 0, to = 30000) ################## Neandertal ################################ DneL <- density(neL, bw = 1000, from = 0, to = 30000) neEAS <- intersect(EAS, ne) neEASL <- NeanderMatG[neEAS,"ancientLength"] DneEASL <- density(neEASL, bw = 1000, from = 0, to = 30000) neSAS <- intersect(SAS, ne) neSASL <- NeanderMatG[neSAS,"ancientLength"] DneSASL <- density(neSASL, bw = 1000, from = 0, to = 30000) neAFR <- intersect(AFR, ne) neAFRL <- NeanderMatG[neAFR,"ancientLength"] DneAFRL <- density(neAFRL, bw = 1000, from = 0, to = 30000) neEUR <- intersect(EUR, ne) neEURL <- NeanderMatG[neEUR,"ancientLength"] DneEURL <- density(neEURL, bw = 1000, from = 0, to = 30000) ########## Africans-NonAfricans ############ deoAFR <- intersect(oAFR, de) deoAFRL <- DenisovaMatG[deoAFR,"ancientLength"] DdeoAFRL <- density(deoAFRL, bw = 1400, from = 0, to = 30000) neoAFR <- intersect(oAFR, ne) neoAFRL <- NeanderMatG[neoAFR,"ancientLength"] DneoAFRL <- density(neoAFRL, bw = 1400, from = 0, to = 30000) deNAFR <- intersect(NAFR, de) deNAFRL <- DenisovaMatG[deNAFR,"ancientLength"] DdeNAFRL <- density(deNAFRL, bw = 1400, from = 0, to = 30000) neNAFR <- intersect(NAFR, ne) neNAFRL <- NeanderMatG[neNAFR,"ancientLength"] DneNAFRL <- density(neNAFRL, bw = 1400, from = 0, to = 30000) ##### Ancestor ##### adeoAFR <- intersect(oAFR,ade) adeoAFRL <- DenisovaMatG[adeoAFR,"ancientLength"] DadeoAFRL <- density(adeoAFRL,bw=1400,from=0,to=30000) aneoAFR <- intersect(oAFR,ane) aneoAFRL <- NeanderMatG[aneoAFR,"ancientLength"] DaneoAFRL <- density(aneoAFRL,bw=1400,from=0,to=30000) adeNAFR <- intersect(NAFR,ade) adeNAFRL <- DenisovaMatG[adeNAFR,"ancientLength"] DadeNAFRL <- density(adeNAFRL,bw=1400,from=0,to=30000) aneNAFR <- intersect(NAFR,ane) aneNAFRL <- NeanderMatG[aneNAFR,"ancientLength"] DaneNAFRL <- density(aneNAFRL,bw=1400,from=0,to=30000) #### only AFR vs. obs. AFR ##### deJAFR <- intersect(JAFR, de) deJAFRL <- DenisovaMatG[deJAFR,"ancientLength"] DdeJAFRL <- density(deJAFRL, bw = 1400, from = 0, to = 30000) neJAFR <- intersect(JAFR, ne) neJAFRL <- NeanderMatG[neJAFR,"ancientLength"] DneJAFRL <- density(neJAFRL, bw = 1400, from = 0, to = 30000) JwooAFR <- setdiff(JAFR, oAFR) deJwooAFR <- intersect(JwooAFR, de) deJwooAFRL <- DenisovaMatG[deJwooAFR,"ancientLength"] DdeJwooAFRL <- density(deJwooAFRL, bw = 1400, from = 0, to = 30000) neJwooAFR <- intersect(JwooAFR, ne) neJwooAFRL <- NeanderMatG[neJwooAFR,"ancientLength"] DneJwooAFRL <- density(neJwooAFRL, bw = 1400, from = 0, to = 30000) #### only AFR vs. OoA ##### OoA <- unique(c(JEAS, JSAS, JEUR)) deOoA <- intersect(OoA, de) deOoAL <- DenisovaMatG[deOoA,"ancientLength"] DdeOoAL <- density(deOoAL, bw = 1400, from = 0, to = 30000) neOoA <- intersect(OoA, ne) neOoAL <- NeanderMatG[neOoA,"ancientLength"] DneOoAL <- density(neOoAL, bw = 1400, from = 0, to = 30000) ################################################ ##### Histogramm length #### ################################################ name = paste0("IBDlengthHistogram") #pdf(paste(name, ".pdf", sep = ""), width = 8) #xfig(paste(name, ".fig", sep = ""), width = 8) df <- data.frame(counts = LLL) ggplot(df, aes(x = counts)) + geom_histogram(aes(fill = ..count.. < 10000), colour = "darkgreen", fill = "coral", binwidth = 500) + scale_y_continuous(breaks = seq(0, 55000, 10000)) + scale_x_continuous(limits = c(0, 30000)) + geom_vline(xintercept = c(12500), linetype = "dashed", size = 0.5) + labs(title = paste0("Histogram IBD Lengths Genome")) + theme(legend.position = "none") + coord_cartesian(ylim = c(0, 59000),xlim = c(0, 30000)) + ylab("Counts") + xlab("IBD Length") dev.off() ################################################ #### DENISOVA LENGTH per Population #### ################################################ name = paste0("IBDlengthDensityDenisovaPop") #pdf(paste(name, ".pdf", sep = ""), width = 8) #xfig(paste(name, ".fig", sep = ""), width = 8) df <- data.frame(Length = DdeL$x, Denisova1_African = DdeAFRL$y, Denisova4_Asian = DdeEASL$y, Denisova3_European = DdeEURL$y, Denisova5_South_Asian = DdeSASL$y) ggplot(df, aes(x = Length)) + geom_line(aes(y = Denisova1_African,colour = "Denisova 1African"), size = 1.2) + geom_line(aes(y = Denisova4_Asian,colour = "Denisova 4Asian"), size = 1.2) + geom_line(aes(y = Denisova3_European,colour = "Denisova 3European"), size = 1.2) + geom_line(aes(y = Denisova5_South_Asian,colour = "Denisova 5South_Asian"), size = 1.2) + geom_vline(data = df, aes(xintercept = c(5000), colour = c("Denisova 1African")), linetype = "dashed", size = 0.7) + geom_vline(data = df, aes(xintercept = c(11000), colour = c("Denisova 4Asian")), linetype = "dashed", size = 0.7) + geom_vline(data = df, aes(xintercept = c(11700), colour = c("Denisova 5South_Asian")), linetype = "dashed", size = 0.7) + geom_vline(data = df, aes(xintercept = c(8800), colour = c("Denisova 3European")), linetype = "dashed", size = 0.7) + geom_vline(data = df, aes(xintercept = c(11900), colour = c("Denisova 3European")), linetype = "dashed", size = 0.7) + scale_y_continuous(limits = c(0,1.02*max(df[,2:5])), expand = c(0, 0)) + scale_x_continuous(expand = c(0, 0)) + labs(title = paste0("Denisova-matching IBD Length Distributions per Population")) + theme(legend.position = c(.77, .85)) + theme(legend.background = element_rect(fill = "gray90", size = 0.3, linetype = "solid", colour = "black")) + coord_cartesian(ylim = c(0,1.03*max(df[,2:5])),xlim = c(0,31000)) + ylab("Density") + xlab("IBD Length") + scale_color_hue(name = "Populations", breaks = c("Denisova 1African", "Denisova 2American", "Denisova 3European", "Denisova 4Asian", "Denisova 5South_Asian"), labels = c("Denisova African", "Denisova American", "Denisova European", "Denisova East Asian", "Denisova South Asian")) + theme(legend.text = element_text(size = 16), legend.title = element_text(size = 18)) + theme(plot.title = element_text(size = rel(1.4), vjust = 1), axis.title = element_text(size = rel(1.2))) + theme(axis.text = element_text(size = rel(1.0))) dev.off() ################################################ #### Neander LENGTH per Population #### ################################################ name = paste0("IBDlengthDensityNeanderPop") #pdf(paste(name, ".pdf", sep = ""), width = 8) #xfig(paste(name, ".fig", sep = ""), width = 8) df <- data.frame(Length = DneL$x, Neandertal1_African = DneAFRL$y, Neandertal4_Asian = DneEASL$y, Neandertal3_European = DneEURL$y, Neandertal5_South_Asian = DneSASL$y) ggplot(df, aes(x = Length)) + geom_line(aes(y = Neandertal1_African, colour = "Neandertal 1African"), size = 1.2) + geom_line(aes(y = Neandertal4_Asian, colour = "Neandertal 4Asian"), size = 1.2) + geom_line(aes(y = Neandertal3_European, colour = "Neandertal 3European"), size = 1.2) + geom_line(aes(y = Neandertal5_South_Asian, colour = "Neandertal 5South_Asian"), size = 1.2) + geom_vline(data = df, aes(xintercept = c(5600), colour = c("Neandertal 1African")), linetype = "dashed", size = 0.7) + geom_vline(data = df, aes(xintercept = c(12800), colour = c("Neandertal 4Asian")), linetype = "dashed", size = 0.7) + geom_vline(data = df, aes(xintercept = c(12600), colour = c("Neandertal 5South_Asian")), linetype = "dashed", size = 0.7) + geom_vline(data = df, aes(xintercept = c(12700), colour = c("Neandertal 3European")), linetype = "dashed", size = 0.7) + scale_y_continuous(limits = c(0, 1.02*max(df[,2:5])), expand = c(0, 0)) + scale_x_continuous(expand = c(0, 0)) + labs(title = paste0("Neandertal-matching IBD Length Distributions per Population")) + theme(legend.position = c(.77, .85)) + theme(legend.background = element_rect(fill = "gray90", size = 0.3, linetype = "solid", colour = "black")) + coord_cartesian(ylim = c(0, 1.03*max(df[,2:5])), xlim = c(0, 31000)) + ylab("Density") + xlab("IBD Length") + scale_color_hue(name = "Populations", breaks = c("Neandertal 1African", "Neandertal 2American", "Neandertal 3European", "Neandertal 4Asian", "Neandertal 5South_Asian"), labels = c("Neandertal African", "Neandertal American", "Neandertal European", "Neandertal East Asian", "Neandertal South Asian")) + theme(legend.text = element_text(size = 16), legend.title = element_text(size = 18)) + theme(plot.title = element_text(size = rel(1.4), vjust = 1), axis.title = element_text(size = rel(1.2))) + theme(axis.text = element_text(size = rel(1.0))) dev.off() ################################################ #### Denisova LENGTH Africans-NonAfrica ######## ################################################ name = paste0("IBDlengthDensityDenisova_Africans-NonAfricans") #pdf(paste(name, ".pdf", sep = ""), width = 8) #xfig(paste(name, ".fig", sep = ""), width = 8) l1 <- length(deoAFRL) l2 <- length(deNAFRL) df <- data.frame(Population = factor( c(rep("Africans", l1), rep("Non-Africans", l2)) ), proportion = c(deoAFRL, deNAFRL)) cdf <- summaryBy(data = df, proportion ~ Population, FUN = mean, na.rm = TRUE) ggplot(df, aes(x = proportion, fill = Population)) + geom_density(alpha = .3, adjust = 0.7) + geom_vline(data = cdf, aes(xintercept = c(5000, 12000), colour = Population), linetype = "dashed", size = 1.2) + labs(title = paste0("Denisova IBD Length Distributions: Africans vs. Non-Africans")) + scale_y_continuous(breaks = c(0, 1e-05, 2e-05, 3e-05, 4e-05, 5e-05, 6e-05, 7e-05, 8e-05, 9e-05, 10e-05)) + scale_x_continuous(limits = c(0, 30000)) + theme(legend.position = c(.8, .7)) + theme(legend.background = element_rect(fill = "gray90", size = 0.3, linetype = "solid", colour = "black")) + coord_cartesian(ylim = c(0, 9.5e-05), xlim = c(0, 31000)) + theme(legend.text = element_text(size = 18), legend.title = element_text(size = 18)) + theme(plot.title = element_text(size = rel(1.5), vjust = 1), axis.title = element_text(size = rel(1.2))) + theme(axis.text = element_text(size = rel(1.0))) + ylab("Density") + xlab("IBD Length") dev.off() ################################################ #### Neandertal LENGTH Africans-NonAfrica ###### ################################################ name = paste0("IBDlengthDensityNeandertal_Africans-NonAfricans") #pdf(paste(name, ".pdf", sep = ""), width = 8) #xfig(paste(name, ".fig", sep = ""), width = 8) l1 <- length(neoAFRL) l2 <- length(neNAFRL) df <- data.frame(Population = factor( c(rep("Africans", l1), rep("Non-Africans", l2)) ), proportion = c(neoAFRL, neNAFRL)) cdf <- summaryBy(data = df, proportion ~ Population, FUN = mean, na.rm = TRUE) ggplot(df, aes(x = proportion, fill = Population)) + geom_density(alpha = .3, adjust = 0.7) + geom_vline(data = cdf, aes(xintercept = c(5700,12500), colour = Population), linetype = "dashed", size = 1.2) + labs(title = paste0("Neandertal IBD Length Distributions: Africans vs. Non-Africans")) + scale_y_continuous(breaks = c(0, 1e-05, 2e-05, 3e-05, 4e-05, 5e-05, 6e-05, 7e-05, 8e-05, 9e-05, 10e-05)) + scale_x_continuous(limits = c(0, 30000)) + theme(legend.position = c(.8, .7)) + theme(legend.background = element_rect(fill = "gray90", size = 0.3, linetype = "solid", colour = "black")) + coord_cartesian(ylim = c(0, 9.8e-05), xlim = c(0, 31000)) + theme(legend.text = element_text(size = 18), legend.title = element_text(size = 18)) + theme(plot.title = element_text(size = rel(1.5), vjust = 1), axis.title = element_text(size = rel(1.2))) + theme(axis.text = element_text(size = rel(1.0))) + ylab("Density") + xlab("IBD Length") dev.off() ################################################ #### Denisova LENGTH only AFR vs. obs. AFR ##### ################################################ name = paste0("IBDlengthDensityDenisova_exclAfricans-obsAfricanswoexcl") #pdf(paste(name, ".pdf", sep = ""), width = 8) #xfig(paste(name, ".fig", sep = ""), width = 8) l1 <- length(deoAFRL) l2 <- length(deJwooAFRL) df <- data.frame(Population = factor( c(rep("1only Africans", l1), rep("2Africans", l2))), proportion = c(deoAFRL, deJwooAFRL)) cdf <- summaryBy(data = df, proportion ~ Population, FUN = mean, na.rm = TRUE) ggplot(df, aes(x = proportion, fill = Population)) + geom_density(alpha = .3, adjust = 0.7) + scale_fill_discrete(name = "Population", breaks = c("1only Africans", "2Africans"), labels = c("only Africans", "Africans")) + geom_vline(data = cdf, aes(xintercept = c(5000, 11000), colour = Population), linetype = "dashed", size = 1.2) + geom_vline(data = cdf, aes(xintercept = c(7100,7000), colour = Population), linetype = "dashed", size = 1.2) + scale_colour_discrete(name = "Population", breaks = c("1only Africans", "2Africans"), labels = c("only Africans", "Africans")) + labs(title = paste0("Denisova IBD Length Distributions: only in Africans vs. observed in Africans")) + scale_y_continuous(breaks = c(0, 1e-05, 2e-05, 3e-05, 4e-05, 5e-05, 6e-05, 7e-05, 8e-05, 9e-05, 10e-05), expand = c(0, 0)) + scale_x_continuous(limits = c(0, 31000), expand = c(0, 0)) + theme(legend.position = c(.8, .7)) + theme(legend.background = element_rect(fill = "gray90", size = 0.3, linetype = "solid", colour = "black")) + coord_cartesian(ylim = c(0, 9.5e-05), xlim = c(0, 31000)) + theme(legend.text = element_text(size = 18), legend.title = element_text(size = 18)) + theme(plot.title = element_text(size = rel(1.4), vjust = 1), axis.title = element_text(size = rel(1.2))) + theme(axis.text = element_text(size = rel(1.0))) + ylab("Density") + xlab("IBD Length") dev.off() ################################################# #### Neandertal LENGTH only AFR vs. obs. AFR #### ################################################# name = paste0("IBDlengthDensityNeandertal_exclAfricans-obsAfricanswoexcl") #pdf(paste(name, ".pdf", sep = ""), width = 8) #xfig(paste(name, ".fig", sep = ""), width = 8) l1 <- length(neoAFRL) l2 <- length(neJwooAFRL) df <- data.frame(Population = factor( c(rep("1only Africans", l1), rep("2Africans", l2))), proportion = c(neoAFRL, neJwooAFRL)) cdf <- summaryBy(data = df, proportion ~ Population, FUN = mean, na.rm = TRUE) ggplot(df, aes(x = proportion, fill = Population)) + geom_density(alpha = .3, adjust = 0.7) + scale_fill_discrete(name = "Population", breaks = c("1only Africans", "2Africans"), labels = c("only Africans", "Africans")) + geom_vline(data = cdf, aes(xintercept = c(5600, 12000), colour = Population), linetype = "dashed", size = 1.2) + scale_colour_discrete(name = "Population", breaks = c("1only Africans", "2Africans"), labels = c("only Africans", "Africans")) + labs(title = paste0("Neandertal IBD Length Distributions: only in Africans vs. observed in Africans")) + scale_y_continuous(breaks = c(0, 1e-05, 2e-05, 3e-05, 4e-05, 5e-05, 6e-05, 7e-05, 8e-05, 9e-05, 10e-05), expand = c(0, 0)) + scale_x_continuous(limits = c(0, 31000), expand = c(0, 0)) + theme(legend.position = c(.8, .7)) + theme(legend.background = element_rect(fill = "gray90", size = 0.3, linetype = "solid", colour = "black")) + coord_cartesian(ylim = c(0, 9.5e-05), xlim = c(0, 31000)) + theme(legend.text = element_text(size = 18), legend.title = element_text(size = 18)) + theme(plot.title = element_text(size = rel(1.4), vjust = 1), axis.title = element_text(size = rel(1.2))) + theme(axis.text = element_text(size = rel(1.0))) + ylab("Density") + xlab("IBD Length") dev.off() ################################################ ##### Denisova LENGTH only AFR vs. OoA ######### ################################################ name = paste0("IBDlengthDensityDenisova_exclAfricans-OoA") #pdf(paste(name, ".pdf", sep = ""), width = 8) #xfig(paste(name, ".fig", sep = ""), width = 8) l1 <- length(deoAFRL) l2 <- length(deOoAL) df <- data.frame(Population = factor( c(rep("only Africans", l1), rep("outside of Africa", l2))), proportion = c(deoAFRL, deOoAL)) cdf <- summaryBy(data = df, proportion ~ Population, FUN = mean, na.rm = TRUE) ggplot(df, aes(x = proportion, fill = Population)) + geom_density(alpha = .3, adjust = 0.7) + geom_vline(data = cdf, aes(xintercept = c(5000, 11000), colour = Population), linetype = "dashed", size = 1.2) + labs(title = paste0("Denisova IBD Length Distributions: only in Africans vs. outside of Africa")) + scale_y_continuous(breaks = c(0, 1e-05, 2e-05, 3e-05, 4e-05, 5e-05, 6e-05, 7e-05, 8e-05, 9e-05, 10e-05), expand = c(0, 0)) + scale_x_continuous(limits = c(0, 31000), expand = c(0, 0)) + theme(legend.position = c(.8, .7)) + theme(legend.background = element_rect(fill = "gray90", size = 0.3, linetype = "solid", colour = "black")) + coord_cartesian(ylim = c(0, 9.5e-05), xlim = c(0, 31000)) + theme(legend.text = element_text(size = 18), legend.title = element_text(size = 18)) + theme(plot.title = element_text(size = rel(1.4), vjust = 1), axis.title = element_text(size = rel(1.2))) + theme(axis.text = element_text(size = rel(1.0))) + ylab("Density") + xlab("IBD Length") dev.off() ################################################ ##### Neandertal LENGTH only AFR vs. OoA ####### ################################################ name = paste0("IBDlengthDensityNeandertal_exclAfricans-OoA") #pdf(paste(name, ".pdf", sep = ""), width = 8) #xfig(paste(name, ".fig", sep = ""), width = 8) l1 <- length(neoAFRL) l2 <- length(neOoAL) df <- data.frame(Population = factor( c(rep("only Africans", l1), rep("outside of Africa", l2))), proportion = c(neoAFRL, neOoAL)) cdf <- summaryBy(data = df, proportion ~ Population, FUN = mean, na.rm = TRUE) ggplot(df, aes(x = proportion, fill = Population)) + geom_density(alpha = .3, adjust = 0.7) + geom_vline(data = cdf, aes(xintercept = c(5600, 12500), colour = Population), linetype = "dashed", size = 1.2) + labs(title = paste0("Neandertal IBD Length Distributions: only in Africans vs. outside of Africa")) + scale_y_continuous(breaks = c(0, 1e-05, 2e-05, 3e-05, 4e-05, 5e-05, 6e-05, 7e-05, 8e-05, 9e-05, 10e-05), expand = c(0, 0)) + scale_x_continuous(limits = c(0, 31000), expand = c(0, 0)) + theme(legend.position = c(.8, .7)) + theme(legend.background = element_rect(fill = "gray90", size = 0.3, linetype = "solid", colour = "black")) + coord_cartesian(ylim = c(0, 9.5e-05), xlim = c(0, 31000)) + theme(legend.text = element_text(size = 18), legend.title = element_text(size = 18)) + theme(plot.title = element_text(size = rel(1.4), vjust = 1), axis.title = element_text(size = rel(1.2))) + theme(axis.text = element_text(size = rel(1.0))) + ylab("Density") + xlab("IBD Length") dev.off() ####################################################################### ##### Denisova LENGTH only AFR vs. only AFR and only Ancestor ######### ####################################################################### name=paste0("IBDlengthDensityDenisova_Africa_Ancestor_NoAncestor") #pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(deoAFRL) l2 <- length(adeoAFRL) df <- data.frame(Population = factor( c(rep("1not ancestral",l1),rep("2ancestral",l2)) ), proportion = c(deoAFRL,adeoAFRL)) cdf <- summaryBy(data=df, proportion ~ Population, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=Population)) + geom_density(alpha=.3,adjust=0.7) + scale_fill_discrete(name="Population", breaks = c("1not ancestral", "2ancestral"), labels = c("not ancestral","ancestral")) + geom_vline(data=cdf, aes(xintercept=c(5000, 3700), colour=Population), linetype="dashed", size=1.2) + scale_colour_discrete(name="Population", breaks = c("1not ancestral", "2ancestral"), labels = c("not ancestral","ancestral")) + labs(title=paste0("Denisova IBD Length Distributions: ancestral vs. not ancestral")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05,6e-05,7e-05,8e-05,9e-05), expand = c(0, 0)) + scale_x_continuous(limits=c(0,30000), expand = c(0, 0)) + theme(legend.position=c(.8, .7))+ theme(legend.background = element_rect(fill="gray90", size=0.3,linetype="solid",colour="black")) + coord_cartesian(ylim = c(0,9.9e-05),xlim=c(0,31000)) + theme(legend.text = element_text(size=18), legend.title = element_blank()) + theme(plot.title = element_text(size = rel(1.4), vjust=1), axis.title = element_text(size = rel(1.2))) + theme(axis.text = element_text(size = rel(1.0))) + ylab("Density") + xlab("IBD Length") dev.off() ####################################################################### ##### Neandertal LENGTH only AFR vs. only AFR and only Ancestor ####### ####################################################################### name=paste0("IBDlengthDensityNeandertal_Africa_Ancestor_NoAncestor") #pdf(paste(name,".pdf",sep=""),width=8) #xfig(paste(name,".fig",sep=""),width=8) l1 <- length(neoAFRL) l2 <- length(aneoAFRL) df <- data.frame(Population = factor( c(rep("1not ancestral",l1),rep("2ancestral",l2)) ), proportion = c(neoAFRL,aneoAFRL)) cdf <- summaryBy(data=df, proportion ~ Population, FUN=mean, na.rm=TRUE) ggplot(df, aes(x=proportion, fill=Population)) + geom_density(alpha=.3,adjust=0.7) + scale_fill_discrete(name="Population", breaks = c("1not ancestral", "2ancestral"), labels = c("not ancestral","ancestral")) + geom_vline(data=cdf, aes(xintercept=c(5700, 3600), colour=Population), linetype="dashed", size=1.2) + scale_colour_discrete(name="Population", breaks = c("1not ancestral", "2ancestral"), labels = c("not ancestral","ancestral")) + labs(title=paste0("Neandertal IBD Length Distributions: ancestral vs. not ancestral")) + scale_y_continuous(breaks=c(0,1e-05,2e-05,3e-05,4e-05,5e-05,6e-05,7e-05,8e-05,9e-05), expand = c(0, 0)) + scale_x_continuous(limits=c(0,30000), expand = c(0, 0)) + theme(legend.position=c(.8, .7))+ theme(legend.background = element_rect(fill="gray90", size=0.3,linetype="solid",colour="black")) + coord_cartesian(ylim = c(0,9.9e-05),xlim=c(0,31000)) + theme(legend.text = element_text(size=18), legend.title = element_blank()) + theme(plot.title = element_text(size = rel(1.4), vjust=1), axis.title = element_text(size = rel(1.2))) + theme(axis.text = element_text(size = rel(1.0))) + ylab("Density") + xlab("IBD Length") dev.off()