load("dups.Rda") load("analyzeResult.Rda") load(file="/seppdata/sepp/linkage/kpgp/denisovaHg19KPGPChr1.Rda") library(hapFabia) library(fabia) #source("prepareAncientGenome.R") #source("plotAncientGenomes.R") shiftSize <- 5000 segmentSize <- 10000 fileName="KPGP1000_chr1" # without type startRun <- 1 endRun <- 633 runIndex="" lengthAll <- 3167651 shift <- 5000 intervallAll <- 10000 over <- intervallAll%/%shift N1 <- lengthAll%/%shift endRunAll <- (N1-over+2) # 639 pos <- denisovaHg19$V2 nea <- as.character(denisovaHg19$ad1) ref <- as.character(denisovaHg19$V5) alt <- as.character(denisovaHg19$V6) res <- list() allCount <- 0 allCount1 <- 0 for (posAll in startRun:endRun) { start <- (posAll-1)*shift end <- start + intervallAll if (end > lengthAll) { end <- lengthAll } pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="") load(file=paste(fileName,pRange,"_resAnno",".Rda",sep="")) mergedIBD <- resHapFabia$mergedIBD noIBD <- length(mergedIBD) if (noIBD > 0) { for (ibdC in 1:noIBD) { allCount <- allCount + 1 if (!dups[allCount]) { snpPositions <- mergedIBD[[ibdC]][[15]] l1 <-mergedIBD[[ibdC]][[7]] # no SNPs m <- match(snpPositions,pos,nomatch=0) snpPositions <- snpPositions[which(m>0)] g <- m[which(m>0)] l2 <- length(g) # no SNPs found if (l2>0) { allCount1 <- allCount1 + 1 neaG <- nea[g] altG <- alt[g] lno <- 0 # "-" not determined lma <- 0 # "X" match ref lne <- 0 # match denisovatal lnew <- 0 # new base posb <- c() posc <- c() posbt <- c() posct <- c() itt <- 1 for (it in 1:l2) { AAs <- strsplit(as.character(neaG[it]),",")[[1]] if (length(AAs)==1) { if (AAs[1]=="-") { lno <- lno+1 } else { #AAs[1]=="X" lma <- lma+1 posc <- c(posc,it) posct <- c(posct,itt) itt <- itt + 1 } } else { if (AAs[1] == altG[it]) { lne <- lne +1 posb <- c(posb,it) posbt <- c(posbt,itt) itt <- itt + 1 } else { lnew <- lnew +1 } } } if (length(posb)>1) { dad <- diff(posbt) dam <- median(dad) dan <- mean(dad) snpPN <- snpPositions[posb] dN <- diff(snpPN) dNm <- median(dN) } else { dam <- 0 dNm <- 0 dan <- 0 } if (length(posc)>1) { dac <- diff(posct) dcm <- median(dac) dcn <- mean(dac) snpPH <- snpPositions[posc] dH <- diff(snpPH) dHm <- median(dH) } else { dcm <- 0 dHm <- 0 dcn <- 0 } if (l2>0) { dA <- diff(snpPositions) dAm <- median(dA) } else { dAm <- 0 } # l2 number SNPs de <- l2-lno # number determined in Denisovatal if (l2>0) { det <- de/l2 # ratio determined } else { det <- 0 } if (de>0) { refM <- lma/de # ratio reference altM <- lne/de # ratio alternative newM <- lnew/de # ratio new bases } else { refM <- 0 altM <- 0 newM <- 0 } res1 <- c(allCount1,allCount,ibdC,posAll,l2,de,lma,lne,lnew,det,refM,altM,newM,dam,dcm,dan,dcn,dAm,dNm,dHm) res[[allCount1]] <- res1 } cat(allCount," out of 222246\r") } } } } rr <- unlist(res) l <- length(res1) DenisovaMat <- matrix(rr,nrow=allCount1,ncol=l,byrow=TRUE) colnames(DenisovaMat) <- c("allCount1","allCount","ibdC","posAll","#SNPs","#Nea","#Ref","#Alt","#New","NeaR","RefR","AltR","NewR","xAltCon","xRefCon","mAltCon","mRefCon","NeaPosC","AltPosC","RefPosC") write.table(DenisovaMat,file=paste("DenisovaMat",runIndex,".txt",sep="")) save(res,DenisovaMat,file=paste("DenisovaMat",runIndex,".Rda",sep=""))