load("dups.Rda") load("analyzeResult.Rda") load(file="/seppdata/sepp/linkage/kpgp/annotationAChr1.Rda") library(hapFabia) library(fabia) #source("prepareAncientGenome.R") #source("plotAncientGenomes.R") shiftSize <- 5000 segmentSize <- 10000 fileName="KPGP1000_chr1" # without type startRun <- 1 endRun <- 633 runIndex="" lengthAll <- 3167651 shift <- 5000 intervallAll <- 10000 over <- intervallAll%/%shift N1 <- lengthAll%/%shift endRunAll <- (N1-over+2) # 639 # annotationAChr1 #colN <- c("CHROM","POS","ID","REF","ALT","LDAF","AVGPOST","RSQ","ERATE","THETA","CIEND","CIPOS","END","HOMLEN","HOMSEQ","SVLEN","SVTYPE","AC","AN","DEL","GT","DS","GL","AA","AF","AMR_AF","ASN_AF","AFR_AF","EUR_AF","VT","SNPSOURCE") pos <- annotationAChr1[,2] nea <- as.character(annotationAChr1[,24]) ref <- as.character(annotationAChr1[,4]) alt <- as.character(annotationAChr1[,5]) res <- list() allCount <- 0 allCount1 <- 0 for (posAll in startRun:endRun) { start <- (posAll-1)*shift end <- start + intervallAll if (end > lengthAll) { end <- lengthAll } pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="") load(file=paste(fileName,pRange,"_resAnno",".Rda",sep="")) mergedIBD <- resHapFabia$mergedIBD noIBD <- length(mergedIBD) if (noIBD > 0) { for (ibdC in 1:noIBD) { allCount <- allCount + 1 if (!dups[allCount]) { snpPositions <- mergedIBD[[ibdC]][[15]] l1 <-mergedIBD[[ibdC]][[7]] # no SNPs m <- match(snpPositions,pos,nomatch=0) snpPositions <- snpPositions[which(m>0)] g <- m[which(m>0)] l2 <- length(g) # no SNPs found allCount1 <- allCount1 + 1 if (l2>0) { neaG <- nea[g] altG <- alt[g] refG <- ref[g] lno <- 0 # "-" not determined lma <- 0 # "X" match ref lne <- 0 # match ancesttal lnew <- 0 # new base posb <- c() posc <- c() posbt <- c() posct <- c() itt <- 1 for (it in 1:l2) { if (toupper(neaG[it]) == toupper(altG[it])) { lne <- lne +1 posb <- c(posb,it) posbt <- c(posbt,itt) itt <- itt + 1 } else { if (toupper(neaG[it]) == toupper(refG[it])) { lma <- lma+1 posc <- c(posc,it) posct <- c(posct,itt) itt <- itt + 1 } else { if ((neaG[it] == "-")||(neaG[it] == ".")||(neaG[it] == "N")) { lno <- lno+1 } else { lnew <- lnew +1 } } } } if (length(posb)>1) { dad <- diff(posbt) dam <- median(dad) dan <- mean(dad) snpPN <- snpPositions[posb] dN <- diff(snpPN) dNm <- median(dN) } else { dam <- 0 dNm <- 0 dan <- 0 } if (length(posc)>1) { dac <- diff(posct) dcm <- median(dac) dcn <- mean(dac) snpPH <- snpPositions[posc] dH <- diff(snpPH) dHm <- median(dH) } else { dcm <- 0 dHm <- 0 dcn <- 0 } if (l2>0) { dA <- diff(snpPositions) dAm <- median(dA) } else { dAm <- 0 } # l2 number SNPs de <- l2-lno # number determined in Ancesttal if (l2>0) { det <- de/l2 # ratio determined } else { det <- 0 } if (de>0) { refM <- lma/de # ratio reference altM <- lne/de # ratio alternative newM <- lnew/de # ratio new bases } else { refM <- 0 altM <- 0 newM <- 0 } res1 <- c(allCount1,allCount,ibdC,posAll,l2,de,lma,lne,lnew,det,refM,altM,newM,dam,dcm,dan,dcn,dAm,dNm,dHm) res[[allCount1]] <- res1 } else { de <- 0 lma <- 0 lne <- 0 lnew <- 0 det <- 0 refM <- 0 altM <- 0 newM <- 0 dam <- 0 dcm <- 0 dan <- 0 dcn <- 0 dAm <- 0 dNm <- 0 dHm <- 0 res1 <- c(allCount1,allCount,ibdC,posAll,l2,de,lma,lne,lnew,det,refM,altM,newM,dam,dcm,dan,dcn,dAm,dNm,dHm) res[[allCount1]] <- res1 } cat(allCount," out of 222246\r") } } } } rr <- unlist(res) l <- length(res1) AncestMat <- matrix(rr,nrow=allCount1,ncol=l,byrow=TRUE) colnames(AncestMat) <- c("allCount1","allCount","ibdC","posAll","#SNPs","#Nea","#Ref","#Alt","#New","NeaR","RefR","AltR","NewR","xAltCon","xRefCon","mAltCon","mRefCon","NeaPosC","AltPosC","RefPosC") write.table(AncestMat,file=paste("AncestMat",runIndex,".txt",sep="")) save(res,AncestMat,file=paste("AncestMat",runIndex,".Rda",sep=""))