minruns <- 1 #minruns <- 2 maxruns <- 100 continue <- FALSE output_index <- "" #continue <- FALSE thres <- c(100,1000,10000) noTh <- length(thres) pars <- read.table("dataSimParameters.txt") mismatchImplanted <- 0.0 maxrunsS <- pars[1,2] snps <- pars[2,2] samplesN <- pars[3,2] avDistSnps <- pars[4,2] avDistMinor <- pars[5,2] noImplanted <- pars[6,2] ## how many IBDs implanted <- pars[7,2] ## in how many samples length <- pars[8,2] minors <- pars[9,2] mismatches <- pars[10,2] if (mismatches>0) { mismatchImplanted <- pars[11,2] overlap <- pars[12,2] } else { overlap <- pars[11,2] } haploN <- 2*samplesN dataA <- 1:samplesN g2 <- 2*(1:snps) g1 <- g2 -1 s2 <- 2*dataA s1 <- s2-1 indi <- cbind(as.character(1:haploN),as.character(1:haploN),as.character(1:haploN),as.character(1:haploN)) if (mismatches>0) { dataS <- c(maxruns,snps,samplesN,avDistSnps,avDistMinor,noImplanted,implanted,length,minors,mismatches,overlap) } else { dataS <- c(maxruns,snps,samplesN,avDistSnps,avDistMinor,noImplanted,implanted,length,minors,mismatches,mismatchImplanted,overlap) } dim(dataS) <- c(1,length(dataS)) if (!continue) { for (thh in 1:noTh) { write(dataS,file=paste("LDres",thres[thh],output_index,".txt",sep=""),ncolumns=10000000) } } LDres <- list() for (thh in 1:noTh) { LDres[[thh]] <- list() } #################################################### # loop indexLD <- 1 for (run in minruns:maxruns) { # impl,implG,inter,implantI,implantN,intervalImp load(file=paste("dataSim",run,"Impl.Rda",sep="")) load(file=paste("dataSim",run,"Anno.Rda",sep="")) allsaF <- haploN allsa <- samplesN allsn <- snps #### j=0 noImplanted <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 impl <- list() implG <- list() inter <- list() implantI <- list() implantN <- list() intervalImp <- list() implantedA <- list() NegsaFA <- list() NegsaA <- list() PossaA <- list() NegsnA <- list() PossnA <- list() NegsaF <- 0 Negsa <- 0 Possa <- 0 Negsn <- 0 Possn <- 0 for (noIm in 1:noImplanted) { impl[[noIm]] <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="numeric",nlines=1,skip=j)) j <- j+1 implG[[noIm]] <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 inter[[noIm]] <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 implantI[[noIm]] <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 implantN[[noIm]] <- scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="character",nlines=1,skip=j) j <- j+1 intervalImp[[noIm]] <- matrix(0,implanted,4) for (i in 1:implanted) { interImp <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 intervalImp[[noIm]][i,] <- interImp } implantedA[[noIm]] <- length(impl[[noIm]]) NegsaFA[[noIm]] <- haploN - implantedA[[noIm]] NegsaF <- NegsaF + NegsaFA[[noIm]] NegsaA[[noIm]] <- samplesN - implantedA[[noIm]] Negsa <- Negsa + NegsaA[[noIm]] PossaA[[noIm]] <- implantedA[[noIm]] Possa <- Possa + PossaA[[noIm]] NegsnA[[noIm]] <- snps - length(inter[[noIm]]) Negsn <- Negsn + NegsnA[[noIm]] PossnA[[noIm]] <- length(inter[[noIm]]) Possn <- Possn + PossnA[[noIm]] } implantTmax <- list() implantT <- matrix(0,samplesN,snps) for (noIm in 1:noImplanted) { implantTmax[[noIm]] <- matrix(0,samplesN,snps) for (i in 1:implanted) { implantT[implG[[noIm]][i],(intervalImp[[noIm]][i,1]:intervalImp[[noIm]][i,2])] <- 1 implantTmax[[noIm]][implG[[noIm]][i],(intervalImp[[noIm]][i,1]:intervalImp[[noIm]][i,2])] <- 1 } } implantHmax <- list() implantH <- matrix(0,haploN,snps) for (noIm in 1:noImplanted) { implantHmax[[noIm]] <- matrix(0,haploN,snps) for (i in 1:implanted) { implantH[impl[[noIm]][i],(intervalImp[[noIm]][i,1]:intervalImp[[noIm]][i,2])] <- 1 implantHmax[[noIm]][impl[[noIm]][i],(intervalImp[[noIm]][i,1]:intervalImp[[noIm]][i,2])] <- 1 } } implGA <- c() implA <- c() interA <- c() for (noIm in 1:noImplanted) { implGA <- union(implGA,implG[[noIm]]) implA <- union(implA,impl[[noIm]]) interA <- union(interA,inter[[noIm]]) } implantedSF <- length(implA) implantedS <- length(implGA) ################################################ #LD by PLINK ############ #plink_com <- paste("plink --file dataSim",run,"plink --r2 --ld-window 500 --ld-window-r2 0 --out plink",output_index,sep="") plink_com <- paste("plink --file dataSim",run,"plink --r --ld-window 500 --out plink",output_index,sep="") system(plink_com) resPlink <- read.table(file= paste("plink",output_index,".ld",sep=""),header=TRUE) resPlink <- as.matrix(resPlink) soR <- sort(resPlink[,7],index.return=TRUE,decreasing=TRUE) resPlink <- resPlink[soR$ix,] for (noIm in 1:noImplanted) { re1 <- intersect(match(inter[[noIm]],resPlink[,3]),match(inter[[noIm]],resPlink[,6])) pv <- resPlink[re1,7] sn <- union(resPlink[re1,3],resPlink[re1,6]) snv <- vector("numeric",length(sn)) reI <- vector("numeric",length(sn)) for (i1 in 1:length(sn)) { aa1 <- match(sn[i1],resPlink[re1,3]) aa2 <- match(sn[i1],resPlink[re1,6]) if (is.na(aa1)) { aa <- aa2 } else { if (is.na(aa2)) { aa <- aa1 } else { aa <- min(aa1,aa2) } } snv[i1] <- pv[aa] reI[i1] <- re1[aa] } for (thh in 1:noTh) { detected <- length(union(resPlink[(1:thres[thh]),3],resPlink[(1:thres[thh]),6])) tp <- length(which(reI<=thres[thh])) fp <- detected-tp fn <- length-tp tn <- snps-fp-tp-fn pos <- length neg <- snps-length all <- snps sens <- tp/pos spec <- tn/neg fdr <- fp/detected fpr <- fp/neg acc <- (tp+tn)/all F1 <- 2*tp/(2*tp+fp+fn) mcc <- (tp*tn - fp*fn)/sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)) dataR <- c(run,noIm,tp,fn,fp,tn,sens,spec,fdr,fpr,acc,F1,mcc) LDres[[thh]][[indexLD]] <- dataR indexLD <- indexLD + 1 dim(dataR) <- c(1,length(dataR)) write(dataR,file=paste("LDres",thres[thh],output_index,".txt",sep=""),append=TRUE,ncolumns=10000000) } } } # end loop for (thh in 1:noTh) { LDres1 <- LDres[[thh]] save(LDres1,file=paste("LDres",thres[thh],output_index,".Rda",sep="")) LDresM <- read.table(file= paste("LDres",thres[thh],output_index,".txt",sep=""),skip=1) header <- c("runNumber","IBDnumber","trueP","falseN","falseP","trueN","sensitivity","specifity","FDR","FPR","accuracy","F1","Matthews") colnames(LDresM) <- header LDresM1 <- LDresM[,c(-1,-2)] LDMeans <- colMeans(LDresM1) LDStd <- apply(LDresM1,2,sd) LDMedian <- apply(LDresM1,2,median) resultsAll <- rbind(LDMeans,LDStd,LDMedian) write.table(resultsAll,file= paste("LDresults",thres[thh],output_index,".txt",sep=""),sep="\t") }