minruns <- 1 maxruns <- 100 source("/seppdata/sepp/linkage/release/funct.R") pars <- read.table("dataSimParameters.txt") mismatchImplanted <- 0.0 maxrunsS <- pars[1,2] snps <- pars[2,2] samplesN <- pars[3,2] avDistSnps <- pars[4,2] avDistMinor <- pars[5,2] noImplanted <- pars[6,2] ## how many IBDs implanted <- pars[7,2] ## in how many samples length <- pars[8,2] minors <- pars[9,2] mismatches <- pars[10,2] if (mismatches>0) { mismatchImplanted <- pars[11,2] overlap <- pars[12,2] } else { overlap <- pars[11,2] } haploN <- 2*samplesN dataA <- 1:samplesN g2 <- 2*(1:snps) g1 <- g2 -1 s2 <- 2*dataA s1 <- s2-1 indi <- cbind(as.character(1:haploN),as.character(1:haploN),as.character(1:haploN),as.character(1:haploN)) ### FABIA Parameters: ps, inteA, thresA, minSNPs I <- 10000 # All SNPs gives I/10 kbp DNA length IBDlength <- 50 # IBD length in kbp thresCount <- 1e-5 # p-value of random histogram hit, default 1e-5 minSNPsFactor <- 3/4 # percentage of IBD overlap; 1/2 for large to 3/4 for for small intervals ps <- 0.99 # percentage of Ls to remove / qunatile psZ <- 0.8 # percentage of Zs to remove / qunatile inteA <- IBDlength*10 # histogram length gives inteA/10 kbp DNA length pMAF <- 1/avDistMinor # max. minor allele frequency # estimated for 1000 genomes: 0.035 # max MAF is 0.05 ... kk <- 1 while ((I/inteA)*choose(samplesN,2)*(1-pbinom(kk,inteA,pMAF*pMAF))>thresCount) { kk <- kk +1} thresA <- kk minSNPs <- round(minSNPsFactor*thresA) ### END FABIA Parameters: ps, inteA, thresA, minSNPs ######################################################## ######################################################## ################# for simulation set explicitely thresA <- 11 minSNPs <- 8 ######################################################## ######################################################## ######################################################## if (mismatches>0) { dataS <- c(maxruns,snps,samplesN,avDistSnps,avDistMinor,noImplanted,implanted,length,minors,mismatches,overlap) } else { dataS <- c(maxruns,snps,samplesN,avDistSnps,avDistMinor,noImplanted,implanted,length,minors,mismatches,mismatchImplanted,overlap) } dim(dataS) <- c(1,length(dataS)) write(dataS,file=paste("fabiares1.txt",sep=""),ncolumns=10000000) write(dataS,file=paste("fabiaresIBD1.txt",sep=""),ncolumns=10000000) fabiares <- list() fabiaresIBD <- list() for (run in minruns:maxruns) { # impl,implG,inter,implantI,implantN,intervalImp load(file=paste("dataSim",run,"Impl.Rda",sep="")) load(file=paste("dataSim",run,"Anno.Rda",sep="")) allsaF <- haploN allsa <- samplesN allsn <- snps #### j=0 noImplanted <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 impl <- list() implG <- list() inter <- list() implantI <- list() implantN <- list() intervalImp <- list() implantedA <- list() NegsaFA <- list() NegsaA <- list() PossaA <- list() NegsnA <- list() PossnA <- list() NegsaF <- 0 Negsa <- 0 Possa <- 0 Negsn <- 0 Possn <- 0 for (noIm in 1:noImplanted) { impl[[noIm]] <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="numeric",nlines=1,skip=j)) j <- j+1 implG[[noIm]] <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 inter[[noIm]] <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 implantI[[noIm]] <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 implantN[[noIm]] <- scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="character",nlines=1,skip=j) j <- j+1 intervalImp[[noIm]] <- matrix(0,implanted,4) for (i in 1:implanted) { interImp <- as.integer(scan(file=paste("dataSim",run,"Impl.txt",sep=""),what="integer",nlines=1,skip=j)) j <- j+1 intervalImp[[noIm]][i,] <- interImp } implantedA[[noIm]] <- length(impl[[noIm]]) NegsaFA[[noIm]] <- haploN - implantedA[[noIm]] NegsaF <- NegsaF + NegsaFA[[noIm]] NegsaA[[noIm]] <- samplesN - implantedA[[noIm]] Negsa <- Negsa + NegsaA[[noIm]] PossaA[[noIm]] <- implantedA[[noIm]] Possa <- Possa + PossaA[[noIm]] NegsnA[[noIm]] <- snps - length(inter[[noIm]]) Negsn <- Negsn + NegsnA[[noIm]] PossnA[[noIm]] <- length(inter[[noIm]]) Possn <- Possn + PossnA[[noIm]] } implantTmax <- list() implantT <- matrix(0,samplesN,snps) for (noIm in 1:noImplanted) { implantTmax[[noIm]] <- matrix(0,samplesN,snps) for (i in 1:implanted) { implantT[implG[[noIm]][i],(intervalImp[[noIm]][i,1]:intervalImp[[noIm]][i,2])] <- 1 implantTmax[[noIm]][implG[[noIm]][i],(intervalImp[[noIm]][i,1]:intervalImp[[noIm]][i,2])] <- 1 } } implantHmax <- list() implantH <- matrix(0,haploN,snps) for (noIm in 1:noImplanted) { implantHmax[[noIm]] <- matrix(0,haploN,snps) for (i in 1:implanted) { implantH[impl[[noIm]][i],(intervalImp[[noIm]][i,1]:intervalImp[[noIm]][i,2])] <- 1 implantHmax[[noIm]][impl[[noIm]][i],(intervalImp[[noIm]][i,1]:intervalImp[[noIm]][i,2])] <- 1 } } implGA <- c() implA <- c() interA <- c() for (noIm in 1:noImplanted) { implGA <- union(implGA,implG[[noIm]]) implA <- union(implA,impl[[noIm]]) interA <- union(interA,inter[[noIm]]) } implantedSF <- length(implA) implantedS <- length(implGA) #################################################################### load(file=paste("resA",run,".Rda",sep="")) #################################################################### i <- 0 max_n <- ncol(res@L) while ((i0.000001)) { i <- i +1 } max_n <- i off1 <- 0 resIBD1 <- getIBDs(res=res,sPF=sPF,annot=annot,chrom=1,labelsA=indi,max_n=max_n,ps=ps,psZ=psZ,inteA=inteA,thresA=thresA,minSNPs=minSNPs,off=off1) if ( length(resIBD1) > 1) { comp <- compareIBDres(resIBD1,simv="minD") mergedIBD1 <- mergeIBDres(resIBD1=resIBD1,resIBD2=NULL,comp=comp,cut=0.8) } else { mergedIBD1 <- resIBD1 } off2=inteA%/%2 resIBD2 <- getIBDs(res=res,sPF=sPF,annot=annot,chrom=1,labelsA=indi,max_n=max_n,ps=ps,psZ=psZ,inteA=inteA,thresA=thresA,minSNPs=minSNPs,off=off2) if ( length(resIBD2) > 1) { comp <- compareIBDres(resIBD2,simv="minD") mergedIBD2 <- mergeIBDres(resIBD1=resIBD2,resIBD2=NULL,comp=comp,cut=0.8) } else { mergedIBD2 <- resIBD2 } if ( length(mergedIBD1) > 0) { if ( length(mergedIBD2) > 0) { comp12 <- compareIBDres(mergedIBD1,mergedIBD2,simv="minD") mergedIBD <- mergeIBDres(resIBD1=mergedIBD1,resIBD2=mergedIBD2,comp=comp12,cut=0.8) } else { mergedIBD <- mergedIBD1 }} else { if ( length(mergedIBD2) > 0) { mergedIBD <- mergedIBD2 } else { mergedIBD <- mergedIBD1 } } save(mergedIBD,resIBD1,resIBD2,mergedIBD1,mergedIBD2,file=paste("resIBDA",run,"1.Rda",sep="")) #################################################################### maxtrueP <- list() maxfalseP <- list() maxfalseN <- list() maxtrueN <- list() maxsens <- list() maxspec <- list() maxfdr <- list() maxfpr <- list() maxacc <- list() maxF1 <- list() maxmcc <- list() maxtruePsa <- list() maxfalsePsa <- list() maxfalseNsa <- list() maxtrueNsa <- list() maxsenssa <- list() maxspecsa <- list() maxfdrsa <- list() maxfprsa <- list() maxaccsa <- list() maxF1sa <- list() maxmccsa <- list() maxtruePsn <- list() maxfalsePsn <- list() maxfalseNsn <- list() maxtrueNsn <- list() maxsenssn <- list() maxspecsn <- list() maxfdrsn <- list() maxfprsn <- list() maxaccsn <- list() maxF1sn <- list() maxmccsn <- list() for (noIm in 1:noImplanted) { maxtrueP[[noIm]] <- 0 maxfalseP[[noIm]] <- 0 maxfalseN[[noIm]] <- 0 maxtrueN[[noIm]] <- 0 maxsens[[noIm]] <- 0 maxspec[[noIm]] <- 0 maxfdr[[noIm]] <- 0 maxfpr[[noIm]] <- 0 maxacc[[noIm]] <- 0 maxF1[[noIm]] <- 0 maxmcc[[noIm]] <- 0 maxtruePsa[[noIm]] <- 0 maxfalsePsa[[noIm]] <- 0 maxfalseNsa[[noIm]] <- 0 maxtrueNsa[[noIm]] <- 0 maxsenssa[[noIm]] <- 0 maxspecsa[[noIm]] <- 0 maxfdrsa[[noIm]] <- 0 maxfprsa[[noIm]] <- 0 maxaccsa[[noIm]] <- 0 maxF1sa[[noIm]] <- 0 maxmccsa[[noIm]] <- 0 maxtruePsn[[noIm]] <- 0 maxfalsePsn[[noIm]] <- 0 maxfalseNsn[[noIm]] <- 0 maxtrueNsn[[noIm]] <- 0 maxsenssn[[noIm]] <- 0 maxspecsn[[noIm]] <- 0 maxfdrsn[[noIm]] <- 0 maxfprsn[[noIm]] <- 0 maxaccsn[[noIm]] <- 0 maxF1sn[[noIm]] <- 0 maxmccsn[[noIm]] <- 0 } allSamp <- c() allSnps <- c() foundAll <- matrix(0,haploN,snps) leeb <- length(mergedIBD) nob <- 1 if (leeb>0) { for (i in 1:leeb) { samplesF <- mergedIBD[[i]]$noSamples snpsF <- mergedIBD[[i]]$noSnps noSnpsF <- mergedIBD[[i]]$numberSnps noSamplesF <- mergedIBD[[i]]$numberSamples range <- snpsF[1]:snpsF[noSnpsF] allSamp <- union(allSamp,samplesF) allSnps <- union(allSnps,range) for (noIm in 1:noImplanted) { foundAllmax <- matrix(0,haploN,snps) startS <- rep((snps+1),noSamplesF) endS <- rep(0,noSamplesF) for (j in 1:noSnpsF) { ssa <- sPF$sL[[snpsF[j]]] ssa1 <- intersect(samplesF,ssa) ssa2 <- match(ssa1,samplesF) if (length(ssa2)>1) { for (k in 1:length(ssa1)) { if (startS[ssa2[k]]>snpsF[j]) { startS[ssa2[k]] <- snpsF[j] } if (endS[ssa2[k]]0) { fdr <- falseP/detected } else { fdr <- 0 } fpr <- falseP/Neg acc <- (trueP+trueN)/all st11 <- (2*trueP+falseP+falseN) if (st11>0) { F1 <- 2*trueP/(2*trueP+falseP+falseN) } else { F1 <- 0 } stt <- (trueP+falseP)*(trueP+falseN)*(trueN+falseP)*(trueN+falseN) if (stt>0) { mcc <- (trueP*trueN - falseP*falseN)/sqrt(stt) } else { mcc <- 0 } if (mcc>maxmcc[[noIm]]) { maxtrueP[[noIm]] <- trueP maxfalseP[[noIm]] <- falseP maxfalseN[[noIm]] <- falseN maxtrueN[[noIm]] <- trueN maxsens[[noIm]] <- sens maxspec[[noIm]] <- spec maxfdr[[noIm]] <- fdr maxfpr[[noIm]] <- fpr maxacc[[noIm]] <- acc maxF1[[noIm]] <- F1 maxmcc[[noIm]] <- mcc } ######## sampI <- intersect(impl[[noIm]],samplesF) minorI <- intersect(inter[[noIm]],snpsF) snpsI <- intersect(inter[[noIm]],range) ## samples detectedsa <- noSamplesF truePsa <- length(sampI) falsePsa <- detectedsa-truePsa falseNsa <- implantedA[[noIm]]-truePsa trueNsa <- allsaF - truePsa - falseNsa - falsePsa senssa <- truePsa/PossaA[[noIm]] specsa <- trueNsa/NegsaFA[[noIm]] if (detectedsa>0) { fdrsa <- falsePsa/detectedsa } else { fdrsa <- 0 } fprsa <- falsePsa/NegsaFA[[noIm]] accsa <- (truePsa+trueNsa)/allsaF st11sa <- (2*truePsa+falsePsa+falseNsa) if (st11sa>0) { F1sa <- 2*truePsa/(2*truePsa+falsePsa+falseNsa) } else { F1sa <- 0 } sttsa <- (truePsa+falsePsa)*(truePsa+falseNsa)*(trueNsa+falsePsa)*(trueNsa+falseNsa) if (sttsa>0) { mccsa <- (truePsa*trueNsa - falsePsa*falseNsa)/sqrt(sttsa) } else { mccsa <- 0 } if (mccsa>maxmccsa[[noIm]]) { maxtruePsa[[noIm]] <- truePsa maxfalsePsa[[noIm]] <- falsePsa maxfalseNsa[[noIm]] <- falseNsa maxtrueNsa[[noIm]] <- trueNsa maxsenssa[[noIm]] <- senssa maxspecsa[[noIm]] <- specsa maxfdrsa[[noIm]] <- fdrsa maxfprsa[[noIm]] <- fprsa maxaccsa[[noIm]] <- accsa maxF1sa[[noIm]] <- F1sa maxmccsa[[noIm]] <- mccsa } ## snps detectedsn <- length(range) truePsn <- length(snpsI) falsePsn <- detectedsn-truePsn falseNsn <- length(inter[[noIm]]) - truePsn trueNsn <- snps - truePsn - falseNsn - falsePsn senssn <- truePsn/PossnA[[noIm]] specsn <- trueNsn/NegsnA[[noIm]] if (detectedsn>0) { fdrsn <- falsePsn/detectedsn } else { fdrsn <- 0 } fprsn <- falsePsn/NegsnA[[noIm]] accsn <- (truePsn+trueNsn)/allsn st11sn <- (2*truePsn+falsePsn+falseNsn) if (st11sn>0) { F1sn <- 2*truePsn/(2*truePsn+falsePsn+falseNsn) } else { F1sn <- 0 } sttsn <- (truePsn+falsePsn)*(truePsn+falseNsn)*(trueNsn+falsePsn)*(trueNsn+falseNsn) if (sttsn>0) { mccsn <- (truePsn*trueNsn - falsePsn*falseNsn)/sqrt(sttsn) } else { mccsn <- 0 } if (mccsn>maxmccsn[[noIm]]) { maxtruePsn[[noIm]] <- truePsn maxfalsePsn [[noIm]]<- falsePsn maxfalseNsn[[noIm]] <- falseNsn maxtrueNsn[[noIm]] <- trueNsn maxsenssn[[noIm]] <- senssn maxspecsn[[noIm]] <- specsn maxfdrsn[[noIm]] <- fdrsn maxfprsn[[noIm]] <- fprsn maxaccsn[[noIm]] <- accsn maxF1sn[[noIm]] <- F1sn maxmccsn[[noIm]] <- mccsn } } ## all startS <- rep((snps+1),noSamplesF) endS <- rep(0,noSamplesF) for (j in 1:noSnpsF) { ssa <- sPF$sL[[snpsF[j]]] ssa1 <- intersect(samplesF,ssa) ssa2 <- match(ssa1,samplesF) if (length(ssa2)>1) { for (k in 1:length(ssa1)) { if (startS[ssa2[k]]>snpsF[j]) { startS[ssa2[k]] <- snpsF[j] } if (endS[ssa2[k]]0) { fdr <- falseP/detected } else { fdr <- 0 } fpr <- falseP/Neg acc <- (trueP+trueN)/all st11 <- (2*trueP+falseP+falseN) if (st11>0) { F1 <- 2*trueP/(2*trueP+falseP+falseN) } else { F1 <- 0 } stt <- (trueP+falseP)*(trueP+falseN)*(trueN+falseP)*(trueN+falseN) if (stt>0) { mcc <- (trueP*trueN - falseP*falseN)/sqrt(stt) } else { mcc <- 0 } allSampI <- intersect(implA,allSamp) lallSamp <- length(allSamp) allSnpsI <- intersect(interA,allSnps) lallSnps <- length(allSnps) ## samples detectedsa <- lallSamp truePsa <- length(allSampI) falsePsa <- detectedsa-truePsa falseNsa <- implantedSF-truePsa trueNsa <- allsaF - truePsa - falseNsa - falsePsa senssa <- truePsa/Possa specsa <- trueNsa/NegsaF if (detectedsa>0) { fdrsa <- falsePsa/detectedsa } else { fdrsa <- 0 } fprsa <- falsePsa/NegsaF accsa <- (truePsa+trueNsa)/allsaF st11sa <- (2*truePsa+falsePsa+falseNsa) if (st11sa>0) { F1sa <- 2*truePsa/(2*truePsa+falsePsa+falseNsa) } else { F1sa <- 0 } sttsa <- (truePsa+falsePsa)*(truePsa+falseNsa)*(trueNsa+falsePsa)*(trueNsa+falseNsa) if (sttsa>0) { mccsa <- (truePsa*trueNsa - falsePsa*falseNsa)/sqrt(sttsa) } else { mccsa <- 0 } ## snps detectedsn <- lallSnps truePsn <- length(allSnpsI) falsePsn <- detectedsn-truePsn falseNsn <- length(interA) - truePsn trueNsn <- snps - truePsn - falseNsn - falsePsn senssn <- truePsn/Possn specsn <- trueNsn/Negsn if (detectedsn>0) { fdrsn <- falsePsn/detectedsn } else { fdrsn <- 0 } fprsn <- falsePsn/Negsn accsn <- (truePsn+trueNsn)/allsn st11sn <- (2*truePsn+falsePsn+falseNsn) if (st11sn>0) { F1sn <- 2*truePsn/(2*truePsn+falsePsn+falseNsn) } else { F1sn <- 0 } sttsn <- (truePsn+falsePsn)*(truePsn+falseNsn)*(trueNsn+falsePsn)*(trueNsn+falseNsn) if (sttsn>0) { mccsn <- (truePsn*trueNsn - falsePsn*falseNsn)/sqrt(sttsn) } else { mccsn <- 0 } fabiaresIBD[[run]] <- matrix(0,noImplanted,35) for (noIm in 1:noImplanted) { dataRmax <- c(run,noIm,maxtruePsa[[noIm]],maxfalseNsa[[noIm]],maxfalsePsa[[noIm]],maxtrueNsa[[noIm]],maxsenssa[[noIm]],maxspecsa[[noIm]],maxfdrsa[[noIm]],maxfprsa[[noIm]],maxaccsa[[noIm]],maxF1sa[[noIm]],maxmccsa[[noIm]], maxtruePsn[[noIm]],maxfalseNsn[[noIm]],maxfalsePsn[[noIm]],maxtrueNsn[[noIm]],maxsenssn[[noIm]],maxspecsn[[noIm]],maxfdrsn[[noIm]],maxfprsn[[noIm]],maxaccsn[[noIm]],maxF1sn[[noIm]],maxmccsn[[noIm]], maxtrueP[[noIm]],maxfalseN[[noIm]],maxfalseP[[noIm]],maxtrueN[[noIm]],maxsens[[noIm]],maxspec[[noIm]],maxfdr[[noIm]],maxfpr[[noIm]],maxacc[[noIm]],maxF1[[noIm]],maxmcc[[noIm]]) write(dataRmax,file=paste("fabiaresIBD1.txt",sep=""),append=TRUE,ncolumns=10000000) fabiaresIBD[[run]][noIm,] <- dataRmax } dataR <- c(run,nob,leeb, truePsa,falseNsa,falsePsa,trueNsa,senssa,specsa,fdrsa,fprsa,accsa,F1sa,mccsa, truePsn,falseNsn,falsePsn,trueNsn,senssn,specsn,fdrsn,fprsn,accsn,F1sn,mccsn, trueP,falseN,falseP,trueN,sens,spec,fdr,fpr,acc,F1,mcc) fabiares[[run]] <- dataR dim(dataR) <- c(1,length(dataR)) write(dataR,file=paste("fabiares1.txt",sep=""),append=TRUE,ncolumns=10000000) } fabiaresM <- read.table(file="fabiares1.txt",skip=1) colMeans(fabiaresM)