topZeuNA <- function(res,n,p) { ret <- indi_europ[which(res@Z[n,]>quantile(res@Z[n,],p)),2] return(ret) } topZeu <- function(res,n,p) { ret <- indi_europ[which(res@Z[n,]>quantile(res@Z[n,],p)),2] return(ret) } topZEU <- function(Z,n,p) { ret <- indi_EUeurop[which(res@Z[n,]>quantile(res@Z[n,],p)),2] return(ret) } topZEUNA <- function(res,n,p) { ret <- indi_EUeurop[which(res@Z[n,]>quantile(res@Z[n,],p)),1] return(ret) } topZv <- function(res,n,p) { ret <- res@Z[n,which(res@Z[n,]>quantile(res@Z[n,],p))] return(ret) } topZw <- function(res,n,p) { ret <- which(res@Z[n,]>quantile(res@Z[n,],p)) return(ret) } topZeuNAw <- function(res,n,w) { ret <- indi_europ[which(res@Z[n,]>w),2] return(ret) } topZeuw <- function(res,n,w) { ret <- indi_europ[which(res@Z[n,]>w),2] return(ret) } topZEUw <- function(res,n,w) { ret <- indi_EUeurop[which(res@Z[n,]>w),2] return(ret) } topZEUNAw <- function(res,n,w) { ret <- indi_EUeurop[which(res@Z[n,]>w),1] return(ret) } topZvw <- function(res,n,w) { ret <- res@Z[n,which(res@Z[n,]>w)] return(ret) } topZww <- function(res,n,w) { ret <- which(res@Z[n,]>w) return(ret) } topLw <- function(res,n,w) { # ret <- res@L[which(res@L[,n]>quantile(res@L[,n],p)),n] ret <- which(res@L[,n]>w) return(ret) } topL <- function(res,n,p) { # ret <- res@L[which(res@L[,n]>quantile(res@L[,n],p)),n] ret <- which(res@L[,n]>quantile(res@L[,n],p)) return(ret) } topLv <- function(res,n,p) { ret <- res@L[which(res@L[,n]>quantile(res@L[,n],p)),n] return(ret) } topLvw <- function(res,n,w) { ret <- res@L[which(res@L[,n]>w),n] return(ret) } histLhw <- function(res,n,w,intervv=500,off=0) { off <- off%%intervv bb <- seq(1-off,length(res@L[,n]),intervv) lb <- length(bb) bb <- c(bb,(bb[lb]+intervv)) a <- topL(res,n,p) hist(a,breaks=bb,plot = FALSE) } histLh <- function(res,n,p,intervv=500,off=0) { off <- off%%intervv bb <- seq(1-off,length(res@L[,n]),intervv) lb <- length(bb) bb <- c(bb,(bb[lb]+intervv)) a <- which(res@L[,n]>quantile(res@L[,n],p)) rhist <- hist(a,breaks=bb,plot = FALSE) return(rhist=rhist,a=a) } plotL <- function(res,n,p,t="p",cex=1) { a <- topL(res,n,p) b <- topLv(res,n,p) plot(a,b,xlim=c(0,dim(res@L)[1]),col="blue",type=t,cex=cex) } plotLw <- function(res,n,w,t="p",cex=1) { a <- topLw(res,n,w) b <- topLvw(res,n,w) plot(a,b,xlim=c(0,dim(res@L)[1]),col="blue",type=t,cex=cex) } plotLh <- function(res,n,p,intervv=500,off=0) { off <- off%%intervv bb <- seq(1-off,length(res@L[,n]),intervv) lb <- length(bb) bb <- c(bb,(bb[lb]+intervv)) a <- topL(res,n,p) aa <- hist(a,breaks=bb,plot = FALSE) plot(aa,freq = TRUE,xlim=c(0,dim(res@L)[1]),col="blue") } plotLhw <- function(res,n,w,intervv=500,off=0) { off <- off%%intervv bb <- seq(1-off,length(res@L[,n]),intervv) lb <- length(bb) bb <- c(bb,(bb[lb]+intervv)) a <- topLw(res,n,w) aa <- hist(a,breaks=bb,plot = FALSE) plot(aa,freq = TRUE,xlim=c(0,dim(res@L)[1]),col="blue") } plotLdw <- function(res,n,w) { a <- topLw(res,n,w) b <- topLvw(res,n,w) smoothScatter(a,b,xlim=c(0,dim(res@L)[1])) } plotLd <- function(res,n,p) { a <- topL(res,n,p) b <- topLv(res,n,p) smoothScatter(a,b,xlim=c(0,dim(res@L)[1])) } ser1 <- function(n,res=res,p1=0.99,p2=0.999,intervv=500,off=0) { print(rbind(topZEU(res,n,p1),topZv(res,n,p1),topZEUNA(res,n,p1))) plotLh(res,n,p=p2,intervv,off=off) dev.new() plotL(res,n,p2) } extractIBD <- function(rrnn,p=0.999,inte=500,thres=30,off=0) { off <- off%%inte bb <- seq(1-off,length(rrnn),inte) lb <- length(bb) bb <- c(bb,(bb[lb]+inte)) a2 <- which(rrnn>quantile(rrnn,p)) hh1 <- hist(a2,breaks=bb,plot = FALSE) a1 <- which(hh1$counts>thres) la <- length(a1) if (la>0) { b1 <- round(hh1$mid[a1]) b2 <- rep(inte,la) b3 <- list() b4 <- rep(0,la) for (i in 1:la) { range <- (b1[i]-(inte%/%2)):(b1[i]+(inte%/%2)) a3 <- intersect(range,a2) b3[[i]] <- sort(a3) b4[i] <- length(a3) } return(list(m=b1,l=b2,pos=b3,len=b4)) } else { return(list(m=NULL,l=NULL,pos=NULL,len=NULL)) } } extractIBD_old <- function(rrnn,p=0.999,inte=500,thres=30,off=0) { off <- off%%inte bb <- seq(1-off,length(rrnn),inte) lb <- length(bb) bb <- c(bb,(bb[lb]+inte)) a3 <- which(rrnn>quantile(rrnn,p)) hh1 <- hist(a3,breaks=bb,plot = FALSE) if (la>0) { b1 <- hh1$mid[a1] a2 <- rep(0,la) b2 <- rep(0,la) j <- 1 i <- 1 while (i <= la) { ww <- 1 mm <- b1[i] if (i= (a2[i]-(b2[i]-1)/2))] a5 <- a4[which(a4 <= (a2[i]+(b2[i]-1)/2))] } else { a4 <- a3[which(a3 >=(a2[i]-b2[i]/2))] a5 <- a4[which(a4 <=(a2[i]+b2[i]/2)-1)] } b3[[i]] <- a5 b4[i] <- length(a5) } return(list(m=a2,l=b2,pos=b3,len=b4)) } else { return(list(m=NULL,l=NULL,pos=NULL,len=NULL)) } } snpPlot <- function(x,range=NULL, yLabels=NULL, zlim=NULL,title=NULL,colRamp=12){ require(colorRamps) if (missing(x)) { stop("Data matrix x missing. Stopped.") } if (!is.matrix(x)) { x <- as.matrix(x) } if (!is.numeric(x)) { stop("Data matrix x must be numeric. Stopped.") } min <- min(x) max <- max(x) if( !is.null(zlim) ){ min <- zlim[1] max <- zlim[2] } if( !is.null(yLabels) ){ yLabels <- c(yLabels) } else { yLabels <- c(1:nrow(x)) } if( !is.null(range) ){ xLabels <- c(range) } else { xLabels <- c(1:ncol(x)) } if( is.null(title) ){ title <- c() } # Red and green range from 0 to 1 while Blue ranges from 1 to 0 ColorRamp <- switch(colRamp, rgb( seq(0,1,length=256),seq(0,1,length=256),seq(1,0,length=256)), rgb( seq(0,1,length=256),seq(0.5,1,length=256),seq(1,0,length=256)), rgb( seq(0,1,length=256),seq(0,0.5,length=256),seq(1,0,length=256)), blue2green(256), green2red(256), blue2yellow(256), cyan2yellow(256), magenta2green(256), matlab.like(256), matlab.like2(256), blue2green2red(256), rgb(t(rep(c(255,255,255),256) - col2rgb(matlab.like2(256))),maxColorValue = 255), rgb(t(rep(c(255,255,255),256) - col2rgb(blue2green2red(256))),maxColorValue = 255), rgb(t(abs(rep(c(0,255,0),256) - col2rgb(matlab.like2(256)))),maxColorValue = 255), rgb(t(abs(rep(c(0,55,0),256) - col2rgb(blue2green2red(256)))),maxColorValue = 255), rgb(t(abs(rep(c(255,0,255),256) - col2rgb(matlab.like2(256)))),maxColorValue = 255), rgb(t(abs(rep(c(255,0,255),256) - col2rgb(blue2green2red(256)))),maxColorValue = 255) ) # ColorRamp <- rgb( seq(0,1,length=256), # Red # seq(0,1,length=256), # Green # seq(1,0,length=256)) # Blue ColorLevels <- seq(min, max, length=length(ColorRamp)) # Reverse Y axis reverse <- nrow(x) : 1 yLabels <- yLabels[reverse] x <- x[reverse,] # Data Map # par(mar = c(3,5,2.5,2)) # par(mar = c(3,5,2.5,1)) # Original: par(mar=c(5.1,4.1,4.1,2.1)) marO <- par("mar") par(mar = c(2.5,6,3,1)) image(1:ncol(x), 1:nrow(x), t(x), col=ColorRamp, xlab="", ylab="", axes=FALSE, zlim=c(min,max)) if( !is.null(title) ){ title(main=title) } xt <- c(1,axTicks(1)) lxt <- length(xt) if (abs(xt[lxt]-ncol(x))>5) { xt <- c(xt,ncol(x)) } axis(BELOW<-1, at=xt, labels=prettyNum(xLabels[xt], big.mark = ","), cex.axis=0.7) axis(LEFT <-2, at=1:length(yLabels), labels=yLabels, las= HORIZONTAL<-1,cex.axis=0.7) par(mar = marO) layout(1) } plotRangeL <- function(Lout,ibb,physPos=NULL,colRamp=12,val=c(0.0,2.0,1.0),chrom=1,count=0,labelsNA=NULL,prange=NULL,labelsNA1 = c("model L") ) { if (missing(Lout)) { stop("Sample SNVs 'Lout' are missing. Stopped.") } if (missing(ibb)) { stop("tagSNVs 'ibb' are missing. Stopped.") } n <- dim(Lout)[2] n1 <- length(ibb) doMis <- FALSE if (n1>4) { n1 <- n1-1 doMis <- TRUE } if ((!is.null(prange))&&(!is.null(physPos))) { if (length(prange)==2) { ma <- length(ibb[[1]]) a1 <- which(physPos>=prange[1]) a2 <- which(physPos<=prange[2]) aa <- intersect(a1,a2) if (length(aa)>1) { ibb <- ibb[[1]][aa] physPos <- physPos[aa] } } } if (is.null(physPos)) { physPos <- ibb*100 } ma <- length(ibb[[1]]) range <- ibb[[1]][1]:ibb[[1]][ma] lr <- ibb[[1]][ma]-ibb[[1]][1]+1 physRanget <- physPos[1]:physPos[ma] lt <- length(physRanget) ltr <- lt%/%(lr-1) ltt <- c(1,(1:(lr-2))*ltr,lt) physRange <- physRanget[ltt] physLength <- round(lt/1000) physPosM <- (physPos[ma] + physPos[1])%/%2 mat <- matrix(val[1],(n+n1+1),lr) mat[(n+1),] <- val[2] for (iB in 1:n1) { ibb_pos <- match(ibb[[iB]],range) mat[(n+1+iB),ibb_pos] <- val[3] } if (doMis) { ibb_pos <- match(ibb[[n1+1]],range) mat[(n+1+3),ibb_pos] <- 0.35*val[3] } ibb_pos <- match(ibb[[1]],range) for (j in 1:n) { lla <- which(Lout[range,j]>=0.1) llb <- setdiff(1:lr,lla) llc <- intersect(ibb_pos,lla) lla <- setdiff(lla,llc) mat[j,lla] <- val[2] mat[j,llb] <- val[1] mat[j,llc] <- val[3] } if (is.null(labelsNA)) { labelsNA <- as.character(1:n) } if (length(labelsNA1)!=n1) { if (n1>1) { ade <- rep("",(n1-1)) labelsNA1 = c("model L",ade) } else { labelsNA1 = c("model L") } } labelsNA <- c(labelsNA,"",labelsNA1) if (count<=0) { snpPlot(mat,range=physRange,yLabels=labelsNA, title=paste("chr: ",chrom," || pos: ",prettyNum(physPosM, big.mark = ",")," || length: ",physLength,"kbp || #SNPs: ",ma," || #Samples: ",n,sep=""),colRamp=colRamp) } else { snpPlot(mat,range=physRange,yLabels=labelsNA, title=paste("count: ",count," || chr: ",chrom," || pos: ",prettyNum(physPosM, big.mark = ",")," || length: ",physLength,"kbp || #SNPs: ",ma," || #Samples: ",n,sep=""),colRamp=colRamp) } } plotoneIBD <- function(IBD,filename,ibdC) { samp <- IBD[[ibdC]]$noSamples ibb <- IBD[[ibdC]]$noSnps ibb <- as.integer(sort.int(as.integer(unique(ibb)))) snpPositions <- IBD[[ibdC]]$snpPositions labels_ALL <- IBD[[ibdC]]$labelSamples Lout <- readSamplesSpfabia(X=filename,samples=samp,lowerB=0,upperB=1000.0) if (!exists("annotationAChr1")) { load(file="/seppdata/sepp/linkage/release/annotation/annotationAChr1.Rda") } if (!exists("posA")) { posA <- annotationAChr1[,2] neaA <- annotationAChr1[,24] refA <- annotationAChr1[,4] altA <- annotationAChr1[,5] } mA <- match(snpPositions,posA,nomatch=0) gA <- mA[which(mA>0)] l2 <- length(gA) neaAG <- neaA[gA] altAG <- altA[gA] refAG <- refA[gA] ibbA <- c() for (it in 1:l2) { if (toupper(neaAG[it]) == toupper(altAG[it])) { ibbA <- c(ibbA,ibb[it]) } } if (!exists("neanderHg19Chr1")) { load(file="/seppdata/sepp/linkage/release/annotation/neanderHg19Chr1.Rda") } if (!exists("posN")) { posN <- neanderHg19Chr1$V2 neaN <- neanderHg19Chr1$ad1 refN <- neanderHg19Chr1$V5 altN <- neanderHg19Chr1$V6 } mN <- match(snpPositions,posN,nomatch=0) gN <- mN[which(mN>0)] l2 <- length(gN) neaNG <- neaN[gN] altNG <- altN[gN] refNG <- refN[gN] ibbN <- c() ibbNa <- c() for (it in 1:l2) { AAs <- strsplit(as.character(neaNG[it]),",")[[1]] if (length(AAs)>1) { if (AAs[1] == altNG[it]) { ibbN <- c(ibbN,ibb[it]) } } else { if (AAs[1]=="-") { ibbNa <- c(ibbNa,ibb[it]) } } } if (!exists("denisovaHg19Chr1")) { load(file="/seppdata/sepp/linkage/release/annotation/denisovaHg19Chr1.Rda") } if (!exists("posD")) { posD <- denisovaHg19Chr1$V2 neaD <- denisovaHg19Chr1$ad1 refD <- denisovaHg19Chr1$V5 altD <- denisovaHg19Chr1$V6 } mD <- match(snpPositions,posD,nomatch=0) gD <- mD[which(mD>0)] l2 <- length(gD) neaDG <- neaD[gD] altDG <- altD[gD] refDG <- refD[gD] ibbD <- c() for (it in 1:l2) { AAs <- strsplit(as.character(neaDG[it]),",")[[1]] if (length(AAs)>1) { if (AAs[1] == altDG[it]) { ibbD <- c(ibbD,ibb[it]) } } } ibbL <- list(ibb,ibbA,ibbN,ibbD,ibbNa) plotRangeL(Lout=Lout,ibb=ibbL,physPos=snpPositions,colRamp=12,val=c(0.0,2.0,1.0),chrom=1,count=0,labelsNA=labels_ALL,labelsNA1=c("model L","Ancestor","Neandertal","Denisova")) } plotOneIBD <- function(IBD,filename,ibdC=1) { if (missing(IBD)) { stop("List of haplotypes 'IBD' is missing. Stopped.") } if (missing(filename)) { stop("File name 'filename' for loading sample is missing. Stopped.") } if (length(IBD)>=ibdC) { samp <- IBD[[ibdC]]$noSamples ibb <- IBD[[ibdC]]$noSnps ibb <- as.integer(sort.int(as.integer(unique(ibb)))) snpPositions <- IBD[[ibdC]]$snpPositions labels_ALL <- IBD[[ibdC]]$labelSamples Lout <- readSamplesSpfabia(X=filename,samples=samp,lowerB=0,upperB=1000.0) ibbL <- list(ibb) labelsK <- c("model L") # here other annotations can be included # ibbL <- list(ibb,ibbAnnot1,ibbAnnot2) # labelsK <- c("model L","Annot1",Annot2") plotRangeL(Lout=Lout,ibb=ibbL,physPos=snpPositions,colRamp=12,val=c(0.0,2.0,1.0),chrom=1,count=0,labelsNA=labels_ALL,labelsNA1=labelsK) } else { message("No plot because haplotype ",ibdC," does not exist!") } } plotIBDs <- function(IBD,filename) { devAskNewPage(ask = TRUE) for (i in 1:length(IBD)) { samp <- IBD[[i]]$noSamples ibb <- IBD[[i]]$noSnps ibb <- as.integer(sort.int(as.integer(unique(ibb)))) snpPositions <- IBD[[i]]$snpPositions labels_ALL <- IBD[[i]]$labelSamples Lout <- readSamplesSpfabia(X=filename,samples=samp,lowerB=0,upperB=1000.0) plotRangeL(Lout=Lout,ibb=list(ibb),physPos=snpPositions,colRamp=12,val=c(0.0,2.0,1.0),chrom=1,count=i,labelsNA=labels_ALL) } devAskNewPage(ask = FALSE) } getIBDs <- function(res,sPF,annot=NULL,chrom=1,labelsA=NULL,max_n=NULL,ps=0.99,psZ=0.8,inteA=500,thresA=11,minSNPs=8,off=0,procMinSamps=0.1,thresPrune=1e-3) { if (is.null(labelsA)) { nnL <- length(res@Z[1,]) labelsA <- cbind(as.character(1:nnL),as.character(1:nnL),as.character(1:nnL),as.character(1:nnL)) } if (is.null(max_n)) { i <- 0 max_n <- ncol(res@L) while ((i0.000001)) { i <- i +1 } max_n <- i } if (is.null(annot)) { snps <- length(res@L[,1]) dummyL <- 1:snps dummyC <- rep(0,snps) dummyN <- rep("N",snps) dummy <- as.character(dummyL) annot <- list() annot[[1]] <- dummyL annot[[2]] <- 100*dummyL annot[[3]] <- dummy annot[[4]] <- dummyN annot[[5]] <- dummyN annot[[6]] <- dummy annot[[7]] <- dummy annot[[8]] <- dummy annot[[9]] <- dummy annot[[10]] <- as.double(dummyC) annot[[11]] <- dummyC } ibd_res <- list() ibd_res_idx <- 1 for (n in 1:max_n) { ib <- extractIBD(res@L[,n],p=ps,inte=inteA,thres=thresA,off=off) topZ <- which(res@Z[n,]>quantile(res@Z[n,],psZ)) if ((!is.null(ib$len))&&(length(ib$len)>0)) { for (i in 1:length(ib$len)) { ibb <- as.vector(unlist(ib$pos[[i]])) #ma <- length(ibb) il <- length(ibb) aa <- c() nu <- list() for (j in 1:il) { nu[[j]] <- sPF$sL[[ibb[j]]] aa <- c(aa,nu[[j]]) } aa <- unique(aa) aa <- intersect(aa,topZ) lq <- length(aa) if (lq>1) { aa <- sort(aa) mat <- matrix(0,lq,il) for (j in 1:il) { nv <- which(match(aa,nu[[j]])>0) mat[nv,j] <- rep(1,length(nv)) } select1 <- which(rowSums(mat)>minSNPs) lq1 <- length(select1) if (lq1>1) { noSample1 <- aa[select1] mat1 <- mat[select1,] select2 <- which(colSums(mat1)>1) lq2 <- length(select2) if (lq2>1) { mat1 <- mat1[,select2] noSnp1 <- ibb[select2] matB <- mat1 ig=1 ie=1 while (ie == 1) { nnc <- ncol(matB) subclS <- 1:nnc lq2A <- nnc nnl <- nrow(matB) subclM <- 1:nnl lq1A <- nnl lq2A_old <- lq2A lq1A_old <- lq1A done=0 if ((lq1A<2)||(lq2A<2)) { lq1A <- 1 done <- 1 } while (done==0) { matA <- matB[subclM,subclS] cs1 <- rowSums(matA) m2 <- which.max(cs1) v1 <- matA%*%matA[m2,] v1A <- v1 v1A[m2] <- 0 m3 <- which.max(v1A) v2 <- matA%*%matA[m3,] subclM1A <- which(v1>minSNPs) subclM1B <- which(v2>minSNPs) subclM1 <- intersect(subclM1A,subclM1B) lq1A <- length(subclM1) if (lq1A>1) { cs2 <- colSums(matA[subclM1,]) subclS1 <- which(cs2>max(1,(lq1A%/%(1/procMinSamps)))) lq2A <- length(subclS1) subclM <- subclM[subclM1] subclS <- subclS[subclS1] subclM2 <- subclM[c(m2,m3)] if ((lq1A_old==lq1A)&&(lq2A_old==lq2A)) { done <- 1 } else { lq2A_old <- lq2A lq1A_old <- lq1A } if ((lq1A<2)||(lq2A<2)) { lq1A <- 1 done <- 1 } } else { done <- 1 } } if (lq1A>1) { a0 <- noSnp1[subclS] a1 <- diff(a0) m <- 1/(max(2,median(a1))) k0 <- length(a1) k <- k0 wegS <- 0 wegE <- 0 thresPruneN <- 1.0 - thresPrune if (k>2) { doPrune <- TRUE } else { doPrune <- FALSE } while (doPrune) { doPrune <- FALSE if ((k>2)&&(pexp(a1[1],m) > thresPruneN)) { a1 <- a1[-1] k <- k - 1 m <- 1/(max(2,median(a1))) wegS <- wegS+1 doPrune <- TRUE } else { if ((k>2)&&(pexp(a1[2],m) > thresPruneN)) { a1 <- a1[c(-2,-1)] k <- k - 2 m <- 1/(max(2,median(a1))) wegS <- wegS+2 doPrune <- TRUE } } if ((k>2)&&(pexp(a1[k],m) > thresPruneN)) { a1 <- a1[-k] k <- k - 1 m <- 1/(max(2,median(a1))) wegE <- wegE+1 doPrune <- TRUE } else { if ((k>2)&&(pexp(a1[k-1],m) > thresPruneN)) { a1 <- a1[c(-k,(-k+1))] k <- k - 2 m <- 1/(max(2,median(a1))) wegE <- wegE+2 doPrune <- TRUE } } } subclS <- subclS[(1+wegS):(k0-wegE)] lq2A <- length(subclS) if (lq2A>=minSNPs) { matA <- matB[subclM,subclS] noSample <- noSample1[subclM] noSample2 <- noSample1[subclM2] labelsEUA <- labelsA[noSample,2] labelsNAA <- labelsA[noSample,1] labelsTechA <- labelsA[noSample,4] labels_ALLA <- paste(labelsNAA,labelsEUA,sep="_") labelsNA2 <- labelsA[noSample2,1] noSnp <- noSnp1[subclS] physRangeA <- annot[[2]][noSnp] ibdLength <- physRangeA[lq2A]-physRangeA[1] ibdPos <- (physRangeA[lq2A]+physRangeA[1])%/%2 allelesA <- paste(annot[[4]][noSnp],":",annot[[5]][noSnp],sep="") snpNamesA <- annot[[3]][noSnp] snpFreqA <- annot[[10]][noSnp] nnn <- length(labelsA[,1]) snpGroupFreqA <- sPF$nsL[noSnp]/nnn snpChangeA <- annot[[11]][noSnp] snpsPerSample <- rowSums(matA) samplePerSnp <- colSums(matA) res_t <- list(number=ibd_res_idx,bicluster=n,chrom=chrom,ibdPos=ibdPos,ibdLength=ibdLength,numberSamples=lq1A,numberSnps=lq2A,noSamples=noSample,noSnps=noSnp,countrySamples=labelsEUA,idSamples=labelsNAA,labelSamples=labels_ALLA,techSamples=labelsTechA,maxSamples=labelsNA2,snpPositions=physRangeA,snpAlleles=allelesA,snpNames=snpNamesA,snpFreq=snpFreqA,snpGroupFreq=snpGroupFreqA,snpChange=snpChangeA,snpsPerSample=snpsPerSample,samplePerSnp=samplePerSnp) ibd_res[[ibd_res_idx]] <- res_t ibd_res_idx <- ibd_res_idx +1 ig <- ig + 1 if ((lq1A<(nnl-1))&&(lq2A<(nnc-1))) { matB <- matB[-subclM,-subclS] noSample1 <- noSample1[-subclM] noSnp1 <- noSnp1[-subclS] } else { ie=100 } } else { ie=100 } } else { ie=100 } } } } } } } } return(ibd_res) } ibd2excel <- function(resIBD,filename) { libd <- length(resIBD) if (libd>0) { lentry <- length(resIBD[[1]]) if (lentry<25) { out <- c("numb , ","bicluster , ","chrom , ","ibdPos , ","ibdLength , ","numberSamples , ","numberSnps , ","SampleNumber , ","SnpNumber , ","countrySamples , ","idSamples , ","labelSamples , ","techSamples , ","maxSamples , ","snpPositions , ","snpAlleles , ","snpNames , ","snpFreq , ","snpGroupFreq , ","snpChange , ","snpsPerSample , ","samplePerSnp") } else { out <- c("numb , ","bicluster , ","chrom , ","ibdPos , ","ibdLength , ","numberSamples , ","numberSnps , ","SampleNumber , ","SnpNumber , ","countrySamples , ","idSamples , ","labelSamples , ","techSamples , ","maxSamples , ","snpPositions , ","snpAlleles , ","snpNames , ","snpFreq , ","snpGroupFreq , ","snpChange , ","snpsPerSample , ","samplePerSnp , ","exonicPosition , ","exonicAnnotation , ","exonicCount , ","variantPosition , ","variantAnnotation , ","variantCount , ","TranscriptionFactorPosition , ","TranscriptionFactorAnnotation") } write.table(t(out),file=filename,sep=" ",quote = FALSE,row.names = FALSE,col.names = FALSE) for (i in 1:libd) { out <- c(resIBD[[i]][[1]]) for (j in 2:lentry) { out <- c(out,",",resIBD[[i]][[j]]) } write.table(t(out),file=filename,sep=" ",quote = FALSE,row.names = FALSE,col.names = FALSE,append=TRUE) } } else { out <- c("numb , ","bicluster , ","chrom , ","ibdPos , ","ibdLength , ","numberSamples , ","numberSnps , ","SampleNumber , ","SnpNumber , ","countrySamples , ","idSamples , ","labelSamples , ","techSamples , ","maxSamples , ","snpPositions , ","snpAlleles , ","snpNames , ","snpFreq , ","snpGroupFreq , ","snpChange , ","snpsPerSample , ","samplePerSnp") write.table(t(out),file=filename,sep=" ",quote = FALSE,row.names = FALSE,col.names = FALSE) } } sim <- function(x,y,simv="minD",minInter=3) { intervv1 <- length(intersect(x,y)) if (intervv1>=minInter) { erg <- switch(simv, jaccard = intervv1/length(union(x,y)), dice = 2*intervv1/(length(x)+length(y)), minD = intervv1/ min(length(x),length(y)), maxD = intervv1/ max(length(x),length(y)) ) } else { erg <- 0 } return(erg) } compareIBDres <- function(resIBD1,resIBD2=NULL,simv="minD",pSnps=NULL,pSamp=NULL,minSnps=10,minSamp=2) { if (!is.null(resIBD2)) { snps1 <- sapply(resIBD1,function(x) {x[[9]]} , simplify=FALSE) snps2 <- sapply(resIBD2,function(x) {x[[9]]} , simplify=FALSE) samp1 <- sapply(resIBD1,function(x) {x[[8]]} , simplify=FALSE) samp2 <- sapply(resIBD2,function(x) {x[[8]]} , simplify=FALSE) snps <- c(snps1,snps2) samp <- c(samp1,samp2) } else { snps <- sapply(resIBD1,function(x) {x[[9]]} , simplify=FALSE ) samp <- sapply(resIBD1,function(x) {x[[8]]} , simplify=FALSE) } l <- length(snps) snpsSim <- sapply(snps,function(x) { sapply(snps, function(y) {sim(x,y,simv,minInter=minSnps)})} ) if (!is.null(pSnps)) { snpsSim <- snpsSim^pSnps } sampSim <- sapply(samp,function(x) { sapply(samp, function(y) {sim(x,y,simv,minInter=minSamp)})} ) if (!is.null(pSamp)) { sampSim <- sampSim^pSamp } oneM <- matrix(1,nrow=l,ncol=l) dist <- as.dist(oneM - snpsSim*sampSim) #sampDist <- as.dist(oneM - sampSim) #snpsDist <- as.dist(oneM - snpsSim) clust <- hclust(dist) # sampClust <- hclust(sampDist) # snpsClust <- hclust(snpsDist) return(clust) } mergeIBDres <- function(resIBD1,resIBD2=NULL,comp,cut=0.9) { clustIBD <- cutree(comp,h=cut) l <- length(resIBD1) if (!is.null(resIBD2)) { l2 <- length(resIBD2) } else { l2 <- 0 } tabClust <- table(clustIBD) cl <- length(tabClust) resIBDmerge <- list() for (i in 1:cl) { tr1 <- which(clustIBD==i) ltr1 <- length(tr1) vv <- list() p2 <- tr1[1] if (p2<=l) { for (j in 1:22) { vv[[j]] <- resIBD1[[p2]][[j]] } } else { if ((l2>0)&&(p2<=l+l2)) { p2 <- p2-l for (j in 1:22) { vv[[j]] <- resIBD2[[p2]][[j]] } } } if (ltr1>1) { for (j in 2:ltr1) { p2 <- tr1[j] if (p2<=l) { ri1 <- resIBD1[[p2]][[8]] ri2 <- resIBD1[[p2]][[9]] ma1 <- match(ri1,vv[[8]]) ma2 <- match(ri2,vv[[9]]) na1 <- which(ma1>0) na2 <- which(ma2>0) if (length(na1)>0) { ma1 <- ma1[na1] vv[[21]][ma1] <- pmax(vv[[21]][ma1],resIBD1[[p2]][[21]][na1]) } if (length(na2)>0) { ma2 <- ma2[na2] vv[[22]][ma2] <- pmax(vv[[22]][ma2],resIBD1[[p2]][[22]][na2]) } ab1 <- 1:resIBD1[[p2]][[6]] ab2 <- 1:resIBD1[[p2]][[7]] a1 <- ab1[-na1] a2 <- ab2[-na2] if (length(a1)>0) { vv[[8]] <- c(vv[[8]],resIBD1[[p2]][[8]][a1]) vv[[10]] <- c(vv[[10]],resIBD1[[p2]][[10]][a1]) vv[[11]] <- c(vv[[11]],resIBD1[[p2]][[11]][a1]) vv[[12]] <- c(vv[[12]],resIBD1[[p2]][[12]][a1]) vv[[13]] <- c(vv[[13]],resIBD1[[p2]][[13]][a1]) vv[[21]] <- c(vv[[21]],resIBD1[[p2]][[21]][a1]) } if (length(a2)>0) { vv[[9]] <- c(vv[[9]],resIBD1[[p2]][[9]][a2]) vv[[15]] <- c(vv[[15]],resIBD1[[p2]][[15]][a2]) vv[[16]] <- c(vv[[16]],resIBD1[[p2]][[16]][a2]) vv[[17]] <- c(vv[[17]],resIBD1[[p2]][[17]][a2]) vv[[18]] <- c(vv[[18]],resIBD1[[p2]][[18]][a2]) vv[[19]] <- c(vv[[19]],resIBD1[[p2]][[19]][a2]) vv[[20]] <- c(vv[[20]],resIBD1[[p2]][[20]][a2]) vv[[22]] <- c(vv[[22]],resIBD1[[p2]][[22]][a2]) } vv[[14]] <- c(vv[[14]],resIBD1[[p2]][[14]]) } else { if ((l2>0)&&(p2<=(l+l2))) { p2 <- p2-l ri1 <- resIBD2[[p2]][[8]] ri2 <- resIBD2[[p2]][[9]] ma1 <- match(ri1,vv[[8]]) ma2 <- match(ri2,vv[[9]]) na1 <- which(ma1>0) na2 <- which(ma2>0) if (length(na1)>0) { ma1 <- ma1[na1] vv[[21]][ma1] <- pmax(vv[[21]][ma1],resIBD2[[p2]][[21]][na1]) } if (length(na2)>0) { ma2 <- ma2[na2] vv[[22]][ma2] <- pmax(vv[[22]][ma2],resIBD2[[p2]][[22]][na2]) } ab1 <- 1:resIBD2[[p2]][[6]] ab2 <- 1:resIBD2[[p2]][[7]] a1 <- ab1[-na1] a2 <- ab2[-na2] if (length(a1)>0) { vv[[8]] <- c(vv[[8]],resIBD2[[p2]][[8]][a1]) vv[[10]] <- c(vv[[10]],resIBD2[[p2]][[10]][a1]) vv[[11]] <- c(vv[[11]],resIBD2[[p2]][[11]][a1]) vv[[12]] <- c(vv[[12]],resIBD2[[p2]][[12]][a1]) vv[[13]] <- c(vv[[13]],resIBD2[[p2]][[13]][a1]) vv[[21]] <- c(vv[[21]],resIBD2[[p2]][[21]][a1]) } if (length(a2)>0) { vv[[9]] <- c(vv[[9]],resIBD2[[p2]][[9]][a2]) vv[[15]] <- c(vv[[15]],resIBD2[[p2]][[15]][a2]) vv[[16]] <- c(vv[[16]],resIBD2[[p2]][[16]][a2]) vv[[17]] <- c(vv[[17]],resIBD2[[p2]][[17]][a2]) vv[[18]] <- c(vv[[18]],resIBD2[[p2]][[18]][a2]) vv[[19]] <- c(vv[[19]],resIBD2[[p2]][[19]][a2]) vv[[20]] <- c(vv[[20]],resIBD2[[p2]][[20]][a2]) vv[[22]] <- c(vv[[22]],resIBD2[[p2]][[22]][a2]) } vv[[14]] <- c(vv[[14]],resIBD2[[p2]][[14]]) } } } so8 <- sort(vv[[8]],index.return=TRUE) vv[[8]] <- vv[[8]][so8$ix] vv[[10]] <- vv[[10]][so8$ix] vv[[11]] <- vv[[11]][so8$ix] vv[[12]] <- vv[[12]][so8$ix] vv[[13]] <- vv[[13]][so8$ix] vv[[21]] <- vv[[21]][so8$ix] so9 <- sort(vv[[9]],index.return=TRUE) vv[[9]] <- vv[[9]][so9$ix] vv[[15]] <- vv[[15]][so9$ix] vv[[16]] <- vv[[16]][so9$ix] vv[[17]] <- vv[[17]][so9$ix] vv[[18]] <- vv[[18]][so9$ix] vv[[19]] <- vv[[19]][so9$ix] vv[[20]] <- vv[[20]][so9$ix] vv[[22]] <- vv[[22]][so9$ix] vv[[4]] <- round(median(vv[[15]])) vv[[5]] <- max(vv[[15]])-min(vv[[15]]) vv[[6]] <- length(vv[[8]]) vv[[7]] <- length(vv[[9]]) vv[[14]] <- unique(vv[[14]]) } vv[[1]] <- i vv[[2]] <- ltr1 res_t <- list(number=vv[[1]],bicluster=vv[[2]],chrom=vv[[3]],ibdPos=vv[[4]],ibdLength=vv[[5]],numberSamples=vv[[6]],numberSnps=vv[[7]],noSamples=vv[[8]],noSnps=vv[[9]],countrySamples=vv[[10]],idSamples=vv[[11]],labelSamples=vv[[12]],techSamples=vv[[13]],maxSamples=vv[[14]],snpPositions=vv[[15]],snpAlleles=vv[[16]],snpNames=vv[[17]],snpFreq=vv[[18]],snpGroupFreq=vv[[19]],snpChange=vv[[20]],snpsPerSample=vv[[21]],samplePerSnp=vv[[22]]) resIBDmerge[[i]] <- res_t } return(resIBDmerge) }