startRun <- 0 endRun <- 639 lengthAll <- 3201157 shift <- 5000 intervallAll <- 10000 over <- intervallAll%/%shift N1 <- lengthAll%/%shift endRunAll <- (N1-over+1) # 639 #exonic$V2 exonicClass <- c("frameshift deletion","frameshift insertion","frameshift substitution","nonframeshift deletion","nonframeshift insertion","nonframeshift substitution","nonsynonymous SNV","stopgain SNV","stoploss SNV","synonymous SNV","unknown") #variant$V1 variantClass <- c("downstream","exonic","exonic;splicing","intergenic","intronic","ncRNA_exonic","ncRNA_intronic","ncRNA_splicing","ncRNA_UTR3","ncRNA_UTR5","ncRNA_UTR5;ncRNA_UTR3","splicing","upstream","upstream;downstream","UTR3","UTR5","UTR5;UTR3") ######################### avibdLength <- list() avibdPos <- list() count <- 0 for (posAll in (startRun+1):(endRun+1)) { start <- (posAll-1)*shift end <- start + intervallAll if (end > lengthAll) { end <- lengthAll } pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="") load(file=paste("resIBD_chr1",pRange,"annot.Rda",sep="")) count <- count + length(IBDannot) avibdPos[[posAll]] <- sapply(IBDannot,function(x) {x[[4]]} , simplify=FALSE) avibdLength[[posAll]] <- sapply(IBDannot,function(x) {x[[5]]} , simplify=FALSE) } ibdPos <- unlist(avibdPos) ibdLength <- unlist(avibdLength) ibdSim <- cbind(ibdPos,ibdLength) dups <- duplicated(ibdSim) un <- which(dups==FALSE) length(ibdPos) length(ibdLength) length(un) count save(dups,un,file=paste("resDups.Rda",sep="")) noIBDD <- c() noSnpsD <- c() noSampD <- c() ibdLengthD <- c() ibdPosD <- c() ibdDistD <- c() exonicCount <- list() variantCount <- list() tfbsD <- list() allCount <- 0 allCountT <- 0 for (posAll in (startRun+1):(endRun+1)) { start <- (posAll-1)*shift end <- start + intervallAll if (end > lengthAll) { end <- lengthAll } pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="") load(file=paste("resIBD_chr1",pRange,"annot.Rda",sep="")) noIBD <- length(IBDannot) IBDcount <- 0 if (noIBD>0) { for (ibdC in 1:noIBD) { allCount <- allCount + 1 if (!dups[allCount]) { allCountT <- allCountT +1 IBDcount <- IBDcount + 1 ibdPosD <- c(ibdPosD,IBDannot[[ibdC]][[4]]) ibdLengthD <- c(ibdLengthD,IBDannot[[ibdC]][[5]]) noSampD <- c(noSampD,IBDannot[[ibdC]][[6]]) noSnpsD <- c(noSnpsD,IBDannot[[ibdC]][[7]]) exonicCount[[allCountT]] <- IBDannot[[ibdC]][[25]] variantCount[[allCountT]] <- IBDannot[[ibdC]][[28]] g0 <- length(IBDannot[[ibdC]][[29]]) tfbsD <- c(tfbsD,g0) } } } noIBDD <- c(noIBDD,IBDcount) } ######################### allIBDsD <- sum(noIBDD) temp <- unlist(exonicCount) exonicCountD <- matrix(temp,ncol=11,nrow=allCountT,byrow=TRUE) temp <- unlist(variantCount) variantCountD <- matrix(temp,ncol=17,nrow=allCountT,byrow=TRUE) colnames(exonicCountD) <- exonicClass colnames(variantCountD) <- variantClass save(noIBDD,allIBDsD,noSnpsD,noSampD,ibdLengthD,ibdPosD,ibdDistD,exonicCountD,variantCountD,tfbsD,file=paste("resAnno.Rda",sep="")) print(allCountT) print(allIBDsD) sunoIBDD <- summary(noIBDD) print(sunoIBDD) sunoSnps <- summary(noSnpsD) print(sunoSnps) sunoSamp <- summary(noSampD) print(sunoSamp) suibdLength <- summary(ibdLengthD) print(suibdLength) ibdPosS <- sort(ibdPosD) ibdDistD <- diff(ibdPosS) suibdDist <- summary(ibdDistD) print(suibdDist) exonicCountS <- colSums(exonicCountD) names(exonicCountS) <- exonicClass print(exonicCountS) variantCountS <- colSums(variantCountD) names(variantCountS) <- variantClass print(variantCountS) tfbsD <- unlist(tfbsD) print(summary(tfbsD)) ss <- sum(tfbsD) print(ss) print(length(tfbsD)) for (i in 1:length(exonicClass)) { print(exonicClass[i]) print("####################") print(summary(exonicCountD[,i])) } for (i in 1:length(variantClass)) { print(variantClass[i]) print("####################") print(summary(variantCountD[,i])) } print(dim(exonicCountD)) exD <- apply(exonicCountD, 2, function(x) length(which(x>0))) print(exD) print(dim(variantCountD)) vaS <- apply(variantCountD, 2, function(x) length(which(x>0))) print(vaS) tfbsD <- unlist(tbfsD) #tfbsD <- unlist(tfbsD) print(length(tfbsD)) print(length(which(tfbsD>0)))