startRun <- 0
endRun <- 639

lengthAll <- 3201157
shift <- 5000
intervallAll <- 10000
over <- intervallAll%/%shift

N1 <- lengthAll%/%shift
endRunAll <- (N1-over+1) # 639

                                        #exonic$V2
exonicClass <- c("frameshift deletion","frameshift insertion","frameshift substitution","nonframeshift deletion","nonframeshift insertion","nonframeshift substitution","nonsynonymous SNV","stopgain SNV","stoploss SNV","synonymous SNV","unknown")

#variant$V1
variantClass <- c("downstream","exonic","exonic;splicing","intergenic","intronic","ncRNA_exonic","ncRNA_intronic","ncRNA_splicing","ncRNA_UTR3","ncRNA_UTR5","ncRNA_UTR5;ncRNA_UTR3","splicing","upstream","upstream;downstream","UTR3","UTR5","UTR5;UTR3")

#########################


avibdLength <- list()
avibdPos <- list()
count <- 0

for (posAll in (startRun+1):(endRun+1))  {

start <- (posAll-1)*shift
end <- start + intervallAll

if (end > lengthAll)  {

  end <- lengthAll
}
 
  
pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="")


load(file=paste("resIBD_chr1",pRange,"annot.Rda",sep=""))

count <- count + length(IBDannot)

avibdPos[[posAll]] <-  sapply(IBDannot,function(x) {x[[4]]}  , simplify=FALSE)

avibdLength[[posAll]] <-  sapply(IBDannot,function(x) {x[[5]]}  , simplify=FALSE)

}


ibdPos <- unlist(avibdPos)
ibdLength <- unlist(avibdLength)

ibdSim <- cbind(ibdPos,ibdLength)

dups <- duplicated(ibdSim)

un <- which(dups==FALSE)



length(ibdPos)
length(ibdLength)

length(un)
count

save(dups,un,file=paste("resDups.Rda",sep=""))


noIBDD <- c()
noSnpsD <- c()
noSampD <-  c()
ibdLengthD <-  c()
ibdPosD <-  c()
ibdDistD <- c()
exonicCount <- list()
variantCount <- list()
tfbsD <- list()


allCount <- 0
allCountT <- 0


for (posAll in (startRun+1):(endRun+1))  {

start <- (posAll-1)*shift
end <- start + intervallAll

if (end > lengthAll)  {

  end <- lengthAll
}
 
  
pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="")


load(file=paste("resIBD_chr1",pRange,"annot.Rda",sep=""))

noIBD <- length(IBDannot)

IBDcount <- 0

if (noIBD>0)  {

for (ibdC in 1:noIBD)  {

allCount <- allCount + 1

if (!dups[allCount]) {

allCountT <- allCountT +1
IBDcount <- IBDcount + 1



ibdPosD <-  c(ibdPosD,IBDannot[[ibdC]][[4]])
ibdLengthD <- c(ibdLengthD,IBDannot[[ibdC]][[5]])
noSampD <- c(noSampD,IBDannot[[ibdC]][[6]])
noSnpsD <- c(noSnpsD,IBDannot[[ibdC]][[7]])
exonicCount[[allCountT]] <- IBDannot[[ibdC]][[25]]
variantCount[[allCountT]] <- IBDannot[[ibdC]][[28]]

g0 <- length(IBDannot[[ibdC]][[29]])

tfbsD <- c(tfbsD,g0)

}


}
}

noIBDD <- c(noIBDD,IBDcount)

}



#########################

allIBDsD <- sum(noIBDD)

temp <- unlist(exonicCount)
exonicCountD <- matrix(temp,ncol=11,nrow=allCountT,byrow=TRUE)
temp <- unlist(variantCount)
variantCountD <- matrix(temp,ncol=17,nrow=allCountT,byrow=TRUE)

colnames(exonicCountD) <- exonicClass

colnames(variantCountD) <- variantClass

save(noIBDD,allIBDsD,noSnpsD,noSampD,ibdLengthD,ibdPosD,ibdDistD,exonicCountD,variantCountD,tfbsD,file=paste("resAnno.Rda",sep=""))



print(allCountT)
print(allIBDsD)

sunoIBDD <- summary(noIBDD)
print(sunoIBDD)


sunoSnps <- summary(noSnpsD)
print(sunoSnps)

sunoSamp <- summary(noSampD)
print(sunoSamp)

suibdLength <- summary(ibdLengthD)
print(suibdLength)

ibdPosS <- sort(ibdPosD)
ibdDistD <- diff(ibdPosS)
suibdDist <- summary(ibdDistD)
print(suibdDist)

exonicCountS <- colSums(exonicCountD)
names(exonicCountS) <- exonicClass
print(exonicCountS)

variantCountS <- colSums(variantCountD)
names(variantCountS) <- variantClass
print(variantCountS)

tfbsD <- unlist(tfbsD)
print(summary(tfbsD))
ss <- sum(tfbsD)
print(ss)
print(length(tfbsD))

for (i in 1:length(exonicClass))   {

print(exonicClass[i])
print("####################")
print(summary(exonicCountD[,i]))


   }

for (i in 1:length(variantClass))   {

print(variantClass[i])
print("####################")
print(summary(variantCountD[,i]))


   }


print(dim(exonicCountD))

exD <- apply(exonicCountD, 2, function(x) length(which(x>0)))
print(exD)

print(dim(variantCountD))
vaS <- apply(variantCountD, 2, function(x) length(which(x>0)))
print(vaS)

tfbsD <- unlist(tbfsD)
#tfbsD <- unlist(tfbsD)
print(length(tfbsD))
print(length(which(tfbsD>0)))