startRun <- 0
endRun <- 639

sampB <- c(1,2,3,5,10,20)
snpB <- c(8,9,10,12,15,20,30,50)
LB <- c(8,50,500,2000,5000,10000,20000,50000,100000,200000,500000,1000000)

# get dups and un
load(file=paste("resDups.Rda",sep=""))

# > table(indi_EUeurop[,2])
# 
# CEU FIN GBR IBS TSI 
# 170 186 178  28 196 


pops <- c("CEU","FIN","GBR","IBS","TSI")

recode <- c(
                "CEU", # (1,0,0,0,0) --> 1 
                "FIN", # (0,1,0,0,0) --> 2 
            "CEU/FIN", # (1,1,0,0,0) --> 3 
                "GBR", # (0,0,1,0,0) --> 4  
            "CEU/GBR", # (1,0,1,0,0) --> 5  
            "FIN/GBR", # (0,1,1,0,0) --> 6  
        "CEU/FIN/GBR", # (1,1,1,0,0) --> 7  
                "IBS", # (0,0,0,1,0) --> 8  
            "CEU/IBS", # (1,0,0,1,0) --> 9  
            "FIN/IBS", # (0,1,0,1,0) --> 10  
        "CEU/FIN/IBS", # (1,1,0,1,0) --> 11  
            "GBR/IBS", # (0,0,1,1,0) --> 12  
        "CEU/GBR/IBS", # (1,0,1,1,0) --> 13 
        "FIN/GBR/IBS", # (0,1,1,1,0) --> 14 
    "CEU/FIN/GBR/IBS", # (1,1,1,1,0) --> 15
                "TSI", # (0,0,0,0,1) --> 16
            "CEU/TSI", # (1,0,0,0,1) --> 17 
            "FIN/TSI", # (0,1,0,0,1) --> 18
        "CEU/FIN/TSI", # (1,1,0,0,1) --> 19
            "GBR/TSI", # (0,0,1,0,1) --> 20 
        "CEU/GBR/TSI", # (1,0,1,0,1) --> 21 
        "FIN/GBR/TSI", # (0,1,1,0,1) --> 22 
    "CEU/FIN/GBR/TSI", # (1,1,1,0,1) --> 23 
            "IBS/TSI", # (0,0,0,1,1) --> 24 
        "CEU/IBS/TSI", # (1,0,0,1,1) --> 25 
        "FIN/IBS/TSI", # (0,1,0,1,1) --> 26  
    "CEU/FIN/IBS/TSI", # (1,1,0,1,1) --> 27  
        "GBR/IBS/TSI", # (0,0,1,1,1) --> 28  
    "CEU/GBR/IBS/TSI", # (1,0,1,1,1) --> 29 
    "FIN/GBR/IBS/TSI", # (0,1,1,1,1) --> 30 
"CEU/FIN/GBR/IBS/TSI"  # (1,1,1,1,1) --> 31
)


allC <- rep(0,31)
names(allC) <- recode

lsB <- length(sampB)
lnB <- length(snpB)
lLB <- length(LB)

msB <- matrix(rep(0,lsB*31),nrow=lsB,ncol=31)
mnB <- matrix(rep(0,lnB*31),nrow=lnB,ncol=31)
mLB <- matrix(rep(0,lLB*31),nrow=lLB,ncol=31)

colnames(msB) <- recode
rownames(msB) <- as.character(sampB)

colnames(mnB) <- recode
rownames(mnB) <- as.character(snpB)

colnames(mLB) <- recode
rownames(mLB) <- as.character(LB)


lengthAll <- 3201157
shift <- 5000
intervallAll <- 10000
over <- intervallAll%/%shift

N1 <- lengthAll%/%shift
endRunAll <- (N1-over+1) # 639

allCount <- 0

noSnps <- c()
noSamp <- c()
ibdLength <- c()

ibdFreq <- c()
ibdFreqEU <- c()

load(file="../individuals.Rda")


EUeuropeans <- c(which(indi[,2]=="CEU"),which(indi[,2]=="FIN"),which(indi[,2]=="GBR"),which(indi[,2]=="IBS"),which(indi[,2]=="TSI"))
                 
EUeurop <- as.integer(sort.int(as.integer(unique(EUeuropeans))))

indi_EUeurop <- indi[EUeurop,]


for (posAll in (startRun+1):(endRun+1))  {

start <- (posAll-1)*shift
end <- start + intervallAll

if (end > lengthAll)  {

  end <- lengthAll
}
 
  
pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="")


load(file=paste("resIBD_chr1",pRange,"annot.Rda",sep=""))

noIBD <- length(IBDannot)

if (noIBD>0)  {

for (ibdC in 1:noIBD)  {

allCount <- allCount + 1

if (!dups[allCount]) {

noSa <- IBDannot[[ibdC]][[6]]
noSamp <- c(noSamp,noSa)
noSn <- IBDannot[[ibdC]][[7]]
noSnps <- c(noSnps,noSn)
ibdL <- IBDannot[[ibdC]][[5]]
ibdLength <- c(ibdLength,ibdL)

ibdF <- IBDannot[[ibdC]][[18]]
ibdFEU <- IBDannot[[ibdC]][[19]]
ibdFM <- median(ibdF)
ibdFEUM <- median(ibdFEU)

ibdFreq <- c(ibdFreq,ibdFM)
ibdFreqEU <- c(ibdFreqEU,ibdFEUM)

samples <-  IBDannot[[ibdC]][[8]]
tt <- table(c(pops,indi_EUeurop[samples,2]))
tt <- tt - 1

aT <- tt>0
aI <- as.integer(aT)

codeT <- aI[1]+aI[2]*2+aI[3]*4+aI[4]*8+aI[5]*16

allC[codeT] <- allC[codeT]+1


if (noSa<(sampB[2]+1)) {
  sw <- 1
} else  {
if (noSa<(sampB[3]+1)) {
  sw <- 2
} else  {
if (noSa<(sampB[4]+1)) {
  sw <- 3
} else  {
if (noSa<(sampB[5]+1)) {
  sw <- 4
} else  {
if (noSa<(sampB[6]+1)) {
  sw <- 5
} else  {
  sw <- 6
}  
}  
}  
}  
}  


if (noSn<(snpB[2]+1)) {
  sn <- 1
} else  {
if (noSn<(snpB[3]+1)) {
  sn <- 2
} else  {
if (noSn<(snpB[4]+1)) {
  sn <- 3
} else  {
if (noSn<(snpB[5]+1)) {
  sn <- 4
} else  {
if (noSn<(snpB[6]+1)) {
  sn <- 5
} else  {
if (noSn<(snpB[7]+1)) {
  sn <- 6
} else  {
if (noSn<(snpB[8]+1)) {
  sn <- 7
} else  {
  sn <- 8
}  
}  
}  
}  
}  
}  
}  


if (ibdL<(LB[2]+1)) {
  sL <- 1
} else  {
if (ibdL<(LB[3]+1)) {
  sL <- 2
} else  {
if (ibdL<(LB[4]+1)) {
  sL <- 3
} else  {
if (ibdL<(LB[5]+1)) {
  sL <- 4
} else  {
if (ibdL<(LB[6]+1)) {
  sL <- 5
} else  {
if (ibdL<(LB[7]+1)) {
  sL <- 6
} else  {
if (ibdL<(LB[8]+1)) {
  sL <- 7
} else  {
if (ibdL<(LB[9]+1)) {
  sL <- 8
} else  {
if (ibdL<(LB[10]+1)) {
  sL <- 9
} else  {
if (ibdL<(LB[11]+1)) {
  sL <- 10
} else  {
if (ibdL<(LB[12]+1)) {
  sL <- 11
} else  {
  sL <- 12
}  
}  
}  
}  
}  
}  
}  
}  
}  
}  
}  


msB[sw,codeT] <-msB[sw,codeT] + 1
mnB[sn,codeT] <-mnB[sn,codeT] + 1
mLB[sL,codeT] <-mLB[sL,codeT] + 1


}
}


}


}

save(ibdLength,noSamp,noSnps,allC,msB,mnB,mLB,file="resultsA1.Rda")


sBB <- sampB+0.5
endB <- max(noSamp)+0.5
breaksA <- c(sBB,endB)

sBB <- snpB+0.5
endB <- max(noSnps)+0.5
breaksB <- c(sBB,endB)

sBB <- LB+0.5
endB <- max(ibdLength)+0.5
breaksC <- c(sBB,endB)


hSa <- hist(noSamp,breaks=breaksA,plot=FALSE,freq=TRUE,right=TRUE)

print(hSa)

hSn <- hist(noSnps,breaks=breaksB,plot=FALSE,freq=TRUE,right=TRUE)

print(hSn)


hL <- hist(ibdLength,breaks=breaksC,plot=FALSE,freq=TRUE,right=TRUE)

print(hL)


print(allC)

for (i in 1:lsB)  {

  print(rownames(msB)[i])
  print(msB[i,])
  

}

 for (i in 1:lnB)  {

  print(rownames(mnB)[i])
  print(mnB[i,])
  

}

for (i in 1:lLB)  {

  print(rownames(mLB)[i])
  print(mLB[i,])
  

}


iF <- hist(ibdFreq,breaks=16,plot=FALSE,freq=TRUE,right=TRUE)

print(iF)

iFEU <- hist(ibdFreqEU,breaks=16,plot=FALSE,freq=TRUE,right=TRUE)

print(iFEU)