startRun <- 0
endRun <- 639

sampB <- c(1,2,3,5,10,20)
snpB <- c(8,9,10,12,15,20,30,50)
LB <- c(8,50,500,2000,5000,10000,20000,50000,100000,200000,500000,1000000)

# get dups and un
load(file=paste("resDups.Rda",sep=""))

# > table(indi[,3])
# 
# AFR AMR ASN EUR 
# 492 362 572 758 
# 
# AMR = PUR CLM MXL

pops <- c("AFR","AMR","ASN","EUR")

recode <- c(
            "AFR", # (1,0,0,0) --> 1 
            "AMR", # (0,1,0,0) --> 2 
        "AFR/AMR", # (1,1,0,0) --> 3 
            "ASN", # (0,0,1,0) --> 4  
        "AFR/ASN", # (1,0,1,0) --> 5  
        "AMR/ASN", # (0,1,1,0) --> 6  
    "AFR/AMR/ASN", # (1,1,1,0) --> 7  
            "EUR", # (0,0,0,1) --> 8  
        "AFR/EUR", # (1,0,0,1) --> 9  
        "AMR/EUR", # (0,1,0,1) --> 10  
    "AFR/AMR/EUR", # (1,1,0,1) --> 11  
        "ASN/EUR", # (0,0,1,1) --> 12  
    "AFR/ASN/EUR", # (1,0,1,1) --> 13 
    "AMR/ASN/EUR", # (0,1,1,1) --> 14 
"AFR/AMR/ASN/EUR"  # (1,1,1,1) --> 15
)


allC <- rep(0,15)
names(allC) <- recode

lsB <- length(sampB)
lnB <- length(snpB)
lLB <- length(LB)

msB <- matrix(rep(0,lsB*15),nrow=lsB,ncol=15)
mnB <- matrix(rep(0,lnB*15),nrow=lnB,ncol=15)
mLB <- matrix(rep(0,lLB*15),nrow=lLB,ncol=15)

colnames(msB) <- recode
rownames(msB) <- as.character(sampB)

colnames(mnB) <- recode
rownames(mnB) <- as.character(snpB)

colnames(mLB) <- recode
rownames(mLB) <- as.character(LB)




lengthAll <- 3201157
shift <- 5000
intervallAll <- 10000
over <- intervallAll%/%shift

N1 <- lengthAll%/%shift
endRunAll <- (N1-over+1) # 639

allCount <- 0

noSnps <- c()
noSamp <- c()
ibdLength <- c()

ibdFreq <- c()
snpL <- c()

load(file="../individuals.Rda")

for (posAll in (startRun+1):(endRun+1))  {

start <- (posAll-1)*shift
end <- start + intervallAll

if (end > lengthAll)  {

  end <- lengthAll
}
 
  
pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="")


load(file=paste("resIBD_chr1",pRange,"annot.Rda",sep=""))

noIBD <- length(IBDannot)

if (noIBD>0)  {

for (ibdC in 1:noIBD)  {

allCount <- allCount + 1

if (!dups[allCount]) {

noSa <- IBDannot[[ibdC]][[6]]
noSamp <- c(noSamp,noSa)
noSn <- IBDannot[[ibdC]][[7]]
noSnps <- c(noSnps,noSn)
ibdL <- IBDannot[[ibdC]][[5]]
ibdLength <- c(ibdLength,ibdL)

ibdF <- IBDannot[[ibdC]][[18]]
ibdFM <- median(ibdF)

ibdFreq <- c(ibdFreq,ibdFM)

snps <-  IBDannot[[ibdC]][[15]]
snpL <- c(snpL,snps)


samples <-  IBDannot[[ibdC]][[8]]
tt <- table(c(pops,indi[samples,3]))
tt <- tt - 1

aT <- tt>0
aI <- as.integer(aT)

codeT <- aI[1]+aI[2]*2+aI[3]*4+aI[4]*8

allC[codeT] <- allC[codeT]+1






if (noSa<(sampB[2]+1)) {
  sw <- 1
} else  {
if (noSa<(sampB[3]+1)) {
  sw <- 2
} else  {
if (noSa<(sampB[4]+1)) {
  sw <- 3
} else  {
if (noSa<(sampB[5]+1)) {
  sw <- 4
} else  {
if (noSa<(sampB[6]+1)) {
  sw <- 5
} else  {
  sw <- 6
}  
}  
}  
}  
}  


if (noSn<(snpB[2]+1)) {
  sn <- 1
} else  {
if (noSn<(snpB[3]+1)) {
  sn <- 2
} else  {
if (noSn<(snpB[4]+1)) {
  sn <- 3
} else  {
if (noSn<(snpB[5]+1)) {
  sn <- 4
} else  {
if (noSn<(snpB[6]+1)) {
  sn <- 5
} else  {
if (noSn<(snpB[7]+1)) {
  sn <- 6
} else  {
if (noSn<(snpB[8]+1)) {
  sn <- 7
} else  {
  sn <- 8
}  
}  
}  
}  
}  
}  
}  


if (ibdL<(LB[2]+1)) {
  sL <- 1
} else  {
if (ibdL<(LB[3]+1)) {
  sL <- 2
} else  {
if (ibdL<(LB[4]+1)) {
  sL <- 3
} else  {
if (ibdL<(LB[5]+1)) {
  sL <- 4
} else  {
if (ibdL<(LB[6]+1)) {
  sL <- 5
} else  {
if (ibdL<(LB[7]+1)) {
  sL <- 6
} else  {
if (ibdL<(LB[8]+1)) {
  sL <- 7
} else  {
if (ibdL<(LB[9]+1)) {
  sL <- 8
} else  {
if (ibdL<(LB[10]+1)) {
  sL <- 9
} else  {
if (ibdL<(LB[11]+1)) {
  sL <- 10
} else  {
if (ibdL<(LB[12]+1)) {
  sL <- 11
} else  {
  sL <- 12
}  
}  
}  
}  
}  
}  
}  
}  
}  
}  
}  


msB[sw,codeT] <-msB[sw,codeT] + 1
mnB[sn,codeT] <-mnB[sn,codeT] + 1
mLB[sL,codeT] <-mLB[sL,codeT] + 1





}


}

}


}

save(ibdLength,noSamp,noSnps,allC,msB,mnB,mLB,file="resultsA1.Rda")


sBB <- sampB+0.5
endB <- max(noSamp)+0.5
breaksA <- c(sBB,endB)

sBB <- snpB+0.5
endB <- max(noSnps)+0.5
breaksB <- c(sBB,endB)

sBB <- LB+0.5
endB <- max(ibdLength)+0.5
breaksC <- c(sBB,endB)



hSa <- hist(noSamp,breaks=breaksA,plot=FALSE,freq=TRUE,right=TRUE)

print(hSa)

hSn <- hist(noSnps,breaks=breaksB,plot=FALSE,freq=TRUE,right=TRUE)

print(hSn)


hL <- hist(ibdLength,breaks=breaksC,plot=FALSE,freq=TRUE,right=TRUE)

print(hL)


print(allC)

for (i in 1:lsB)  {

  print(rownames(msB)[i])
  print(msB[i,])
  

}

 for (i in 1:lnB)  {

  print(rownames(mnB)[i])
  print(mnB[i,])
  

}

for (i in 1:lLB)  {

  print(rownames(mLB)[i])
  print(mLB[i,])
  

}

breaksIF <- c(0,0.001,0.002,0.005,0.01,0.02,0.03,0.04,0.05,0.95,0.96,0.97,0.98,0.995,0.998,0.999,1.0)
iF <- hist(ibdFreq,breaks=breaksIF,plot=FALSE,freq=TRUE,right=TRUE)

print(iF)



snpsA <- unique(snpL)

length(snpsA)