load("dups.Rda")
load("analyzeResult.Rda")
load(file="/seppdata/sepp/linkage/kpgp/annotationAChr1.Rda")
library(hapFabia)
library(fabia)
#source("prepareAncientGenome.R")
#source("plotAncientGenomes.R")

shiftSize <- 5000
segmentSize <- 10000
fileName="KPGP1000_chr1" # without type


startRun <- 1
endRun <- 633

runIndex=""


lengthAll <- 3167651
shift <- 5000
intervallAll <- 10000
over <- intervallAll%/%shift

N1 <- lengthAll%/%shift
endRunAll <- (N1-over+2) # 639


# annotationAChr1
#colN <- c("CHROM","POS","ID","REF","ALT","LDAF","AVGPOST","RSQ","ERATE","THETA","CIEND","CIPOS","END","HOMLEN","HOMSEQ","SVLEN","SVTYPE","AC","AN","DEL","GT","DS","GL","AA","AF","AMR_AF","ASN_AF","AFR_AF","EUR_AF","VT","SNPSOURCE")

pos <- annotationAChr1[,2]
nea <- as.character(annotationAChr1[,24])
ref <- as.character(annotationAChr1[,4])
alt <- as.character(annotationAChr1[,5])


res <- list()

allCount <- 0
allCount1 <- 0

for (posAll in startRun:endRun)  {

start <- (posAll-1)*shift
end <- start + intervallAll

if (end > lengthAll)  {

  end <- lengthAll
}

  
pRange <- paste("_",format(start,scientific=FALSE),"_",format(end,scientific=FALSE),sep="")

load(file=paste(fileName,pRange,"_resAnno",".Rda",sep=""))
mergedIBD <- resHapFabia$mergedIBD 


noIBD <- length(mergedIBD)


if (noIBD > 0) {
  

for (ibdC in 1:noIBD)  {

allCount <- allCount + 1

if (!dups[allCount]) {

snpPositions <- mergedIBD[[ibdC]][[15]]

l1 <-mergedIBD[[ibdC]][[7]] # no SNPs


m <- match(snpPositions,pos,nomatch=0)

snpPositions <- snpPositions[which(m>0)]

g <- m[which(m>0)]

l2 <- length(g) # no SNPs found

allCount1 <- allCount1 + 1

if (l2>0)  {


neaG <- nea[g] 
altG <- alt[g]
refG <- ref[g]

lno <- 0 # "-" not determined
lma <- 0 # "X" match ref
lne <- 0 # match ancesttal
lnew <- 0 # new base

posb <- c()
posc <- c()
posbt <- c()
posct <- c()
itt <- 1
for (it in 1:l2)  {
  
if (toupper(neaG[it]) == toupper(altG[it]))  {
       lne <- lne +1
       posb <- c(posb,it)
       posbt <- c(posbt,itt)
       itt <- itt + 1

} else {
  if (toupper(neaG[it]) == toupper(refG[it]))  {
    lma <- lma+1
    posc <- c(posc,it)
    posct <- c(posct,itt)
    itt <- itt + 1
  } else {
  if ((neaG[it] == "-")||(neaG[it] == ".")||(neaG[it] == "N"))  {
    lno <- lno+1
  } else {
    lnew <- lnew +1
  }
}
}

}

if (length(posb)>1) { 
dad <- diff(posbt)
dam <- median(dad)
dan <- mean(dad)

snpPN <- snpPositions[posb]
dN <- diff(snpPN)
dNm <-  median(dN)

} else  {

  dam <- 0
  dNm <- 0
  dan <- 0
}

if (length(posc)>1) { 
dac <- diff(posct)
dcm <- median(dac)
dcn <- mean(dac)

snpPH <- snpPositions[posc]
dH <- diff(snpPH)
dHm <-  median(dH)
} else  {

  dcm <- 0
  dHm <- 0
  dcn <- 0
}
  
if (l2>0)  {
  dA  <- diff(snpPositions)
  dAm <-  median(dA)
 } else  {
 dAm <- 0
}
 
# l2 number SNPs
de <- l2-lno  # number determined in Ancesttal
if (l2>0)  {
det <- de/l2 # ratio determined
}  else  {
 det <- 0
}


if (de>0)  {
refM <- lma/de # ratio reference
altM <- lne/de # ratio alternative
newM <- lnew/de # ratio new bases
}  else  {
refM <- 0
altM <- 0
newM <- 0
}

res1 <- c(allCount1,allCount,ibdC,posAll,l2,de,lma,lne,lnew,det,refM,altM,newM,dam,dcm,dan,dcn,dAm,dNm,dHm)
res[[allCount1]] <- res1

} else {

de <- 0
lma <- 0
lne <- 0
lnew <- 0
det <- 0
refM <- 0
altM <- 0
newM <- 0
dam <- 0
dcm <- 0
dan <- 0
dcn <- 0
dAm <- 0
dNm <- 0
dHm <- 0

  res1 <- c(allCount1,allCount,ibdC,posAll,l2,de,lma,lne,lnew,det,refM,altM,newM,dam,dcm,dan,dcn,dAm,dNm,dHm)
res[[allCount1]] <- res1

}

cat(allCount," out of 222246\r")

}
}
}


}


rr <- unlist(res)
l <- length(res1)

AncestMat <- matrix(rr,nrow=allCount1,ncol=l,byrow=TRUE)

colnames(AncestMat) <- c("allCount1","allCount","ibdC","posAll","#SNPs","#Nea","#Ref","#Alt","#New","NeaR","RefR","AltR","NewR","xAltCon","xRefCon","mAltCon","mRefCon","NeaPosC","AltPosC","RefPosC")

write.table(AncestMat,file=paste("AncestMat",runIndex,".txt",sep=""))
            
save(res,AncestMat,file=paste("AncestMat",runIndex,".Rda",sep=""))