# Copyright (C) 2015 Patrick Praher and Klambauer Guenter 
# <patrick.praher@gmail.com> <klambauer@bioinf.jku.at> 


##### normalizeTumor #####
# Parameters:
# - X: GRanges, read count data
# - CNV2Segm: GRanges, a list of all segments with a CN2, 
#             only this segments are used to calculate the normalization constants
# - normType: "mean" | "median"
#####
normalizeTumor <- function(X,CN2Segm,normType="median"){
    
  if (!(normType %in% c("mean","median"))){
    stop(paste("Set TO of normalization to \"mean\"","\"median\""))
  }
  if(class(X)!="GRanges" || class(CN2Segm)!="GRanges"){
    stop("X and CN2Segm has to be GRanges")
 
  }	

  input <- X
  
  Ytmp <- subset(X, X %over% CN2Segm)
  Ytmp <- IRanges::as.matrix(IRanges::values(Ytmp))
  
  X <- IRanges::as.matrix(IRanges::values(X))
  Xorig <- X
  
  idxSG <- apply(Ytmp,1,function(x) all(x<1))
  Ytmp[idxSG, ] <- NA
  
  
  if ((nrow(Ytmp)-length(which(idxSG))) > 1){
    
    normFactors <- colSums(  rbind(Ytmp,rep(0,ncol(X))) ,na.rm=TRUE)
    
    if (normType=="mean"){
      normFactors <- colMeans(Ytmp,na.rm=TRUE)
      correctwiththis <-  median(normFactors,na.rm=TRUE)/normFactors
    } else if (normType=="median" | normType=="poisson"){
      normFactors <- apply(Ytmp,2,median,na.rm=TRUE)
      correctwiththis <-  median(normFactors,na.rm=TRUE)/normFactors
    } 
    
    if (any(normFactors==0)){
      stop(paste("Some normalization factors are zero!",
                 "Remove samples or chromosomes for which the average read count is zero,",
                 "e.g. chromosome Y."))
    }
    
    #browser()

  } else {
    warning(paste("Normalization for reference sequence not", 
                  "applicable, because of low number of segments"))
    correctwiththis <- rep(1,ncol(X))
  }
  
  if (any(!is.finite(correctwiththis))){
    warning(paste("Normalization for reference sequence not", 
                  "applicable, because at least one sample has zero",
                  "reads."))
    correctwiththis <- rep(1,ncol(X))
  }
  
  
  # calc new YY
  
  YY <- t(t(X)*correctwiththis)
  
  #YY[idxSG, ] <- 0
  #
  
  #Ytmp <- t(t(Ytmp)*correctwiththis)
  #Ytmp[idxSG, ] <- 0
  
  #Y[chrIdx, ] <- Ytmp

  rownames(YY) <- rownames(Xorig)
  colnames(YY) <- colnames(Xorig)
  
  values(input) <- YY
  return(input)

}


##### calcCN2Segments #####
# Parameters:
# - allSegments: GRanges, a list of all sequenced segments
# - CNVs: GRanges, a list of all segments with a CN != 2, subtracted from allSegments
# Desc: calculates list of semgments with CN2 based on a list of all segments and
#       a list of segments with CN != 2
#####
calcCN2Segments <- function(allSegments, CNVs){
  
  temp <- allSegments
  
  for(i in 1:length(CNVs)){
    temp <- setdiff(temp,CNVs[[i]])
  }
  
  return(temp)
}

##### toGR #####
# Parameters:
# - x: data.frame | list of data.frame, columns: "chrom", "start", "end", "copy_number"
# Desc: converts data.frame to GRanges, copy_number information is lost!
#####
toGR <- function(x){
  if(class(x) == "list"){
    ret <- list()
    for(i in 1:length(x)){
      if(is.data.frame(x[[i]])){
        ret[[i]] <- GRanges(x[[i]][,1], IRanges(x[[i]][,2], x[[i]][,3]))
      }else{
        ret[[i]] <- NA
      }
    }
  }else{
    ret <- GRanges(x[,1], IRanges(x[,2], x[,3]))
  }
  return(ret)
  
}