# Copyright (C) 2015 Patrick Praher and Klambauer Guenter # ##### normalizeTumor ##### # Parameters: # - X: GRanges, read count data # - CNV2Segm: GRanges, a list of all segments with a CN2, # only this segments are used to calculate the normalization constants # - normType: "mean" | "median" ##### normalizeTumor <- function(X,CN2Segm,normType="median"){ if (!(normType %in% c("mean","median"))){ stop(paste("Set TO of normalization to \"mean\"","\"median\"")) } if(class(X)!="GRanges" || class(CN2Segm)!="GRanges"){ stop("X and CN2Segm has to be GRanges") } input <- X Ytmp <- subset(X, X %over% CN2Segm) Ytmp <- IRanges::as.matrix(IRanges::values(Ytmp)) X <- IRanges::as.matrix(IRanges::values(X)) Xorig <- X idxSG <- apply(Ytmp,1,function(x) all(x<1)) Ytmp[idxSG, ] <- NA if ((nrow(Ytmp)-length(which(idxSG))) > 1){ normFactors <- colSums( rbind(Ytmp,rep(0,ncol(X))) ,na.rm=TRUE) if (normType=="mean"){ normFactors <- colMeans(Ytmp,na.rm=TRUE) correctwiththis <- median(normFactors,na.rm=TRUE)/normFactors } else if (normType=="median" | normType=="poisson"){ normFactors <- apply(Ytmp,2,median,na.rm=TRUE) correctwiththis <- median(normFactors,na.rm=TRUE)/normFactors } if (any(normFactors==0)){ stop(paste("Some normalization factors are zero!", "Remove samples or chromosomes for which the average read count is zero,", "e.g. chromosome Y.")) } #browser() } else { warning(paste("Normalization for reference sequence not", "applicable, because of low number of segments")) correctwiththis <- rep(1,ncol(X)) } if (any(!is.finite(correctwiththis))){ warning(paste("Normalization for reference sequence not", "applicable, because at least one sample has zero", "reads.")) correctwiththis <- rep(1,ncol(X)) } # calc new YY YY <- t(t(X)*correctwiththis) #YY[idxSG, ] <- 0 # #Ytmp <- t(t(Ytmp)*correctwiththis) #Ytmp[idxSG, ] <- 0 #Y[chrIdx, ] <- Ytmp rownames(YY) <- rownames(Xorig) colnames(YY) <- colnames(Xorig) values(input) <- YY return(input) } ##### calcCN2Segments ##### # Parameters: # - allSegments: GRanges, a list of all sequenced segments # - CNVs: GRanges, a list of all segments with a CN != 2, subtracted from allSegments # Desc: calculates list of semgments with CN2 based on a list of all segments and # a list of segments with CN != 2 ##### calcCN2Segments <- function(allSegments, CNVs){ temp <- allSegments for(i in 1:length(CNVs)){ temp <- setdiff(temp,CNVs[[i]]) } return(temp) } ##### toGR ##### # Parameters: # - x: data.frame | list of data.frame, columns: "chrom", "start", "end", "copy_number" # Desc: converts data.frame to GRanges, copy_number information is lost! ##### toGR <- function(x){ if(class(x) == "list"){ ret <- list() for(i in 1:length(x)){ if(is.data.frame(x[[i]])){ ret[[i]] <- GRanges(x[[i]][,1], IRanges(x[[i]][,2], x[[i]][,3])) }else{ ret[[i]] <- NA } } }else{ ret <- GRanges(x[,1], IRanges(x[,2], x[,3])) } return(ret) }