# Copyright (C) Matthias E. Futschik, # Institute of Theoretical Biology, # Charite, Humboldt-University # Berlin, Germany # (matthias.futschik@charite.de;m.futschik@staff.hu-berlin.de) # # # This code is free for academic research use. # For all other use, please contact Mattthias Futschik. # If you employing this code, please cite # Matthias E. Futschik & Hanspeter Herzel, Bioinformatics, 2008 # # NOTE: This code is distributed WITHOUT ANY WARRANTY. # # # For use, copy and paste the R-functions below in the R workspace. # (A Bioconductor package is currently under development.) # # The main function is fdr.cycle which delivers the false discovery # rates for the gene expression stored in the R/Bioconductor # ExpressionSet-Object. Its arguments are: # eset - ExpressionSet-Object # T - Period of cycle # times - Measurement times # background.model - "rr" for randomly permutations, # "gauss" for Gaussian distribution, # "ar1" for AR(1) models as background. # N - Number of generated background models # Note that the calculation of FDRs employing empirical background # distribution can require some time. So it can be good idea to # perform it as over-night job ;-) ### CALCULATION OF FALSE DISCOVERY RATE fdr.cycle <- function(eset,T,times,background.model="rr",N=100){ eset <- standardise(eset) F <- fourierscore(eset,T=T,times=times) F.b <- 0 for (i in 1:N){ eset.tmp <- backgroundData(eset,model=background.model) eset.tmp <- standardise(eset.tmp) F.b <- c(F.b,fourierscore(eset.tmp,T=T,times=times)) } F.b <- F.b[-1] fdr <- real(length=length(F)) for (i in 1:length(F)){ fdr[i] <- sum(F.b >= F[i])/ (sum(F >= F[i])*(length(F.b)/length(F)) ) } names(fdr) <- geneNames(eset) list(fdr=fdr) } ############################################## #### GENERATION OF BACKGROUND DATA backgroundData <- function(eset,model="rr"){ esetB <- eset data <- exprs(eset) dataB <- matrix(NA,ncol=dim(data)[[2]],nrow=dim(data)[[1]]) if (model=="rr"){ #### RANDOMISATION OF ROWS for (i in 1:dim(data)[[1]]){ dataB[i,] <- data[i,sample(dim(data)[[2]])] } } if (model=="gauss"){ #### GAUSSIAN PROCESS AR(0) for (i in 1:dim(data)[[1]]){ dataB[i,] <- rnorm(dim(data)[[2]],mean=mean(data[i,],na.rm=TRUE),sd=sd(data[i,],na.rm=TRUE)) } } if (model=="ar1"){ #### AR(1) BACKGROUND for (i in 1:dim(data)[[1]]){ a <- ar(data[i,],order=1,aic=FALSE) dataB[i,1] <- data[i,1] for (j in 2:dim(data)[[2]]){ dataB[i,j] <- a[[4]] + a[[2]]* (dataB[i,j-1]-a[[4]]) + rnorm(1,mean=0,sd=(sqrt(a[[3]]))) } } } exprs(esetB) <- dataB esetB } ######################################### ### FOURIER SCORE fourierscore <- function(eset,T,times){ F <- real(length=dim(exprs(eset))[[1]]) if (missing(times)){ for (i in 1:dim(exprs(eset))[[1]]){ F[i] <- sqrt(sum(cos(2*pi/T*seq(1,length(exprs(eset)[i,])))*exprs(eset[i,]))^2 + sum(sin(2*pi/T*seq(1,length(exprs(eset)[i,])))*exprs(eset[i,]))^2) } } else { for (i in 1:dim(exprs(eset))[[1]]){ F[i] <- sqrt(sum(cos(2*pi/T*times)*exprs(eset[i,]))^2 + sum(sin(2*pi/T*times)*exprs(eset[i,]))^2) } } F } ####################################################################### ### STANDARDISATION standardise <- function(eset){ data <- exprs(eset) for (i in 1:dim(data)[[1]]){ data[i,] <- (data[i,] - mean(data[i,],na.rm=TRUE))/sd(data[i,],na.rm=TRUE) } exprs(eset) <- data eset }