####################################### # Install and Load Required Libraries # ####################################### library(devtools) # devtools::install_github('biobakery/sparseDOSSA@varyLibSize') library(sparseDOSSA) library(stringi) ###################### # Specify Parameters # ###################### set.seed(670) nMicrobes <- 200 # Number of Features nSubjects <- 300 # Number of Subjects nPerSubject<-2 # Repeated measures spikeMicrobes <- 0.1 # Percentage of Spiked-in Bugs spikeMetadata<-0.5 # Percentage of spiked-in metadata spikeStrength<-"50" # Effect Size nMetadata<-4 nSamples = round(nSubjects*nPerSubject) # Create blocking variable subjectRandomEffects <- as.matrix(rnorm(nSubjects,mean=0,sd=1)) if (nPerSubject==1){ # NO RANDOM EFFECTS subjectRandomEffects <- as.matrix(rnorm(nSubjects,mean=0,sd=0)) } BLOCK <- as.vector(matrix(subjectRandomEffects,nrow=nPerSubject,ncol=length(subjectRandomEffects),byrow=TRUE)) # Specify Mean and Covariance Structure mu<-rep(0,nMetadata) cov<-diag(1,nMetadata, nMetadata) for (i in 1:nMetadata){for (j in 1:nMetadata){if(i!=j) cov[i,j]=0.5**(abs(i-j)) }} # AR(1) # Generate from MVN fakeMetadata<-as.matrix(MASS::mvrnorm(n=nSamples, mu,cov)) # Transpose and Add Blocking Structure finalMetadata<-apply(fakeMetadata, 2, function(x) x+BLOCK) t_UserMetadata<-apply(finalMetadata, 2, function(x) ifelse(x>median(x), 1, 0)) columns_not_to_binarize<-sample(1:nMetadata, nMetadata/2) t_UserMetadata[,columns_not_to_binarize]<-finalMetadata[, columns_not_to_binarize] UserMetadata<-t(t_UserMetadata) # Collect Relevant Spike-in Information spikeCount<- round(nMetadata*spikeMetadata) Metadatafrozenidx<-sample(1:nMetadata, spikeCount, replace=FALSE) significant_metadata<-paste('Metadata', Metadatafrozenidx, sep='') spikeCount<-as.character(spikeCount) ############################################# # Generate SparseDOSSA Synthetic Abundances # ############################################# DD<-sparseDOSSA::sparseDOSSA(number_features = nMicrobes, number_samples = nSamples, UserMetadata = UserMetadata, Metadatafrozenidx = Metadatafrozenidx, datasetCount = 1, spikeCount = spikeCount, spikeStrength = spikeStrength, percent_spiked = spikeMicrobes, seed = 1234, noZeroInflate = TRUE) ############################## # Gather SparseDOSSA Outputs # ############################## sparsedossa_results <- as.data.frame(DD$OTU_count) rownames(sparsedossa_results)<-sparsedossa_results$X1 sparsedossa_results<-sparsedossa_results[-1,-1] colnames(sparsedossa_results)<-paste('Sample', 1:ncol(sparsedossa_results), sep='') data<-as.matrix(sparsedossa_results[-c((nMetadata+1):(2*nMicrobes+nMetadata)),]) data<-data.matrix(data) class(data) <- "numeric" truth<-c(unlist(DD$truth)) truth<-truth[!stri_detect_fixed(truth,":")] truth<-truth[(5+nMetadata):length(truth)] truth<-as.data.frame(truth) significant_features<-truth[seq(1, (as.numeric(spikeCount)+1)*(nMicrobes*spikeMicrobes), (as.numeric(spikeCount)+1)),] significant_features<-as.vector(significant_features) #################### # Extract Features # #################### features<-as.data.frame(t(data[-c(1:nMetadata),])) #################### # Extract Metadata # #################### metadata<-as.data.frame(t(data[(1:nMetadata),])) which.TP = colnames(metadata) %in% significant_metadata meta_newname = paste0(colnames(metadata)[which.TP], "_TP") colnames(metadata)[which.TP] <- meta_newname significant_metadata<-meta_newname ############################### # Mark True Positive Deatures # ############################### wh.TP = colnames(features) %in% significant_features colnames(features)<-paste("Feature", 1:nMicrobes, sep = "") newname = paste0(colnames(features)[wh.TP], "_TP") colnames(features)[wh.TP] <- newname ############################################### # Note down significant features and metadata # ############################################### spiked_features<-colnames(features)[grep('TP', colnames(features))] spiked_metadata<-colnames(metadata)[grep('TP', colnames(metadata))] spiked_metadata_cont_index<-ifelse(length(unique(metadata[, spiked_metadata[[1]]]))>2, 1, 2) #################### # TP Investigation # #################### spike_correlations<-c() for (i in 1:length(spiked_features)){ spike_correlations[i]<-cor(features[, spiked_features[i]], metadata[, spiked_metadata[spiked_metadata_cont_index]], method = 'spearman') } max(spike_correlations)