####################################### # Install and Load Required Libraries # ####################################### library(devtools) # devtools::install_github('biobakery/sparseDOSSA@varyLibSize') library(sparseDOSSA) library(stringi) ###################### # Specify Parameters # ###################### set.seed(670) n.microbes <- 200 # Number of Features n.samples <- 300 # Number of Samples spike.perc <- 0.1 # Percentage of Spiked-in Bugs spikeStrength<-"50" # Effect Size ########################### # Specify Binary Metadata # ########################### n.metadata <- 1 UserMetadata<-as.matrix(rnorm(n.samples, 0, 1)) UserMetadata<-t(UserMetadata) # Transpose ################################################### # Spiked-in Metadata (Which Metadata to Spike-in) # ################################################### Metadatafrozenidx<-1 spikeCount<-as.character(length(Metadatafrozenidx)) significant_metadata<-paste('Metadata', Metadatafrozenidx, sep='') ############################################# # Generate SparseDOSSA Synthetic Abundances # ############################################# DD<-sparseDOSSA::sparseDOSSA(number_features = n.microbes, number_samples = n.samples, UserMetadata=UserMetadata, Metadatafrozenidx=Metadatafrozenidx, datasetCount = 1, spikeCount = spikeCount, spikeStrength = spikeStrength, percent_spiked=spike.perc, seed = 1234, noZeroInflate = TRUE) ############################## # Gather SparseDOSSA Outputs # ############################## sparsedossa_results <- as.data.frame(DD$OTU_count) rownames(sparsedossa_results)<-sparsedossa_results$X1 sparsedossa_results<-sparsedossa_results[-1,-1] colnames(sparsedossa_results)<-paste('Sample', 1:ncol(sparsedossa_results), sep='') data<-as.matrix(sparsedossa_results[-c((n.metadata+1):(2*n.microbes+n.metadata)),]) data<-data.matrix(data) class(data) <- "numeric" truth<-c(unlist(DD$truth)) truth<-truth[!stri_detect_fixed(truth,":")] truth<-truth[(5+n.metadata):length(truth)] truth<-as.data.frame(truth) significant_features<-truth[seq(1, (as.numeric(spikeCount)+1)*(n.microbes*spike.perc), (as.numeric(spikeCount)+1)),] significant_features<-as.vector(significant_features) #################### # Extract Features # #################### features<-as.data.frame(t(data[-c(1:n.metadata),])) #################### # Extract Metadata # #################### metadata<-as.data.frame(data[1,]) colnames(metadata)<-rownames(data)[1] ############################### # Mark True Positive Deatures # ############################### wh.TP = colnames(features) %in% significant_features colnames(features)<-paste("Feature", 1:n.microbes, sep = "") newname = paste0(colnames(features)[wh.TP], "_TP") colnames(features)[wh.TP] <- newname; colnames(features)[grep('TP', colnames(features))] ############################################### # Note down significant features and metadata # ############################################### spiked_features<-colnames(features)[grep('TP', colnames(features))] #################### # TP Investigation # #################### spike_correlations<-c() for (i in 1:length(spiked_features)){ spike_correlations[i]<-cor(features[, spiked_features[i]], metadata[, 1], method = 'spearman') } max(spike_correlations)