Thank you for creating this fantastic package. I would like to hear your opinion on the best approach for replotting the MaAsLin3 summary plot while focusing on visualizing an interaction between two variables. I have tried using the ‘maaslin_plot_results_from_output’ function, but I’ve been unable to produce the desired summary plot and instead get the association plots. I am attaching an example using the HMP2 data. I’m sorry if this is a silly question; I’m still a novice at bioinformatics and microbiome analysis.
########################################################################
################# MaAsLin 3 input
########################################################################
# Read abundance table
taxa_table_name <- system.file("extdata", "HMP2_taxonomy.tsv",
package = "maaslin3")
taxa_table <- read.csv(taxa_table_name, sep = '\t', row.names = 1)
# Read metadata table
metadata_name <- system.file("extdata", "HMP2_metadata.tsv",
package = "maaslin3")
metadata <- read.csv(metadata_name, sep = '\t', row.names = 1)
# Factor the categorical variables to test IBD against healthy controls
metadata$diagnosis <-
factor(metadata$diagnosis, levels = c('nonIBD', 'UC', 'CD'))
metadata$dysbiosis_state <-
factor(metadata$dysbiosis_state, levels =
c('none', 'dysbiosis_UC', 'dysbiosis_CD'))
metadata$antibiotics <-
factor(metadata$antibiotics, levels = c('No', 'Yes'))
taxa_table[1:5, 1:5]
########################################################################
################# Run MaAslin3 with interaction
########################################################################
set.seed(22)
fit_out <- maaslin3(input_data = taxa_table,
input_metadata = metadata,
output = 'hmp2_output',
formula = '~ diagnosis:antibiotics + dysbiosis_state +
age + reads',
normalization = 'TSS',
transform = 'LOG',
augment = TRUE,
standardize = TRUE,
max_significance = 0.1,
median_comparison_abundance = TRUE,
median_comparison_prevalence = FALSE,
max_pngs = 10,
cores = 1)
########################################################################
################# Replot Summary Plot
########################################################################
# Rename results file with clean titles
all_results <- read.csv('hmp2_output/all_results.tsv', sep='\t')
all_results <- all_results %>%
mutate(metadata = case_when(metadata == 'age' ~ 'Age',
metadata == 'diagnosis' ~ 'Diagnosis',
metadata == 'dysbiosis_state' ~ 'Dysbiosis',
metadata == 'reads' ~ 'Read depth'),
value = case_when(value == 'age' ~ 'Age',
value == 'dysbiosis_CD' ~ 'CD',
value == 'dysbiosis_UC' ~ 'UC',
value == 'reads' ~ 'Read depth',
TRUE ~ value),
feature = gsub('_', ' ', feature) %>%
gsub(pattern = 'sp ', replacement = 'sp. '))
# Write results
write.table(all_results, 'hmp2_output/all_results.tsv', sep='\t')
# This section is necessary for updating the association plots
taxa_table_copy <- taxa_table
colnames(taxa_table_copy) <- gsub('_', ' ', colnames(taxa_table_copy)) %>%
gsub(pattern = 'sp ', replacement = 'sp. ')
# Rename the features in the norm transformed data file
data_transformed <-
read.csv('hmp2_output/features/data_transformed.tsv', sep='\t')
colnames(data_transformed) <-
gsub('_', ' ', colnames(data_transformed)) %>%
gsub(pattern = 'sp ', replacement = 'sp. ')
write.table(data_transformed,
'hmp2_output/features/data_transformed.tsv',
sep='\t', row.names = FALSE)
# Rename the metadata like in the outputs table
metadata_copy <- metadata
colnames(metadata_copy) <-
case_when(colnames(metadata_copy) == 'age' ~ 'Age',
colnames(metadata_copy) == 'diagnosis' ~ 'Diagnosis',
colnames(metadata_copy) == 'dysbiosis_state' ~ 'Dysbiosis',
colnames(metadata_copy) == 'reads' ~ 'Read depth',
TRUE ~ colnames(metadata_copy))
metadata_copy <- metadata_copy %>%
mutate(Dysbiosis = case_when(Dysbiosis == 'dysbiosis_UC' ~ 'UC',
Dysbiosis == 'dysbiosis_CD' ~ 'CD',
Dysbiosis == 'none' ~ 'None') %>%
factor(levels = c('None', 'UC', 'CD')),
antibiotics = case_when(antibiotics == 'Yes' ~ 'Used',
antibiotics == 'No' ~ 'Not used') %>%
factor(levels = c('Not used', 'Used')),
Diagnosis = case_when(Diagnosis == 'nonIBD' ~ 'non-IBD',
TRUE ~ Diagnosis) %>%
factor(levels = c('non-IBD', 'UC', 'CD')))
# Set the new heatmap and coefficient plot variables and order them
coef_plot_vars = c('CD:antibiotics Not used', 'CD:antibiotics Used')
heatmap_vars = c('Dysbiosis UC', 'Diagnosis UC',
'Abx Used', 'Age', 'Read depth',
'UC:antibiotics Yes',
'UC:antibiotics No')
# Recreate the plots
plots <- maaslin_plot_results_from_output(
output = 'hmp2_output',
metadata = metadata_copy,
normalization = "TSS",
transform = "LOG",
median_comparison_abundance = TRUE,
median_comparison_prevalence = FALSE,
coef_plot_vars = coef_plot_vars,
heatmap_vars = heatmap_vars,
max_significance = 0.1,
max_pngs = 20,
plot_summary_plot = TRUE)