Reploting MaAsLin summary plot for interactions

Thank you for creating this fantastic package. I would like to hear your opinion on the best approach for replotting the MaAsLin3 summary plot while focusing on visualizing an interaction between two variables. I have tried using the ‘maaslin_plot_results_from_output’ function, but I’ve been unable to produce the desired summary plot and instead get the association plots. I am attaching an example using the HMP2 data. I’m sorry if this is a silly question; I’m still a novice at bioinformatics and microbiome analysis.

########################################################################
################# MaAsLin 3 input
########################################################################

# Read abundance table
taxa_table_name <- system.file("extdata", "HMP2_taxonomy.tsv",
                               package = "maaslin3")
taxa_table <- read.csv(taxa_table_name, sep = '\t', row.names = 1)

# Read metadata table
metadata_name <- system.file("extdata", "HMP2_metadata.tsv",
                             package = "maaslin3")
metadata <- read.csv(metadata_name, sep = '\t', row.names = 1)

# Factor the categorical variables to test IBD against healthy controls
metadata$diagnosis <-
  factor(metadata$diagnosis, levels = c('nonIBD', 'UC', 'CD'))
metadata$dysbiosis_state <-
  factor(metadata$dysbiosis_state, levels =
           c('none', 'dysbiosis_UC', 'dysbiosis_CD'))
metadata$antibiotics <-
  factor(metadata$antibiotics, levels = c('No', 'Yes'))

taxa_table[1:5, 1:5]

########################################################################
################# Run MaAslin3 with interaction
########################################################################
set.seed(22)
fit_out <- maaslin3(input_data = taxa_table,
                    input_metadata = metadata,
                    output = 'hmp2_output',
                    formula = '~ diagnosis:antibiotics +  dysbiosis_state +
                        age + reads',
                    normalization = 'TSS',
                    transform = 'LOG',
                    augment = TRUE,
                    standardize = TRUE,
                    max_significance = 0.1,
                    median_comparison_abundance = TRUE,
                    median_comparison_prevalence = FALSE,
                    max_pngs = 10,
                    cores = 1)

########################################################################
################# Replot Summary Plot
########################################################################

# Rename results file with clean titles
all_results <- read.csv('hmp2_output/all_results.tsv', sep='\t')
all_results <- all_results %>%
  mutate(metadata = case_when(metadata == 'age' ~ 'Age',
                              metadata == 'diagnosis' ~ 'Diagnosis',
                              metadata == 'dysbiosis_state' ~ 'Dysbiosis',
                              metadata == 'reads' ~ 'Read depth'),
         value = case_when(value == 'age' ~ 'Age',
                           value == 'dysbiosis_CD' ~ 'CD',
                           value == 'dysbiosis_UC' ~ 'UC',
                           value == 'reads' ~ 'Read depth',
                           TRUE ~ value),
         feature = gsub('_', ' ', feature) %>%
           gsub(pattern = 'sp ', replacement = 'sp. '))

# Write results
write.table(all_results, 'hmp2_output/all_results.tsv', sep='\t')

# This section is necessary for updating the association plots
taxa_table_copy <- taxa_table
colnames(taxa_table_copy) <- gsub('_', ' ', colnames(taxa_table_copy)) %>%
  gsub(pattern = 'sp ', replacement = 'sp. ')

# Rename the features in the norm transformed data file
data_transformed <-
  read.csv('hmp2_output/features/data_transformed.tsv', sep='\t')
colnames(data_transformed) <-
  gsub('_', ' ', colnames(data_transformed)) %>%
  gsub(pattern = 'sp ', replacement = 'sp. ')
write.table(data_transformed,
            'hmp2_output/features/data_transformed.tsv',
            sep='\t', row.names = FALSE)

# Rename the metadata like in the outputs table
metadata_copy <- metadata
colnames(metadata_copy) <-
  case_when(colnames(metadata_copy) == 'age' ~ 'Age',
            colnames(metadata_copy) == 'diagnosis' ~ 'Diagnosis',
            colnames(metadata_copy) == 'dysbiosis_state' ~ 'Dysbiosis',
            colnames(metadata_copy) == 'reads' ~ 'Read depth',
            TRUE ~ colnames(metadata_copy))
metadata_copy <- metadata_copy %>%
  mutate(Dysbiosis = case_when(Dysbiosis == 'dysbiosis_UC' ~ 'UC',
                               Dysbiosis == 'dysbiosis_CD' ~ 'CD',
                               Dysbiosis == 'none' ~ 'None') %>%
           factor(levels = c('None', 'UC', 'CD')),
         antibiotics = case_when(antibiotics == 'Yes' ~ 'Used',
                                 antibiotics == 'No' ~ 'Not used') %>%
           factor(levels = c('Not used', 'Used')),
         Diagnosis = case_when(Diagnosis == 'nonIBD' ~ 'non-IBD',
                               TRUE ~ Diagnosis) %>%
           factor(levels = c('non-IBD', 'UC', 'CD')))

# Set the new heatmap and coefficient plot variables and order them
coef_plot_vars = c('CD:antibiotics Not used', 'CD:antibiotics Used')

heatmap_vars = c('Dysbiosis UC', 'Diagnosis UC',
                 'Abx Used', 'Age', 'Read depth',
                 'UC:antibiotics Yes',
                 'UC:antibiotics No')

# Recreate the plots
plots <- maaslin_plot_results_from_output(
  output = 'hmp2_output',
  metadata = metadata_copy,
  normalization = "TSS",
  transform = "LOG",
  median_comparison_abundance = TRUE,
  median_comparison_prevalence = FALSE,
  coef_plot_vars = coef_plot_vars,
  heatmap_vars = heatmap_vars,
  max_significance = 0.1,
  max_pngs = 20,
  plot_summary_plot = TRUE)

I think the issue is that the names you’re looking for in the heatmap_vars and coef_plot_vars aren’t actually in your results. This causes the summary plot to not be produced but leaves a message at the top of the console output from maaslin_plot_results_from_output saying what variables were missed and which are available. For example, this code works fine for me:

########################################################################
################# MaAsLin 3 input
########################################################################

# Read abundance table
taxa_table_name <- system.file("extdata", "HMP2_taxonomy.tsv",
                               package = "maaslin3")
taxa_table <- read.csv(taxa_table_name, sep = '\t', row.names = 1)

# Read metadata table
metadata_name <- system.file("extdata", "HMP2_metadata.tsv",
                             package = "maaslin3")
metadata <- read.csv(metadata_name, sep = '\t', row.names = 1)

# Factor the categorical variables to test IBD against healthy controls
metadata$diagnosis <-
  factor(metadata$diagnosis, levels = c('nonIBD', 'UC', 'CD'))
metadata$dysbiosis_state <-
  factor(metadata$dysbiosis_state, levels =
           c('none', 'dysbiosis_UC', 'dysbiosis_CD'))
metadata$antibiotics <-
  factor(metadata$antibiotics, levels = c('No', 'Yes'))

taxa_table[1:5, 1:5]

########################################################################
################# Run MaAslin3 with interaction
########################################################################
set.seed(22)
fit_out <- maaslin3(input_data = taxa_table,
                    input_metadata = metadata,
                    output = 'hmp2_output',
                    formula = '~ diagnosis:antibiotics +  dysbiosis_state +
                        age + reads',
                    normalization = 'TSS',
                    transform = 'LOG',
                    augment = TRUE,
                    standardize = TRUE,
                    max_significance = 0.1,
                    median_comparison_abundance = TRUE,
                    median_comparison_prevalence = FALSE,
                    max_pngs = 10,
                    cores = 1)

########################################################################
################# Replot Summary Plot
########################################################################

# Rename results file with clean titles
all_results <- read.csv('hmp2_output/all_results.tsv', sep='\t')
all_results <- all_results %>%
  mutate(metadata = case_when(metadata == 'age' ~ 'Age',
                              metadata == 'diagnosis' ~ 'Diagnosis',
                              metadata == 'dysbiosis_state' ~ 'Dysbiosis',
                              metadata == 'reads' ~ 'Read depth'),
         value = case_when(value == 'age' ~ 'Age',
                           value == 'dysbiosis_CD' ~ 'CD',
                           value == 'dysbiosis_UC' ~ 'UC',
                           value == 'reads' ~ 'Read depth',
                           value == 'CD:antibioticsNo' ~ 'CD:antibiotics Not used',
                           value == 'CD:antibioticsYes' ~ 'CD:antibiotics Used',
                           value == 'UC:antibioticsNo' ~ 'UC:antibiotics Not used',
                           value == 'UC:antibioticsYes' ~ 'UC:antibiotics Used',
                           TRUE ~ value),
         name = case_when(name == 'age' ~ 'Age',
                           name == 'dysbiosis_statedysbiosis_CD' ~ 'CD',
                           name == 'dysbiosis_statedysbiosis_UC' ~ 'UC',
                           name == 'reads' ~ 'Read depth',
                           name == 'diagnosisCD:antibioticsNo' ~ 'CD:antibiotics Not used',
                           name == 'diagnosisCD:antibioticsYes' ~ 'CD:antibiotics Used',
                           name == 'diagnosisUC:antibioticsNo' ~ 'UC:antibiotics Not used',
                          name == 'diagnosisUC:antibioticsYes' ~ 'UC:antibiotics Used',
                           TRUE ~ name),
         feature = gsub('_', ' ', feature) %>%
           gsub(pattern = 'sp ', replacement = 'sp. '))

# Write results
write.table(all_results, 'hmp2_output/all_results.tsv', sep='\t')

# This section is necessary for updating the association plots
taxa_table_copy <- taxa_table
colnames(taxa_table_copy) <- gsub('_', ' ', colnames(taxa_table_copy)) %>%
  gsub(pattern = 'sp ', replacement = 'sp. ')

# Rename the features in the norm transformed data file
data_transformed <-
  read.csv('hmp2_output/features/data_transformed.tsv', sep='\t')
colnames(data_transformed) <-
  gsub('_', ' ', colnames(data_transformed)) %>%
  gsub(pattern = 'sp ', replacement = 'sp. ')
write.table(data_transformed,
            'hmp2_output/features/data_transformed.tsv',
            sep='\t', row.names = FALSE)

# Rename the metadata like in the outputs table
metadata_copy <- metadata
colnames(metadata_copy) <-
  case_when(colnames(metadata_copy) == 'age' ~ 'Age',
            colnames(metadata_copy) == 'diagnosis' ~ 'Diagnosis',
            colnames(metadata_copy) == 'dysbiosis_state' ~ 'Dysbiosis',
            colnames(metadata_copy) == 'reads' ~ 'Read depth',
            TRUE ~ colnames(metadata_copy))
metadata_copy <- metadata_copy %>%
  mutate(Dysbiosis = case_when(Dysbiosis == 'dysbiosis_UC' ~ 'UC',
                               Dysbiosis == 'dysbiosis_CD' ~ 'CD',
                               Dysbiosis == 'none' ~ 'None') %>%
           factor(levels = c('None', 'UC', 'CD')),
         antibiotics = case_when(antibiotics == 'Yes' ~ 'Used',
                                 antibiotics == 'No' ~ 'Not used') %>%
           factor(levels = c('Not used', 'Used')),
         Diagnosis = case_when(Diagnosis == 'nonIBD' ~ 'non-IBD',
                               TRUE ~ Diagnosis) %>%
           factor(levels = c('non-IBD', 'UC', 'CD')))

# Set the new heatmap and coefficient plot variables and order them
coef_plot_vars = c('Diagnosis UC:antibiotics Used')

heatmap_vars = c('Dysbiosis UC',
                 'Age', 'Read depth',
                 'Diagnosis UC:antibiotics Used',
                 'Diagnosis UC:antibiotics Not used')

# Recreate the plots
plots <- maaslin_plot_results_from_output(
  output = 'hmp2_output',
  metadata = metadata_copy,
  normalization = "TSS",
  transform = "LOG",
  median_comparison_abundance = TRUE,
  median_comparison_prevalence = FALSE,
  coef_plot_vars = coef_plot_vars,
  heatmap_vars = heatmap_vars,
  max_significance = 0.1,
  max_pngs = 20,
  plot_summary_plot = TRUE)
1 Like