Metaphlan :: (suggestion) set DB location under environment variable

Hello,

We would like to provide metaphlan on our institution's cluster, but the default DB location causes problems for us.
Context: centralised installation on read-only shared drives.

We would like to have the metaphlan DB location under the control of an environment variable that points outside of the installation folder.
It will allow us to update the DB easily.

For this purpose I made the following patch against tagged release 3.0.10:

--- metaphlan.py.ori    2021-06-14 11:28:14.000000000 +0000
+++ metaphlan.py    2021-07-02 13:32:25.414352399 +0000
@@ -62,6 +62,7 @@
 metaphlan_script_install_folder = os.path.dirname(os.path.abspath(__file__))
 # get the default database folder
 DEFAULT_DB_FOLDER = os.path.join(metaphlan_script_install_folder, "metaphlan_databases")
+DEFAULT_DB_FOLDER= os.environ.get('METAPHLAN_DB_DIR', DEFAULT_DB_FOLDER)
 INDEX = 'latest'
 tax_units = "kpcofgst"

--- strainphlan.py.ori  2021-06-14 11:28:14.000000000 +0000
+++ strainphlan.py  2021-07-02 13:32:31.063352739 +0000
@@ -25,8 +25,12 @@
 from Bio.SeqRecord import SeqRecord
 from Bio.Seq import Seq

-DEFAULT_DATABASE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 
-    "metaphlan_databases/mpa_v30_CHOCOPhlAn_201901.pkl")
+# get the directory that contains this script
+metaphlan_script_install_folder = os.path.dirname(os.path.abspath(__file__))
+DEFAULT_DB_FOLDER = os.path.join(metaphlan_script_install_folder, "metaphlan_databases")
+DEFAULT_DB_FOLDER = os.environ.get('METAPHLAN_DB_DIR', DEFAULT_DB_FOLDER)
+DEFAULT_DB_NAME =  "mpa_v30_CHOCOPhlAn_201901.pkl"
+DEFAULT_DATABASE = os.path.join(DEFAULT_DB_FOLDER, DEFAULT_DB_NAME)
 PHYLOPHLAN_MODES = ['accurate', 'fast']

 # Regular expression to remove comments: \n\"\"\"[^"]+\n\"\"\"
--- utils/extract_markers.py.ori    2021-06-14 11:28:14.000000000 +0000
+++ utils/extract_markers.py    2021-07-02 13:32:47.264353714 +0000
@@ -27,8 +27,13 @@
 except ImportError:
     from external_exec import generate_markers_fasta

-DEFAULT_DATABASE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 
-    "../metaphlan_databases/mpa_v30_CHOCOPhlAn_201901.pkl")
+# get the directory that contains this script
+metaphlan_script_install_folder = os.path.dirname(os.path.abspath(__file__))
+DEFAULT_DB_FOLDER = os.path.join(metaphlan_script_install_folder, "../metaphlan_databases")
+DEFAULT_DB_FOLDER = os.environ.get('METAPHLAN_DB_DIR', DEFAULT_DB_FOLDER)
+DEFAULT_DB_NAME =  "mpa_v30_CHOCOPhlAn_201901.pkl"
+DEFAULT_DATABASE = os.path.join(DEFAULT_DB_FOLDER, DEFAULT_DB_NAME)
+

 """
 Reads and parses the command line arguments of the script.

With this modification, it is easy for anyone to control the metaphlan database location (we obviously export METAPHLAN_DB_DIR via the metaphlan modulefile).

regards

Eric

Hi Eric,
Thank you for the patches, I’ve included them in the new version 3.0.11!