#!/bin/bash -e
#
# Runs a multi-stage MMseqs2 search for one query input and writes alignment
# results (a3m or m8) into a working directory.
#
# Positional arguments:
#   $1   MMSEQS          path to the mmseqs executable (required)
#   $2   QUERY           query input handed to `mmseqs createdb` (required)
#   $3   (not read by this script)
#   $4   BASE            working/output directory, created if missing (required)
#   $5   DB1             first target database; outputs are named uniref.* (required)
#   $6   DB2             database searched when USE_TEMPLATES=1 (output pdb70.m8)
#   $7   DB3             database searched when USE_ENV=1
#                        (output bfd.mgnify30.metaeuk30.smag30.*)
#   $8   USE_ENV         "1" enables the DB3 branch
#   $9   USE_TEMPLATES   "1" enables the DB2 branch
#   $10  FILTER          "1" switches QSC to 0.8 and MAX_ACCEPT to 100000; the
#                        value is also forwarded to --expand-filter-clusters
#                        and --filter-msa
#   $11  TAXONOMY        "1" writes uniref_tax.tsv when ${DB1}_taxonomy exists
#   $12  TAXONOMYREPORT  "1" writes uniref_taxreport.tsv when ${DB1}_taxonomy exists
#   $13  M8OUT           "1" writes .m8 tables instead of .a3m files
#   $14  GPU             "1" adds "--gpu 1 --prefilter-mode 1" to the searches
#   $15  GPUSERVER       "1" adds "--gpu-server 1" to the searches
#
# Environment (all optional):
#   ORIG_CUDA_VISIBLE_DEVICES    if set, copied to CUDA_VISIBLE_DEVICES
#   UNIREF_CUDA_VISIBLE_DEVICES  overrides it for the DB1 search
#   PDB_CUDA_VISIBLE_DEVICES     overrides it for the DB2 search
#   ENV_CUDA_VISIBLE_DEVICES     overrides it for the DB3 search
#
# Exit status: 0 when every stage succeeded; non-zero when the initial search
# or any of the three parallel branches failed.

# The "-e" on the shebang line is dropped when the file is started as
# "bash script.sh", so errexit is enabled explicitly as well.
set -e

# Required arguments. An empty BASE must never reach the rm calls at the end.
MMSEQS="${1:?missing argument 1: path to the mmseqs executable}"
QUERY="${2:?missing argument 2: query input}"
BASE="${4:?missing argument 4: working directory}"
DB1="${5:?missing argument 5: first target database}"
DB2="$6"
DB3="$7"
USE_ENV="$8"
USE_TEMPLATES="$9"
FILTER="${10}"
TAXONOMY="${11}"
TAXONOMYREPORT="${12}"
M8OUT="${13}"
GPU="${14}"
GPUSERVER="${15}"

EXPAND_EVAL=inf
ALIGN_EVAL=10
DIFF=3000
QSC=-20.0
MAX_ACCEPT=1000000
if [ "${FILTER}" = "1" ]; then
# 0.1 was not used in benchmarks due to POSIX shell bug in line above
#  EXPAND_EVAL=0.1
  ALIGN_EVAL=10
  QSC=0.8
  MAX_ACCEPT=100000
fi

export MMSEQS_CALL_DEPTH=1

# Option lists are arrays so that no command relies on unquoted word
# splitting. gpu_args always starts empty, so a GPU_PARAM variable inherited
# from the environment can no longer leak into the command lines.
gpu_args=()
if [ "${GPU}" = "1" ]; then
  gpu_args+=(--gpu 1 --prefilter-mode 1)
fi
if [ "${GPUSERVER}" = "1" ]; then
  gpu_args+=(--gpu-server 1)
fi

# The single quotes around the --k-score value are part of the argument
# itself; the previous string-based form passed them literally too.
search_args=(
  --num-iterations 3 --db-load-mode 2 -a
  --k-score "'seq:96,prof:80'"
  -e 0.1 --max-seqs 10000
  "${gpu_args[@]}"
)
filter_args=(
  --filter-min-enable 1000 --diff "${DIFF}"
  --qid 0.0,0.2,0.4,0.6,0.8,1.0 --qsc 0 --max-seq-id 0.95
)
expand_args=(
  --expansion-mode 0 -e "${EXPAND_EVAL}"
  --expand-filter-clusters "${FILTER}" --max-seq-id 0.95
)

mkdir -p "${BASE}"
"${MMSEQS}" createdb "${QUERY}" "${BASE}/qdb" --dbtype 1

if [ -n "${ORIG_CUDA_VISIBLE_DEVICES}" ]; then
  export CUDA_VISIBLE_DEVICES="${ORIG_CUDA_VISIBLE_DEVICES}"
fi
if [ -n "${UNIREF_CUDA_VISIBLE_DEVICES}" ]; then
  export CUDA_VISIBLE_DEVICES="${UNIREF_CUDA_VISIBLE_DEVICES}"
fi

# Initial iterative search against DB1. The profile it leaves in tmp1 is
# moved to prof_res and reused as the query by all three branches below.
"${MMSEQS}" search "${BASE}/qdb" "${DB1}" "${BASE}/res" "${BASE}/tmp1" \
  "${search_args[@]}"
"${MMSEQS}" mvdb "${BASE}/tmp1/latest/profile_1" "${BASE}/prof_res"
"${MMSEQS}" lndb "${BASE}/qdb_h" "${BASE}/prof_res_h"

# --- Branch 1: expand, realign and filter the DB1 hits ----------------------
(
  "${MMSEQS}" expandaln "${BASE}/qdb" "${DB1}.idx" "${BASE}/res" \
    "${DB1}.idx" "${BASE}/res_exp" --db-load-mode 2 "${expand_args[@]}"
  "${MMSEQS}" align "${BASE}/prof_res" "${DB1}.idx" "${BASE}/res_exp" \
    "${BASE}/res_exp_realign" --db-load-mode 2 -e "${ALIGN_EVAL}" \
    --max-accept "${MAX_ACCEPT}" --alt-ali 10 -a
  "${MMSEQS}" filterresult "${BASE}/qdb" "${DB1}.idx" \
    "${BASE}/res_exp_realign" "${BASE}/res_exp_realign_filter" \
    --db-load-mode 2 --qid 0 --qsc "${QSC}" --diff 0 --max-seq-id 1.0 \
    --filter-min-enable 100

  if [ "${M8OUT}" = "1" ]; then
    "${MMSEQS}" filterresult "${BASE}/qdb" "${DB1}.idx" \
      "${BASE}/res_exp_realign_filter" \
      "${BASE}/res_exp_realign_filter_filter" \
      --db-load-mode 2 "${filter_args[@]}"
    "${MMSEQS}" convertalis "${BASE}/qdb" "${DB1}.idx" \
      "${BASE}/res_exp_realign_filter_filter" "${BASE}/uniref.m8" \
      --db-load-mode 2 \
      --format-output query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,tseq
    "${MMSEQS}" rmdb "${BASE}/res_exp_realign_filter_filter"
  else
    "${MMSEQS}" result2msa "${BASE}/qdb" "${DB1}.idx" \
      "${BASE}/res_exp_realign_filter" "${BASE}/uniref.a3m" \
      --msa-format-mode 6 --db-load-mode 2 \
      --filter-msa "${FILTER}" "${filter_args[@]}"
  fi

  "${MMSEQS}" rmdb "${BASE}/res_exp_realign"
  "${MMSEQS}" rmdb "${BASE}/res_exp"
  "${MMSEQS}" rmdb "${BASE}/res"

  if [ "${TAXONOMY}" = "1" ] && [ -e "${DB1}_taxonomy" ]; then
    "${MMSEQS}" convertalis "${BASE}/qdb" "${DB1}.idx" \
      "${BASE}/res_exp_realign_filter" "${BASE}/res_exp_realign_tax" \
      --db-output 1 --format-output "taxid,target,taxlineage" \
      --db-load-mode 2
    # Overwrite the .dbtype file with the four bytes 8,0,0,0 before the
    # result is handed to filtertaxdb.
    awk 'BEGIN { printf("%c%c%c%c",8,0,0,0); exit; }' \
      > "${BASE}/res_exp_realign_tax.dbtype"
    MMSEQS_FORCE_MERGE=1 "${MMSEQS}" filtertaxdb "${DB1}" \
      "${BASE}/res_exp_realign_tax" "${BASE}/res_exp_realign_tax_filt" \
      --taxon-list '!12908&&!28384'
    # Strip NUL bytes from the database file and keep unique lines only.
    tr -d '\000' < "${BASE}/res_exp_realign_tax_filt" \
      | sort -u > "${BASE}/uniref_tax.tsv"
    "${MMSEQS}" rmdb "${BASE}/res_exp_realign_tax_filt"
    "${MMSEQS}" rmdb "${BASE}/res_exp_realign_tax"
  fi

  if [ "${TAXONOMYREPORT}" = "1" ] && [ -e "${DB1}_taxonomy" ]; then
    "${MMSEQS}" taxonomyreport "${DB1}.idx" \
      "${BASE}/res_exp_realign_filter" \
      "${BASE}/res_exp_realign_taxreport" --report-mode 3
    "${MMSEQS}" createtsv "${BASE}/qdb" \
      "${BASE}/res_exp_realign_taxreport" "${BASE}/uniref_taxreport.tsv"
    "${MMSEQS}" rmdb "${BASE}/res_exp_realign_taxreport"
  fi

  "${MMSEQS}" rmdb "${BASE}/res_exp_realign_filter"
) &
uniref_pid=$!

# --- Branch 2: search of DB2 with the profile (optional) --------------------
(
  if [ "${USE_TEMPLATES}" = "1" ]; then
    if [ -n "${ORIG_CUDA_VISIBLE_DEVICES}" ]; then
      export CUDA_VISIBLE_DEVICES="${ORIG_CUDA_VISIBLE_DEVICES}"
    fi
    if [ -n "${PDB_CUDA_VISIBLE_DEVICES}" ]; then
      export CUDA_VISIBLE_DEVICES="${PDB_CUDA_VISIBLE_DEVICES}"
    fi
    "${MMSEQS}" search "${BASE}/prof_res" "${DB2}" "${BASE}/res_pdb" \
      "${BASE}/tmp2" --db-load-mode 2 -s 7.5 -a -e 0.1 "${gpu_args[@]}"
    "${MMSEQS}" convertalis "${BASE}/prof_res" "${DB2}.idx" \
      "${BASE}/res_pdb" "${BASE}/pdb70.m8" \
      --format-output query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,cigar \
      --db-load-mode 2
    "${MMSEQS}" rmdb "${BASE}/res_pdb"
  fi
) &
templates_pid=$!

# --- Branch 3: search of DB3 with the profile (optional) --------------------
(
  if [ "${USE_ENV}" = "1" ]; then
    if [ -n "${ORIG_CUDA_VISIBLE_DEVICES}" ]; then
      export CUDA_VISIBLE_DEVICES="${ORIG_CUDA_VISIBLE_DEVICES}"
    fi
    if [ -n "${ENV_CUDA_VISIBLE_DEVICES}" ]; then
      export CUDA_VISIBLE_DEVICES="${ENV_CUDA_VISIBLE_DEVICES}"
    fi
    "${MMSEQS}" search "${BASE}/prof_res" "${DB3}" "${BASE}/res_env" \
      "${BASE}/tmp3" "${search_args[@]}"
    "${MMSEQS}" expandaln "${BASE}/prof_res" "${DB3}.idx" \
      "${BASE}/res_env" "${DB3}.idx" "${BASE}/res_env_exp" \
      -e "${EXPAND_EVAL}" --expansion-mode 0 --db-load-mode 2
    "${MMSEQS}" align "${BASE}/tmp3/latest/profile_1" "${DB3}.idx" \
      "${BASE}/res_env_exp" "${BASE}/res_env_exp_realign" \
      --db-load-mode 2 -e "${ALIGN_EVAL}" --max-accept "${MAX_ACCEPT}" \
      --alt-ali 10 -a
    "${MMSEQS}" filterresult "${BASE}/qdb" "${DB3}.idx" \
      "${BASE}/res_env_exp_realign" "${BASE}/res_env_exp_realign_filter" \
      --db-load-mode 2 --qid 0 --qsc "${QSC}" --diff 0 --max-seq-id 1.0 \
      --filter-min-enable 100

    if [ "${M8OUT}" = "1" ]; then
      "${MMSEQS}" filterresult "${BASE}/qdb" "${DB3}.idx" \
        "${BASE}/res_env_exp_realign_filter" \
        "${BASE}/res_env_exp_realign_filter_filter" \
        --db-load-mode 2 "${filter_args[@]}"
      "${MMSEQS}" convertalis "${BASE}/qdb" "${DB3}.idx" \
        "${BASE}/res_env_exp_realign_filter_filter" \
        "${BASE}/bfd.mgnify30.metaeuk30.smag30.m8" --db-load-mode 2 \
        --format-output query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,tseq
      "${MMSEQS}" rmdb "${BASE}/res_env_exp_realign_filter_filter"
    else
      "${MMSEQS}" result2msa "${BASE}/qdb" "${DB3}.idx" \
        "${BASE}/res_env_exp_realign_filter" \
        "${BASE}/bfd.mgnify30.metaeuk30.smag30.a3m" \
        --msa-format-mode 6 --db-load-mode 2 \
        --filter-msa "${FILTER}" "${filter_args[@]}"
    fi

    "${MMSEQS}" rmdb "${BASE}/res_env_exp_realign_filter"
    "${MMSEQS}" rmdb "${BASE}/res_env_exp_realign"
    "${MMSEQS}" rmdb "${BASE}/res_env_exp"
    "${MMSEQS}" rmdb "${BASE}/res_env"
  fi
) &
env_pid=$!

# A bare "wait" always returns 0, which hid failures of the background
# branches. Waiting on each PID exposes every branch's exit status; the
# "|| status=$?" form keeps errexit from aborting before cleanup has run.
status=0
for pid in "${uniref_pid}" "${templates_pid}" "${env_pid}"; do
  wait "${pid}" || status=$?
done

# Cleanup runs whether or not a branch failed. "res" is normally removed by
# branch 1 already; it is removed again here in case that branch stopped early.
"${MMSEQS}" rmdb "${BASE}/qdb"
"${MMSEQS}" rmdb "${BASE}/qdb_h"
"${MMSEQS}" rmdb "${BASE}/res"
rm -f -- "${BASE:?}/prof_res"*
rm -rf -- "${BASE:?}/tmp1" "${BASE:?}/tmp2" "${BASE:?}/tmp3"

exit "${status}"