Suppress printing (#115) (#511)

Colelyman · kclem · web-flow · commit 62bfaf0f987a · 2025-01-10T17:08:29.000-07:00
* Refactor output handling to use 'info' instead of 'print' and remove unused code

* Remove exit after raising of Exception

* Cast chr_id to a string (again)

* Properly scrub invalid characters before they are written to the JSON status file

This commit converts \n, \, and " when they are written to the JSON message
field, because any of those characters would otherwise make the JSON invalid.

* Change branch to cole/suppress_printing

* Move test branch back to master

---------

Co-authored-by: kclem <k.clement.dev@gmail.com>
diff --git a/CRISPResso2/CRISPRessoAggregateCORE.py b/CRISPResso2/CRISPRessoAggregateCORE.py
@@ -53,7 +53,7 @@ def main():
 |/--\ \_| \_| | \ |_ \_| /--\ | |_ |
 |__________________________________|
         '''
-        print(CRISPRessoShared.get_crispresso_header(description, aggregate_string))
+        info(CRISPRessoShared.get_crispresso_header(description, aggregate_string))
 
         parser = argparse.ArgumentParser(description="Aggregate CRISPResso2 Runs")
         parser.add_argument("-p", "--prefix", action='append', help="Prefix for CRISPResso folders to aggregate (may be specified multiple times)", default=[])
@@ -903,7 +903,7 @@ def main():
             process_pool.shutdown()
 
         info('Analysis Complete!', {'percent_complete': 100})
-        print(CRISPRessoShared.get_crispresso_footer())
+        info(CRISPRessoShared.get_crispresso_footer())
         sys.exit(0)
 
     except Exception as e:
diff --git a/CRISPResso2/CRISPRessoBatchCORE.py b/CRISPResso2/CRISPRessoBatchCORE.py
@@ -89,18 +89,17 @@ def main():
 ||__)/--\| \__|  ||
 |_________________|
         '''
-        print(CRISPRessoShared.get_crispresso_header(description, batch_string))
+        info(CRISPRessoShared.get_crispresso_header(description, batch_string))
 
         # if no args are given, print a simplified help message
         if len(sys.argv) == 1:
-            print(CRISPRessoShared.format_cl_text('usage: CRISPRessoBatch  [-bs BATCH_SETTINGS]  [-n NAME]\n' + \
+            raise CRISPRessoShared.BadParameterException(CRISPRessoShared.format_cl_text('usage: CRISPRessoBatch  [-bs BATCH_SETTINGS]  [-n NAME]\n' + \
                 'commonly-used arguments:\n' + \
                 '-h, --help            show the full list of arguments\n' + \
                 '-v, --version         show program\'s version number and exit\n' + \
                 '-bs BATCH_SETTINGS    Tab-separated file where rows are samples and columns specify settings for each sample.\n' + \
                 '-n NAME, --name NAME  Name for the analysis (default: name based on input file name)'
             ))
-            sys.exit()
 
         parser = CRISPRessoShared.getCRISPRessoArgParser("Batch", parser_title = 'CRISPRessoBatch Parameters')
 
@@ -984,7 +983,7 @@ def main():
                 CRISPRessoShared.zip_results(path_value[1])
             else:
                 CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)
-        print(CRISPRessoShared.get_crispresso_footer())
+        info(CRISPRessoShared.get_crispresso_footer())
         sys.exit(0)
 
     except Exception as e:
diff --git a/CRISPResso2/CRISPRessoCORE.py b/CRISPResso2/CRISPRessoCORE.py
@@ -7,26 +7,24 @@
 (c) 2020 The General Hospital Corporation. All Rights Reserved.
 '''
 
-import sys
-running_python3 = False
-if sys.version_info > (3, 0):
-    running_python3 = True
-
 import argparse
-from collections import Counter
-from copy import deepcopy
-from concurrent.futures import ProcessPoolExecutor, wait
-from functools import partial
 import errno
 import gzip
 import json
-import zipfile
+import logging
 import os
 import re
+import sys
 import subprocess as sb
 import traceback
-from multiprocessing import Process
+import zipfile
 
+from collections import Counter
+from copy import deepcopy
+from concurrent.futures import ProcessPoolExecutor, wait
+from datetime import datetime
+from functools import partial
+from multiprocessing import Process
 
 from CRISPResso2 import CRISPRessoCOREResources
 from CRISPResso2.CRISPRessoReports import CRISPRessoReport
@@ -41,15 +39,6 @@
 from CRISPResso2 import CRISPResso2Align
 from CRISPResso2 import CRISPRessoMultiProcessing
 
-from datetime import datetime
-present = datetime.now()
-#d1 = datetime.strptime('21/07/2019','%d/%m/%Y')
-#if present > d1:
-#    print('\nYour version of CRISPResso2 is out of date. Please download a new version.\n')
-#    sys.exit(1)
-
-import logging
-
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
 logger.addHandler(CRISPRessoShared.LogStreamHandler())
@@ -429,7 +418,6 @@ def get_new_variant_object(args, fastq_seq, refs, ref_names, aln_matrix, pe_scaf
     if args.prime_editing_pegRNA_scaffold_seq and 'Prime-edited' in best_match_names: #any scaffold extensions must be closer to the prime-edited sequence
         pe_read_possible_scaffold_loc = new_variant['variant_Prime-edited']['ref_positions'].index(pe_scaffold_dna_info[0]-1) + 1
         if new_variant['variant_Prime-edited']['aln_seq'][pe_read_possible_scaffold_loc:(pe_read_possible_scaffold_loc+len(pe_scaffold_dna_info[1]))] == pe_scaffold_dna_info[1]:
-#            print('comparingHERE ' + new_variant['variant_Prime-edited']['aln_seq'][pe_read_possible_scaffold_loc:(pe_read_possible_scaffold_loc+len(pe_scaffold_dna_info[1])+5)] + ' from ' + new_variant['variant_Prime-edited']['aln_seq'] + ' and ' + new_variant['variant_Prime-edited']['aln_ref'])
             new_variant['aln_ref_names'] = ["Scaffold-incorporated"]
             new_variant['class_name'] = "Scaffold-incorporated"
             old_payload = deepcopy(new_variant['variant_Prime-edited']) #keep prime-edited allele and alignment
@@ -1380,11 +1368,11 @@ def print_stacktrace_if_debug():
         start_time_string =  start_time.strftime('%Y-%m-%d %H:%M:%S')
         description = ['~~~CRISPResso 2~~~', '-Analysis of genome editing outcomes from deep sequencing data-']
         header = CRISPRessoShared.get_crispresso_header(description=description, header_str=None)
-        print(header)
+        info(header)
 
         # if no args are given, print a simplified help message
         if len(sys.argv) == 1:
-            print(CRISPRessoShared.format_cl_text('usage: CRISPResso [-r1 FASTQ_R1] [-r2 FASTQ_R2] [-a AMPLICON_SEQ] [-g GUIDE_SEQ] [-n NAME]\n' + \
+            raise CRISPRessoShared.BadParameterException(CRISPRessoShared.format_cl_text('usage: CRISPResso [-r1 FASTQ_R1] [-r2 FASTQ_R2] [-a AMPLICON_SEQ] [-g GUIDE_SEQ] [-n NAME]\n' + \
                 'commonly-used arguments:\n' + \
                 '-h, --help            show the full list of arguments\n' + \
                 '-v, --version         show program\'s version number and exit\n' + \
@@ -1394,7 +1382,6 @@ def print_stacktrace_if_debug():
                 '-g GUIDE_SEQ          Guide sequence (default: None)\n' + \
                 '-n NAME, --name NAME  Name for the analysis (default: name based on input file name)'
             ))
-            sys.exit()
 
 
         arg_parser = CRISPRessoShared.getCRISPRessoArgParser("Core")
@@ -5133,7 +5120,7 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq):
             CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)
 
         info('Analysis Complete!', {'percent_complete': 100})
-        print(CRISPRessoShared.get_crispresso_footer())
+        info(CRISPRessoShared.get_crispresso_footer())
 
         sys.exit(0)
 
diff --git a/CRISPResso2/CRISPRessoCompareCORE.py b/CRISPResso2/CRISPRessoCompareCORE.py
@@ -103,7 +103,7 @@ def main():
 |___________________________|
         '''
         compare_header = CRISPRessoShared.get_crispresso_header(description, compare_header)
-        print(compare_header)
+        info(compare_header)
 
         parser = CRISPRessoShared.getCRISPRessoArgParser("Compare", parser_title = 'CRISPRessoCompare Parameters')
 
@@ -455,7 +455,7 @@ def get_plot_title_with_ref_name(plotTitle, refName):
             CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)
 
         info('Analysis Complete!', {'percent_complete': 100})
-        print(CRISPRessoShared.get_crispresso_footer())
+        info(CRISPRessoShared.get_crispresso_footer())
         sys.exit(0)
 
     except Exception as e:
diff --git a/CRISPResso2/CRISPRessoMetaCORE.py b/CRISPResso2/CRISPRessoMetaCORE.py
@@ -84,7 +84,7 @@ def main():
 |  |_| |_| |_| |_|____   |_|   |_|  |_|  |
 |________________________________________|
         '''
-        print(CRISPRessoShared.get_crispresso_header(description, meta_string))
+        info(CRISPRessoShared.get_crispresso_header(description, meta_string))
 
         parser = CRISPRessoShared.getCRISPRessoArgParser("Meta", parser_title = 'CRISPRessoMeta Parameters')
 
@@ -366,7 +366,7 @@ def main():
             crispresso2Meta_info_file, crispresso2_info,
         )
         info('Analysis Complete!', {'percent_complete': 100})
-        print(CRISPRessoShared.get_crispresso_footer())
+        info(CRISPRessoShared.get_crispresso_footer())
         sys.exit(0)
 
     except Exception as e:
diff --git a/CRISPResso2/CRISPRessoPooledCORE.py b/CRISPResso2/CRISPRessoPooledCORE.py
@@ -67,28 +67,36 @@ def is_exe(fpath):
 
 
 def check_samtools():
+    """
+    Assert that samtools is installed
+    """
 
     cmd_path=which('samtools')
     if cmd_path:
         return True
     else:
-        sys.stdout.write('\nCRISPRessoPooled requires samtools')
-        sys.stdout.write('\n\nPlease install samtools and add it to your path following the instructions at: http://www.htslib.org/download/')
-        return False
+        raise CRISPRessoShared.InstallationException('CRISPRessoPooled requires samtools\nPlease install samtools and add it to your path following the instructions at: http://www.htslib.org/download/')
 
 def check_bowtie2():
+    """
+    Assert that bowtie2 is installed
+    """
 
     cmd_path1=which('bowtie2')
     cmd_path2=which('bowtie2-inspect')
 
     if cmd_path1 and cmd_path2:
         return True
     else:
-        sys.stdout.write('\nCRISPRessoPooled requires Bowtie2!')
-        sys.stdout.write('\n\nPlease install Bowtie2 and add it to your path following the instructions at: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#obtaining-bowtie-2')
-        return False
+        raise CRISPRessoShared.InstallationException('\nCRISPRessoPooled requires Bowtie2!\nPlease install Bowtie2 and add it to your path following the instructions at: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#obtaining-bowtie-2')
+
+def print_full_pandas_df(x):
+    """
+    Print the full pandas dataframe (no clipping of rows or columns)
 
-def print_full(x):
+    Args:
+        x (pd.DataFrame): The dataframe to print
+    """
     pd.set_option('display.max_rows', len(x))
     pd.set_option('display.max_columns', None)
     pd.set_option('display.width', 2000)
@@ -243,41 +251,6 @@ def normalize_name(name, fastq_r1, fastq_r2, aligned_pooled_bam):
 pd=check_library('pandas')
 np=check_library('numpy')
 
-###EXCEPTIONS############################
-class FlashException(Exception):
-    pass
-
-class TrimmomaticException(Exception):
-    pass
-
-class Bowtie2Exception(Exception):
-    pass
-
-class AmpliconsNotUniqueException(Exception):
-    pass
-
-class AmpliconsNamesNotUniqueException(Exception):
-    pass
-
-class NoReadsAlignedException(Exception):
-    pass
-
-class DonorSequenceException(Exception):
-    pass
-
-class AmpliconEqualDonorException(Exception):
-    pass
-
-class SgRNASequenceException(Exception):
-    pass
-
-class NTException(Exception):
-    pass
-
-class ExonSequenceException(Exception):
-    pass
-
-
 def main():
     try:
         start_time =  datetime.now()
@@ -291,11 +264,11 @@ def main():
 ||   \__/\__/|__|__|__/ |
 |_______________________|
         '''
-        print(CRISPRessoShared.get_crispresso_header(description, pooled_string))
+        info(CRISPRessoShared.get_crispresso_header(description, pooled_string))
 
         # if no args are given, print a simplified help message
         if len(sys.argv) == 1:
-            print(CRISPRessoShared.format_cl_text('usage: CRISPRessoPooled [-r1 FASTQ_R1] [-r2 FASTQ_R2] [-f AMPLICONS_FILE] [-x GENOME_ROOT] [-n NAME]\n' + \
+            raise CRISPRessoShared.BadParameterException(CRISPRessoShared.format_cl_text('usage: CRISPRessoPooled [-r1 FASTQ_R1] [-r2 FASTQ_R2] [-f AMPLICONS_FILE] [-x GENOME_ROOT] [-n NAME]\n' + \
                 'commonly-used arguments:\n' + \
                 '-h, --help            show the full list of arguments\n' + \
                 '-v, --version         show program\'s version number and exit\n' + \
@@ -305,7 +278,6 @@ def main():
                 '-x GENOME_ROOT        Folder that contains the bowtie2-indexed genome for optional unbiased alignment of reads (default: None, reads are only aligned to provided amplicon sequences)\n' + \
                 '-n NAME, --name NAME  Name for the analysis (default: name based on input file name)\n'
             ))
-            sys.exit()
 
         parser = CRISPRessoShared.getCRISPRessoArgParser("Pooled", parser_title = 'CRISPRessoPooled Parameters')
 
@@ -736,7 +708,7 @@ def main():
 
             if not len(df_template.amplicon_seq.unique())==df_template.shape[0]:
                 duplicated_entries = df_template.amplicon_seq[df_template.amplicon_seq.duplicated()]
-                raise Exception('The amplicon sequences must be distinct! (Duplicated entries: ' + str(duplicated_entries.values) + ')')
+                raise CRISPRessoShared.BadParameterException('The amplicon sequences must be distinct! (Duplicated entries: ' + str(duplicated_entries.values) + ')')
 
             #check to see that no sequences and their reverse complements are present
             amp_seqs = df_template.amplicon_seq.values #Beware, this is a numpy array of dtype str and if you add these arrays amp_seqs + rc_amp_seqs, it will concat the strings, not the arrays....
@@ -747,15 +719,15 @@ def main():
 
             if not len(df_template.amplicon_name.unique())==df_template.shape[0]:
                 duplicated_entries = df_template.amplicon_name[df_template.amplicon_name.duplicated()]
-                raise Exception('The amplicon names must be distinct! (Duplicated names: ' + str(duplicated_entries.values) + ')')
+                raise CRISPRessoShared.BadParameterException('The amplicon names must be distinct! (Duplicated names: ' + str(duplicated_entries.values) + ')')
 
             df_template=df_template.set_index('amplicon_name')
             df_template.index=df_template.index.to_series().str.replace(' ', '_')
 
             for idx, row in df_template.iterrows():
                 wrong_nt=CRISPRessoShared.find_wrong_nt(row.amplicon_seq)
                 if wrong_nt:
-                    raise NTException('The amplicon sequence %s contains wrong characters:%s' % (idx, ' '.join(wrong_nt)))
+                    raise CRISPRessoShared.NTException('The amplicon sequence %s contains wrong characters:%s' % (idx, ' '.join(wrong_nt)))
 
                 if 'guide_seq' in df_template.columns and not pd.isnull(row.guide_seq):
                     cut_points = []
@@ -765,7 +737,7 @@ def main():
 
                         wrong_nt = CRISPRessoShared.find_wrong_nt(current_guide_seq)
                         if wrong_nt:
-                            raise NTException('The sgRNA sequence %s contains wrong characters:%s'  % (current_guide_seq, ' '.join(wrong_nt)))
+                            raise CRISPRessoShared.NTException('The sgRNA sequence %s contains wrong characters:%s'  % (current_guide_seq, ' '.join(wrong_nt)))
 
                         offset_fw=guide_qw_centers[idx]+len(current_guide_seq)-1
                         offset_rc=(-guide_qw_centers[idx])-1
@@ -950,7 +922,7 @@ def main():
                     filename_amplicon_seqs_fasta, filename_aligned_amplicons_sam_log, filename_aligned_amplicons_sam)
                 bowtie_status=sb.call(aligner_command, shell=True)
                 if bowtie_status:
-                        raise Bowtie2Exception('Bowtie2 failed to align amplicons to the genome, please check the output file.')
+                        raise CRISPRessoShared.AlignmentException('Bowtie2 failed to align amplicons to the genome, please check the output file.')
 
                 additional_columns = []
                 with open (filename_aligned_amplicons_sam) as aln:
@@ -1066,7 +1038,7 @@ def rreplace(s, old, new):
             if can_finish_incomplete_run and 'genome_demultiplexing' in crispresso2_info['running_info']['finished_steps'] and os.path.isfile(REPORT_ALL_DEPTH):
                 info('Using previously-computed demultiplexing of genomic reads')
                 df_all_demux = pd.read_csv(REPORT_ALL_DEPTH, sep='\t')
-                df_all_demux['loc'] = df_all_demux['chr_id'] + ' ' + df_all_demux['start'].apply(str) + ' '+df_all_demux['end'].apply(str)
+                df_all_demux['loc'] = df_all_demux['chr_id'].apply('str') + ' ' + df_all_demux['start'].apply(str) + ' '+df_all_demux['end'].apply(str)
                 df_all_demux.set_index(['loc'], inplace=True)
             else:
                 #REDISCOVER LOCATIONS and DEMULTIPLEX READS
@@ -1240,7 +1212,7 @@ def rreplace(s, old, new):
                 df_all_demux.set_index(['loc'], inplace=True)
 
                 if sum_aligned_reads == 0:
-                    raise NoReadsAlignedException("No reads aligned to the specified genome")
+                    raise CRISPRessoShared.NoReadsAlignedException("No reads aligned to the specified genome")
 
                 crispresso2_info['running_info']['finished_steps']['genome_demultiplexing'] = True
                 CRISPRessoShared.write_crispresso_info(
@@ -1409,11 +1381,10 @@ def rreplace(s, old, new):
                 info('Running CRISPResso on the regions discovered...')
                 crispresso_cmds = []
                 for idx, row in df_regions.iterrows():
-
                     if row.n_reads > args.min_reads_to_use_region:
                         info('\nRunning CRISPResso on: %s-%d-%d...'%(row.chr_id, row.bpstart, row.bpend))
                         if pd.isna(row.sequence):
-                            raise Exception('Cannot extract sequence from input reference ' + uncompressed_reference)
+                            raise CRISPRessoShared.BadParameterException('Cannot extract sequence ' + str(row.sequence) + ' from input reference ' + uncompressed_reference)
                         crispresso_cmd = args.crispresso_command + ' -r1 %s -a %s -o %s' %(row.fastq_file, row.sequence, OUTPUT_DIRECTORY)
                         crispresso_cmd = CRISPRessoShared.propagate_crispresso_options(crispresso_cmd, crispresso_options_for_pooled, args)
                         crispresso_cmds.append(crispresso_cmd)
@@ -1649,7 +1620,7 @@ def default_sigpipe():
                 try:
                     run_data = CRISPRessoShared.load_crispresso_info(sub_folder)
                 except Exception as e:
-                    raise Exception('CRISPResso run %s is not complete. Cannot read CRISPResso2_info.json file.'% sub_folder)
+                    raise CRISPRessoShared.OutputFolderIncompleteException('CRISPResso run %s is not complete. Cannot read CRISPResso2_info.json file.'% sub_folder)
                 ref_sequences = [run_data['results']['refs'][ref_name]['sequence'] for ref_name in run_data['results']['ref_names']]
                 allele_frequency_table_zip_filename = os.path.join(sub_folder, run_data['running_info']['allele_frequency_table_zip_filename'])
                 if not os.path.exists(allele_frequency_table_zip_filename):
@@ -1708,7 +1679,7 @@ def default_sigpipe():
             CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)
 
         info('All Done!', {'percent_complete': 100})
-        print(CRISPRessoShared.get_crispresso_footer())
+        info(CRISPRessoShared.get_crispresso_footer())
         sys.exit(0)
 
     except Exception as e:
diff --git a/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py b/CRISPResso2/CRISPRessoPooledWGSCompareCORE.py
@@ -82,7 +82,7 @@ def main():
         compare_header = CRISPRessoShared.get_crispresso_header(
             description, compare_header,
         )
-        print(compare_header)
+        info(compare_header)
 
         parser = argparse.ArgumentParser(
             description='CRISPRessoPooledWGSCompare Parameters',
@@ -380,7 +380,7 @@ def main():
             CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)
 
         info('All Done!', {'percent_complete': 100})
-        print(CRISPRessoShared.get_crispresso_footer())
+        info(CRISPRessoShared.get_crispresso_footer())
         sys.exit(0)
 
     except Exception as e:
diff --git a/CRISPResso2/CRISPRessoShared.py b/CRISPResso2/CRISPRessoShared.py
diff --git a/CRISPResso2/CRISPRessoWGSCORE.py b/CRISPResso2/CRISPRessoWGSCORE.py