Skip to content

Commit 62bfaf0

Browse files
Colelyman and kclem authored
Suppress printing (#115) (#511)
* Refactor output handling to use 'info' instead of 'print' and remove unused code
* Remove exit after raising of Exception
* Cast chr_id to a string (again)
* Properly scrub invalid characters from being written to JSON status file. This commit will convert \n, \, and " when being written to the JSON message field because all of those characters will cause the JSON to be invalid.
* Change branch to cole/suppress_printing
* Move test branch back to master
---------
Co-authored-by: kclem <k.clement.dev@gmail.com>
1 parent 7cceaf9 commit 62bfaf0

9 files changed

Lines changed: 63 additions & 128 deletions

CRISPResso2/CRISPRessoAggregateCORE.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def main():
5353
|/--\ \_| \_| | \ |_ \_| /--\ | |_ |
5454
|__________________________________|
5555
'''
56-
print(CRISPRessoShared.get_crispresso_header(description, aggregate_string))
56+
info(CRISPRessoShared.get_crispresso_header(description, aggregate_string))
5757

5858
parser = argparse.ArgumentParser(description="Aggregate CRISPResso2 Runs")
5959
parser.add_argument("-p", "--prefix", action='append', help="Prefix for CRISPResso folders to aggregate (may be specified multiple times)", default=[])
@@ -903,7 +903,7 @@ def main():
903903
process_pool.shutdown()
904904

905905
info('Analysis Complete!', {'percent_complete': 100})
906-
print(CRISPRessoShared.get_crispresso_footer())
906+
info(CRISPRessoShared.get_crispresso_footer())
907907
sys.exit(0)
908908

909909
except Exception as e:

CRISPResso2/CRISPRessoBatchCORE.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,17 @@ def main():
8989
||__)/--\| \__| ||
9090
|_________________|
9191
'''
92-
print(CRISPRessoShared.get_crispresso_header(description, batch_string))
92+
info(CRISPRessoShared.get_crispresso_header(description, batch_string))
9393

9494
# if no args are given, print a simplified help message
9595
if len(sys.argv) == 1:
96-
print(CRISPRessoShared.format_cl_text('usage: CRISPRessoBatch [-bs BATCH_SETTINGS] [-n NAME]\n' + \
96+
raise CRISPRessoShared.BadParameterException(CRISPRessoShared.format_cl_text('usage: CRISPRessoBatch [-bs BATCH_SETTINGS] [-n NAME]\n' + \
9797
'commonly-used arguments:\n' + \
9898
'-h, --help show the full list of arguments\n' + \
9999
'-v, --version show program\'s version number and exit\n' + \
100100
'-bs BATCH_SETTINGS Tab-separated file where rows are samples and columns specify settings for each sample.\n' + \
101101
'-n NAME, --name NAME Name for the analysis (default: name based on input file name)'
102102
))
103-
sys.exit()
104103

105104
parser = CRISPRessoShared.getCRISPRessoArgParser("Batch", parser_title = 'CRISPRessoBatch Parameters')
106105

@@ -984,7 +983,7 @@ def main():
984983
CRISPRessoShared.zip_results(path_value[1])
985984
else:
986985
CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)
987-
print(CRISPRessoShared.get_crispresso_footer())
986+
info(CRISPRessoShared.get_crispresso_footer())
988987
sys.exit(0)
989988

990989
except Exception as e:

CRISPResso2/CRISPRessoCORE.py

Lines changed: 12 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,26 +7,24 @@
77
(c) 2020 The General Hospital Corporation. All Rights Reserved.
88
'''
99

10-
import sys
11-
running_python3 = False
12-
if sys.version_info > (3, 0):
13-
running_python3 = True
14-
1510
import argparse
16-
from collections import Counter
17-
from copy import deepcopy
18-
from concurrent.futures import ProcessPoolExecutor, wait
19-
from functools import partial
2011
import errno
2112
import gzip
2213
import json
23-
import zipfile
14+
import logging
2415
import os
2516
import re
17+
import sys
2618
import subprocess as sb
2719
import traceback
28-
from multiprocessing import Process
20+
import zipfile
2921

22+
from collections import Counter
23+
from copy import deepcopy
24+
from concurrent.futures import ProcessPoolExecutor, wait
25+
from datetime import datetime
26+
from functools import partial
27+
from multiprocessing import Process
3028

3129
from CRISPResso2 import CRISPRessoCOREResources
3230
from CRISPResso2.CRISPRessoReports import CRISPRessoReport
@@ -41,15 +39,6 @@
4139
from CRISPResso2 import CRISPResso2Align
4240
from CRISPResso2 import CRISPRessoMultiProcessing
4341

44-
from datetime import datetime
45-
present = datetime.now()
46-
#d1 = datetime.strptime('21/07/2019','%d/%m/%Y')
47-
#if present > d1:
48-
# print('\nYour version of CRISPResso2 is out of date. Please download a new version.\n')
49-
# sys.exit(1)
50-
51-
import logging
52-
5342
logger = logging.getLogger(__name__)
5443
logger.setLevel(logging.DEBUG)
5544
logger.addHandler(CRISPRessoShared.LogStreamHandler())
@@ -429,7 +418,6 @@ def get_new_variant_object(args, fastq_seq, refs, ref_names, aln_matrix, pe_scaf
429418
if args.prime_editing_pegRNA_scaffold_seq and 'Prime-edited' in best_match_names: #any scaffold extensions must be closer to the prime-edited sequence
430419
pe_read_possible_scaffold_loc = new_variant['variant_Prime-edited']['ref_positions'].index(pe_scaffold_dna_info[0]-1) + 1
431420
if new_variant['variant_Prime-edited']['aln_seq'][pe_read_possible_scaffold_loc:(pe_read_possible_scaffold_loc+len(pe_scaffold_dna_info[1]))] == pe_scaffold_dna_info[1]:
432-
# print('comparingHERE ' + new_variant['variant_Prime-edited']['aln_seq'][pe_read_possible_scaffold_loc:(pe_read_possible_scaffold_loc+len(pe_scaffold_dna_info[1])+5)] + ' from ' + new_variant['variant_Prime-edited']['aln_seq'] + ' and ' + new_variant['variant_Prime-edited']['aln_ref'])
433421
new_variant['aln_ref_names'] = ["Scaffold-incorporated"]
434422
new_variant['class_name'] = "Scaffold-incorporated"
435423
old_payload = deepcopy(new_variant['variant_Prime-edited']) #keep prime-edited allele and alignment
@@ -1380,11 +1368,11 @@ def print_stacktrace_if_debug():
13801368
start_time_string = start_time.strftime('%Y-%m-%d %H:%M:%S')
13811369
description = ['~~~CRISPResso 2~~~', '-Analysis of genome editing outcomes from deep sequencing data-']
13821370
header = CRISPRessoShared.get_crispresso_header(description=description, header_str=None)
1383-
print(header)
1371+
info(header)
13841372

13851373
# if no args are given, print a simplified help message
13861374
if len(sys.argv) == 1:
1387-
print(CRISPRessoShared.format_cl_text('usage: CRISPResso [-r1 FASTQ_R1] [-r2 FASTQ_R2] [-a AMPLICON_SEQ] [-g GUIDE_SEQ] [-n NAME]\n' + \
1375+
raise CRISPRessoShared.BadParameterException(CRISPRessoShared.format_cl_text('usage: CRISPResso [-r1 FASTQ_R1] [-r2 FASTQ_R2] [-a AMPLICON_SEQ] [-g GUIDE_SEQ] [-n NAME]\n' + \
13881376
'commonly-used arguments:\n' + \
13891377
'-h, --help show the full list of arguments\n' + \
13901378
'-v, --version show program\'s version number and exit\n' + \
@@ -1394,7 +1382,6 @@ def print_stacktrace_if_debug():
13941382
'-g GUIDE_SEQ Guide sequence (default: None)\n' + \
13951383
'-n NAME, --name NAME Name for the analysis (default: name based on input file name)'
13961384
))
1397-
sys.exit()
13981385

13991386

14001387
arg_parser = CRISPRessoShared.getCRISPRessoArgParser("Core")
@@ -5133,7 +5120,7 @@ def get_scaffold_len(row, scaffold_start_loc, scaffold_seq):
51335120
CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)
51345121

51355122
info('Analysis Complete!', {'percent_complete': 100})
5136-
print(CRISPRessoShared.get_crispresso_footer())
5123+
info(CRISPRessoShared.get_crispresso_footer())
51375124

51385125
sys.exit(0)
51395126

CRISPResso2/CRISPRessoCompareCORE.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def main():
103103
|___________________________|
104104
'''
105105
compare_header = CRISPRessoShared.get_crispresso_header(description, compare_header)
106-
print(compare_header)
106+
info(compare_header)
107107

108108
parser = CRISPRessoShared.getCRISPRessoArgParser("Compare", parser_title = 'CRISPRessoCompare Parameters')
109109

@@ -455,7 +455,7 @@ def get_plot_title_with_ref_name(plotTitle, refName):
455455
CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)
456456

457457
info('Analysis Complete!', {'percent_complete': 100})
458-
print(CRISPRessoShared.get_crispresso_footer())
458+
info(CRISPRessoShared.get_crispresso_footer())
459459
sys.exit(0)
460460

461461
except Exception as e:

CRISPResso2/CRISPRessoMetaCORE.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def main():
8484
| |_| |_| |_| |_|____ |_| |_| |_| |
8585
|________________________________________|
8686
'''
87-
print(CRISPRessoShared.get_crispresso_header(description, meta_string))
87+
info(CRISPRessoShared.get_crispresso_header(description, meta_string))
8888

8989
parser = CRISPRessoShared.getCRISPRessoArgParser("Meta", parser_title = 'CRISPRessoMeta Parameters')
9090

@@ -366,7 +366,7 @@ def main():
366366
crispresso2Meta_info_file, crispresso2_info,
367367
)
368368
info('Analysis Complete!', {'percent_complete': 100})
369-
print(CRISPRessoShared.get_crispresso_footer())
369+
info(CRISPRessoShared.get_crispresso_footer())
370370
sys.exit(0)
371371

372372
except Exception as e:

CRISPResso2/CRISPRessoPooledCORE.py

Lines changed: 27 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -67,28 +67,36 @@ def is_exe(fpath):
6767

6868

6969
def check_samtools():
70+
"""
71+
Assert that samtools is installed
72+
"""
7073

7174
cmd_path=which('samtools')
7275
if cmd_path:
7376
return True
7477
else:
75-
sys.stdout.write('\nCRISPRessoPooled requires samtools')
76-
sys.stdout.write('\n\nPlease install samtools and add it to your path following the instructions at: http://www.htslib.org/download/')
77-
return False
78+
raise CRISPRessoShared.InstallationException('CRISPRessoPooled requires samtools\nPlease install samtools and add it to your path following the instructions at: http://www.htslib.org/download/')
7879

7980
def check_bowtie2():
81+
"""
82+
Assert that bowtie2 is installed
83+
"""
8084

8185
cmd_path1=which('bowtie2')
8286
cmd_path2=which('bowtie2-inspect')
8387

8488
if cmd_path1 and cmd_path2:
8589
return True
8690
else:
87-
sys.stdout.write('\nCRISPRessoPooled requires Bowtie2!')
88-
sys.stdout.write('\n\nPlease install Bowtie2 and add it to your path following the instructions at: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#obtaining-bowtie-2')
89-
return False
91+
raise CRISPRessoShared.InstallationException('\nCRISPRessoPooled requires Bowtie2!\nPlease install Bowtie2 and add it to your path following the instructions at: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#obtaining-bowtie-2')
92+
93+
def print_full_pandas_df(x):
94+
"""
95+
Print the full pandas dataframe (no clipping of rows or columns)
9096
91-
def print_full(x):
97+
Args:
98+
x (pd.DataFrame): The dataframe to print
99+
"""
92100
pd.set_option('display.max_rows', len(x))
93101
pd.set_option('display.max_columns', None)
94102
pd.set_option('display.width', 2000)
@@ -243,41 +251,6 @@ def normalize_name(name, fastq_r1, fastq_r2, aligned_pooled_bam):
243251
pd=check_library('pandas')
244252
np=check_library('numpy')
245253

246-
###EXCEPTIONS############################
247-
class FlashException(Exception):
248-
pass
249-
250-
class TrimmomaticException(Exception):
251-
pass
252-
253-
class Bowtie2Exception(Exception):
254-
pass
255-
256-
class AmpliconsNotUniqueException(Exception):
257-
pass
258-
259-
class AmpliconsNamesNotUniqueException(Exception):
260-
pass
261-
262-
class NoReadsAlignedException(Exception):
263-
pass
264-
265-
class DonorSequenceException(Exception):
266-
pass
267-
268-
class AmpliconEqualDonorException(Exception):
269-
pass
270-
271-
class SgRNASequenceException(Exception):
272-
pass
273-
274-
class NTException(Exception):
275-
pass
276-
277-
class ExonSequenceException(Exception):
278-
pass
279-
280-
281254
def main():
282255
try:
283256
start_time = datetime.now()
@@ -291,11 +264,11 @@ def main():
291264
|| \__/\__/|__|__|__/ |
292265
|_______________________|
293266
'''
294-
print(CRISPRessoShared.get_crispresso_header(description, pooled_string))
267+
info(CRISPRessoShared.get_crispresso_header(description, pooled_string))
295268

296269
# if no args are given, print a simplified help message
297270
if len(sys.argv) == 1:
298-
print(CRISPRessoShared.format_cl_text('usage: CRISPRessoPooled [-r1 FASTQ_R1] [-r2 FASTQ_R2] [-f AMPLICONS_FILE] [-x GENOME_ROOT] [-n NAME]\n' + \
271+
raise CRISPRessoShared.BadParameterException(CRISPRessoShared.format_cl_text('usage: CRISPRessoPooled [-r1 FASTQ_R1] [-r2 FASTQ_R2] [-f AMPLICONS_FILE] [-x GENOME_ROOT] [-n NAME]\n' + \
299272
'commonly-used arguments:\n' + \
300273
'-h, --help show the full list of arguments\n' + \
301274
'-v, --version show program\'s version number and exit\n' + \
@@ -305,7 +278,6 @@ def main():
305278
'-x GENOME_ROOT Folder that contains the bowtie2-indexed genome for optional unbiased alignment of reads (default: None, reads are only aligned to provided amplicon sequences)\n' + \
306279
'-n NAME, --name NAME Name for the analysis (default: name based on input file name)\n'
307280
))
308-
sys.exit()
309281

310282
parser = CRISPRessoShared.getCRISPRessoArgParser("Pooled", parser_title = 'CRISPRessoPooled Parameters')
311283

@@ -736,7 +708,7 @@ def main():
736708

737709
if not len(df_template.amplicon_seq.unique())==df_template.shape[0]:
738710
duplicated_entries = df_template.amplicon_seq[df_template.amplicon_seq.duplicated()]
739-
raise Exception('The amplicon sequences must be distinct! (Duplicated entries: ' + str(duplicated_entries.values) + ')')
711+
raise CRISPRessoShared.BadParameterException('The amplicon sequences must be distinct! (Duplicated entries: ' + str(duplicated_entries.values) + ')')
740712

741713
#check to see that no sequences and their reverse complements are present
742714
amp_seqs = df_template.amplicon_seq.values #Beware, this is a numpy array of dtype str and if you add these arrays amp_seqs + rc_amp_seqs, it will concat the strings, not the arrays....
@@ -747,15 +719,15 @@ def main():
747719

748720
if not len(df_template.amplicon_name.unique())==df_template.shape[0]:
749721
duplicated_entries = df_template.amplicon_name[df_template.amplicon_name.duplicated()]
750-
raise Exception('The amplicon names must be distinct! (Duplicated names: ' + str(duplicated_entries.values) + ')')
722+
raise CRISPRessoShared.BadParameterException('The amplicon names must be distinct! (Duplicated names: ' + str(duplicated_entries.values) + ')')
751723

752724
df_template=df_template.set_index('amplicon_name')
753725
df_template.index=df_template.index.to_series().str.replace(' ', '_')
754726

755727
for idx, row in df_template.iterrows():
756728
wrong_nt=CRISPRessoShared.find_wrong_nt(row.amplicon_seq)
757729
if wrong_nt:
758-
raise NTException('The amplicon sequence %s contains wrong characters:%s' % (idx, ' '.join(wrong_nt)))
730+
raise CRISPRessoShared.NTException('The amplicon sequence %s contains wrong characters:%s' % (idx, ' '.join(wrong_nt)))
759731

760732
if 'guide_seq' in df_template.columns and not pd.isnull(row.guide_seq):
761733
cut_points = []
@@ -765,7 +737,7 @@ def main():
765737

766738
wrong_nt = CRISPRessoShared.find_wrong_nt(current_guide_seq)
767739
if wrong_nt:
768-
raise NTException('The sgRNA sequence %s contains wrong characters:%s' % (current_guide_seq, ' '.join(wrong_nt)))
740+
raise CRISPRessoShared.NTException('The sgRNA sequence %s contains wrong characters:%s' % (current_guide_seq, ' '.join(wrong_nt)))
769741

770742
offset_fw=guide_qw_centers[idx]+len(current_guide_seq)-1
771743
offset_rc=(-guide_qw_centers[idx])-1
@@ -950,7 +922,7 @@ def main():
950922
filename_amplicon_seqs_fasta, filename_aligned_amplicons_sam_log, filename_aligned_amplicons_sam)
951923
bowtie_status=sb.call(aligner_command, shell=True)
952924
if bowtie_status:
953-
raise Bowtie2Exception('Bowtie2 failed to align amplicons to the genome, please check the output file.')
925+
raise CRISPRessoShared.AlignmentException('Bowtie2 failed to align amplicons to the genome, please check the output file.')
954926

955927
additional_columns = []
956928
with open (filename_aligned_amplicons_sam) as aln:
@@ -1066,7 +1038,7 @@ def rreplace(s, old, new):
10661038
if can_finish_incomplete_run and 'genome_demultiplexing' in crispresso2_info['running_info']['finished_steps'] and os.path.isfile(REPORT_ALL_DEPTH):
10671039
info('Using previously-computed demultiplexing of genomic reads')
10681040
df_all_demux = pd.read_csv(REPORT_ALL_DEPTH, sep='\t')
1069-
df_all_demux['loc'] = df_all_demux['chr_id'] + ' ' + df_all_demux['start'].apply(str) + ' '+df_all_demux['end'].apply(str)
1041+
df_all_demux['loc'] = df_all_demux['chr_id'].apply('str') + ' ' + df_all_demux['start'].apply(str) + ' '+df_all_demux['end'].apply(str)
10701042
df_all_demux.set_index(['loc'], inplace=True)
10711043
else:
10721044
#REDISCOVER LOCATIONS and DEMULTIPLEX READS
@@ -1240,7 +1212,7 @@ def rreplace(s, old, new):
12401212
df_all_demux.set_index(['loc'], inplace=True)
12411213

12421214
if sum_aligned_reads == 0:
1243-
raise NoReadsAlignedException("No reads aligned to the specified genome")
1215+
raise CRISPRessoShared.NoReadsAlignedException("No reads aligned to the specified genome")
12441216

12451217
crispresso2_info['running_info']['finished_steps']['genome_demultiplexing'] = True
12461218
CRISPRessoShared.write_crispresso_info(
@@ -1409,11 +1381,10 @@ def rreplace(s, old, new):
14091381
info('Running CRISPResso on the regions discovered...')
14101382
crispresso_cmds = []
14111383
for idx, row in df_regions.iterrows():
1412-
14131384
if row.n_reads > args.min_reads_to_use_region:
14141385
info('\nRunning CRISPResso on: %s-%d-%d...'%(row.chr_id, row.bpstart, row.bpend))
14151386
if pd.isna(row.sequence):
1416-
raise Exception('Cannot extract sequence from input reference ' + uncompressed_reference)
1387+
raise CRISPRessoShared.BadParameterException('Cannot extract sequence ' + str(row.sequence) + ' from input reference ' + uncompressed_reference)
14171388
crispresso_cmd = args.crispresso_command + ' -r1 %s -a %s -o %s' %(row.fastq_file, row.sequence, OUTPUT_DIRECTORY)
14181389
crispresso_cmd = CRISPRessoShared.propagate_crispresso_options(crispresso_cmd, crispresso_options_for_pooled, args)
14191390
crispresso_cmds.append(crispresso_cmd)
@@ -1649,7 +1620,7 @@ def default_sigpipe():
16491620
try:
16501621
run_data = CRISPRessoShared.load_crispresso_info(sub_folder)
16511622
except Exception as e:
1652-
raise Exception('CRISPResso run %s is not complete. Cannot read CRISPResso2_info.json file.'% sub_folder)
1623+
raise CRISPRessoShared.OutputFolderIncompleteException('CRISPResso run %s is not complete. Cannot read CRISPResso2_info.json file.'% sub_folder)
16531624
ref_sequences = [run_data['results']['refs'][ref_name]['sequence'] for ref_name in run_data['results']['ref_names']]
16541625
allele_frequency_table_zip_filename = os.path.join(sub_folder, run_data['running_info']['allele_frequency_table_zip_filename'])
16551626
if not os.path.exists(allele_frequency_table_zip_filename):
@@ -1708,7 +1679,7 @@ def default_sigpipe():
17081679
CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)
17091680

17101681
info('All Done!', {'percent_complete': 100})
1711-
print(CRISPRessoShared.get_crispresso_footer())
1682+
info(CRISPRessoShared.get_crispresso_footer())
17121683
sys.exit(0)
17131684

17141685
except Exception as e:

CRISPResso2/CRISPRessoPooledWGSCompareCORE.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def main():
8282
compare_header = CRISPRessoShared.get_crispresso_header(
8383
description, compare_header,
8484
)
85-
print(compare_header)
85+
info(compare_header)
8686

8787
parser = argparse.ArgumentParser(
8888
description='CRISPRessoPooledWGSCompare Parameters',
@@ -380,7 +380,7 @@ def main():
380380
CRISPRessoShared.zip_results(OUTPUT_DIRECTORY)
381381

382382
info('All Done!', {'percent_complete': 100})
383-
print(CRISPRessoShared.get_crispresso_footer())
383+
info(CRISPRessoShared.get_crispresso_footer())
384384
sys.exit(0)
385385

386386
except Exception as e:

0 commit comments

Comments
 (0)