diff options
Diffstat (limited to 'centrifuge_evaluate_mason.py.patch')
-rw-r--r-- | centrifuge_evaluate_mason.py.patch | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/centrifuge_evaluate_mason.py.patch b/centrifuge_evaluate_mason.py.patch new file mode 100644 index 000000000000..88c309bf795a --- /dev/null +++ b/centrifuge_evaluate_mason.py.patch @@ -0,0 +1,127 @@ +--- a/centrifuge_evaluate_mason.py 2023-04-14 21:29:29.482568396 +0530 ++++ b/centrifuge_evaluate_mason.py 2023-04-14 22:05:44.988504275 +0530 +@@ -1,4 +1,4 @@ +-#!/usr/bin/env python ++#!/usr/bin/python3 + + import sys, os, subprocess, inspect + import platform, multiprocessing +@@ -27,7 +27,7 @@ + higher_ranked = {} + + ancestors = set() +- for tax_id in taxonomy_tree.keys(): ++ for tax_id in list(taxonomy_tree.keys()): + if tax_id in ancestors: + continue + while True: +@@ -82,7 +82,7 @@ + + fields = line.strip().split('\t') + if len(fields) != 3: +- print >> sys.stderr, "Warning: %s missing" % (line.strip()) ++ print("Warning: %s missing" % (line.strip()), file=sys.stderr) + continue + read_name, tax_id = fields[1:3] + # Traverse up taxonomy tree to match the given rank parameter +@@ -117,7 +117,7 @@ + # print read_name + + raw_unique_classified = 0 +- for read_name, maps in db_dic.items(): ++ for read_name, maps in list(db_dic.items()): + if len(maps) == 1 and read_name not in higher_ranked: + raw_unique_classified += 1 + return classified, unique_classified, unclassified, len(db_dic), raw_unique_classified +@@ -184,7 +184,7 @@ + read_fname] + + if verbose: +- print >> sys.stderr, ' '.join(centrifuge_cmd) ++ print(' '.join(centrifuge_cmd), file=sys.stderr) + + out_fname = "centrifuge.output" + proc = subprocess.Popen(centrifuge_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE) +@@ -208,12 +208,12 @@ + # if rank == "strain": + # assert num_cases == num_fragment + +- print >> sys.stderr, "\t\t%s" % rank +- print >> sys.stderr, "\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(classified, num_cases, float(classified) / num_cases) +- print >> sys.stderr, "\t\t\tprecision : {:,} / {:,} ({:.2%})".format(classified, raw_classified, float(classified) / raw_classified) +- print >> sys.stderr, "\n\t\t\tfor uniquely classified " +- print >> sys.stderr, "\t\t\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(unique_classified, num_cases, float(unique_classified) / num_cases) +- print >> sys.stderr, "\t\t\t\t\tprecision : {:,} / {:,} ({:.2%})".format(unique_classified, raw_unique_classified, float(unique_classified) / raw_unique_classified) ++ print("\t\t%s" % rank, file=sys.stderr) ++ print("\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(classified, num_cases, float(classified) / num_cases), file=sys.stderr) ++ print("\t\t\tprecision : {:,} / {:,} ({:.2%})".format(classified, raw_classified, float(classified) / raw_classified), file=sys.stderr) ++ print("\n\t\t\tfor uniquely classified ", file=sys.stderr) ++ print("\t\t\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(unique_classified, num_cases, float(unique_classified) / num_cases), file=sys.stderr) ++ print("\t\t\t\t\tprecision : {:,} / {:,} ({:.2%})".format(unique_classified, raw_unique_classified, float(unique_classified) / raw_unique_classified), file=sys.stderr) + + # Calculate sum of squared residuals in abundance + """ +@@ -252,12 +252,12 @@ + if rank_taxID not in true_abundance: + true_abundance[rank_taxID] = 0.0 + true_abundance[rank_taxID] += (reads / float(genomeSize)) +- for taxID, reads in true_abundance.items(): ++ for taxID, reads in list(true_abundance.items()): + true_abundance[taxID] /= total_sum + +- print >> sys.stderr, "number of genomes:", num_genomes +- print >> sys.stderr, "number of species:", num_species +- print >> sys.stderr, "number of uniq species:", len(true_abundance) ++ print("number of genomes:", num_genomes, file=sys.stderr) ++ print("number of species:", num_species, file=sys.stderr) ++ print("number of uniq species:", len(true_abundance), file=sys.stderr) + + read_fname = "centrifuge_data/bacteria_sim10M/bacteria_sim10M.fa" + summary_fname = "centrifuge.summary" +@@ -271,14 +271,14 @@ + read_fname] + + if verbose: +- print >> sys.stderr, ' '.join(centrifuge_cmd) ++ print(' '.join(centrifuge_cmd), file=sys.stderr) + + out_fname = "centrifuge.output" + proc = subprocess.Popen(centrifuge_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE) + proc.communicate() + + calc_abundance = {} +- for taxID in true_abundance.keys(): ++ for taxID in list(true_abundance.keys()): + calc_abundance[taxID] = 0.0 + first = True + for line in open(summary_fname): +@@ -296,12 +296,12 @@ + """ + + abundance_file = open("abundance.cmp", 'w') +- print >> abundance_file, "taxID\ttrue\tcalc\trank" ++ print("taxID\ttrue\tcalc\trank", file=abundance_file) + for rank in ranks: + if rank == "strain": + continue + true_abundance_rank, calc_abundance_rank = {}, {} +- for taxID in true_abundance.keys(): ++ for taxID in list(true_abundance.keys()): + assert taxID in calc_abundance + rank_taxID = taxID + while True: +@@ -322,11 +322,11 @@ + calc_abundance_rank[rank_taxID] += calc_abundance[taxID] + + ssr = 0.0 # Sum of Squared Residuals +- for taxID in true_abundance_rank.keys(): ++ for taxID in list(true_abundance_rank.keys()): + assert taxID in calc_abundance_rank + ssr += (true_abundance_rank[taxID] - calc_abundance_rank[taxID]) ** 2 +- print >> abundance_file, "%s\t%.6f\t%.6f\t%s" % (taxID, true_abundance_rank[taxID], calc_abundance_rank[taxID], rank) +- print >> sys.stderr, "%s) Sum of squared residuals: %.6f" % (rank, ssr) ++ print("%s\t%.6f\t%.6f\t%s" % (taxID, true_abundance_rank[taxID], calc_abundance_rank[taxID], rank), file=abundance_file) ++ print("%s) Sum of squared residuals: %.6f" % (rank, ssr), file=sys.stderr) + abundance_file.close() + + |