summarylogtreecommitdiffstats
path: root/centrifuge_evaluate_mason.py.patch
diff options
context:
space:
mode:
Diffstat (limited to 'centrifuge_evaluate_mason.py.patch')
-rw-r--r--centrifuge_evaluate_mason.py.patch127
1 files changed, 127 insertions, 0 deletions
diff --git a/centrifuge_evaluate_mason.py.patch b/centrifuge_evaluate_mason.py.patch
new file mode 100644
index 000000000000..88c309bf795a
--- /dev/null
+++ b/centrifuge_evaluate_mason.py.patch
@@ -0,0 +1,127 @@
+--- a/centrifuge_evaluate_mason.py 2023-04-14 21:29:29.482568396 +0530
++++ b/centrifuge_evaluate_mason.py 2023-04-14 22:05:44.988504275 +0530
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+
+ import sys, os, subprocess, inspect
+ import platform, multiprocessing
+@@ -27,7 +27,7 @@
+ higher_ranked = {}
+
+ ancestors = set()
+- for tax_id in taxonomy_tree.keys():
++ for tax_id in list(taxonomy_tree.keys()):
+ if tax_id in ancestors:
+ continue
+ while True:
+@@ -82,7 +82,7 @@
+
+ fields = line.strip().split('\t')
+ if len(fields) != 3:
+- print >> sys.stderr, "Warning: %s missing" % (line.strip())
++ print("Warning: %s missing" % (line.strip()), file=sys.stderr)
+ continue
+ read_name, tax_id = fields[1:3]
+ # Traverse up taxonomy tree to match the given rank parameter
+@@ -117,7 +117,7 @@
+ # print read_name
+
+ raw_unique_classified = 0
+- for read_name, maps in db_dic.items():
++ for read_name, maps in list(db_dic.items()):
+ if len(maps) == 1 and read_name not in higher_ranked:
+ raw_unique_classified += 1
+ return classified, unique_classified, unclassified, len(db_dic), raw_unique_classified
+@@ -184,7 +184,7 @@
+ read_fname]
+
+ if verbose:
+- print >> sys.stderr, ' '.join(centrifuge_cmd)
++ print(' '.join(centrifuge_cmd), file=sys.stderr)
+
+ out_fname = "centrifuge.output"
+ proc = subprocess.Popen(centrifuge_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
+@@ -208,12 +208,12 @@
+ # if rank == "strain":
+ # assert num_cases == num_fragment
+
+- print >> sys.stderr, "\t\t%s" % rank
+- print >> sys.stderr, "\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(classified, num_cases, float(classified) / num_cases)
+- print >> sys.stderr, "\t\t\tprecision : {:,} / {:,} ({:.2%})".format(classified, raw_classified, float(classified) / raw_classified)
+- print >> sys.stderr, "\n\t\t\tfor uniquely classified "
+- print >> sys.stderr, "\t\t\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(unique_classified, num_cases, float(unique_classified) / num_cases)
+- print >> sys.stderr, "\t\t\t\t\tprecision : {:,} / {:,} ({:.2%})".format(unique_classified, raw_unique_classified, float(unique_classified) / raw_unique_classified)
++ print("\t\t%s" % rank, file=sys.stderr)
++ print("\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(classified, num_cases, float(classified) / num_cases), file=sys.stderr)
++ print("\t\t\tprecision : {:,} / {:,} ({:.2%})".format(classified, raw_classified, float(classified) / raw_classified), file=sys.stderr)
++ print("\n\t\t\tfor uniquely classified ", file=sys.stderr)
++ print("\t\t\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(unique_classified, num_cases, float(unique_classified) / num_cases), file=sys.stderr)
++ print("\t\t\t\t\tprecision : {:,} / {:,} ({:.2%})".format(unique_classified, raw_unique_classified, float(unique_classified) / raw_unique_classified), file=sys.stderr)
+
+ # Calculate sum of squared residuals in abundance
+ """
+@@ -252,12 +252,12 @@
+ if rank_taxID not in true_abundance:
+ true_abundance[rank_taxID] = 0.0
+ true_abundance[rank_taxID] += (reads / float(genomeSize))
+- for taxID, reads in true_abundance.items():
++ for taxID, reads in list(true_abundance.items()):
+ true_abundance[taxID] /= total_sum
+
+- print >> sys.stderr, "number of genomes:", num_genomes
+- print >> sys.stderr, "number of species:", num_species
+- print >> sys.stderr, "number of uniq species:", len(true_abundance)
++ print("number of genomes:", num_genomes, file=sys.stderr)
++ print("number of species:", num_species, file=sys.stderr)
++ print("number of uniq species:", len(true_abundance), file=sys.stderr)
+
+ read_fname = "centrifuge_data/bacteria_sim10M/bacteria_sim10M.fa"
+ summary_fname = "centrifuge.summary"
+@@ -271,14 +271,14 @@
+ read_fname]
+
+ if verbose:
+- print >> sys.stderr, ' '.join(centrifuge_cmd)
++ print(' '.join(centrifuge_cmd), file=sys.stderr)
+
+ out_fname = "centrifuge.output"
+ proc = subprocess.Popen(centrifuge_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE)
+ proc.communicate()
+
+ calc_abundance = {}
+- for taxID in true_abundance.keys():
++ for taxID in list(true_abundance.keys()):
+ calc_abundance[taxID] = 0.0
+ first = True
+ for line in open(summary_fname):
+@@ -296,12 +296,12 @@
+ """
+
+ abundance_file = open("abundance.cmp", 'w')
+- print >> abundance_file, "taxID\ttrue\tcalc\trank"
++ print("taxID\ttrue\tcalc\trank", file=abundance_file)
+ for rank in ranks:
+ if rank == "strain":
+ continue
+ true_abundance_rank, calc_abundance_rank = {}, {}
+- for taxID in true_abundance.keys():
++ for taxID in list(true_abundance.keys()):
+ assert taxID in calc_abundance
+ rank_taxID = taxID
+ while True:
+@@ -322,11 +322,11 @@
+ calc_abundance_rank[rank_taxID] += calc_abundance[taxID]
+
+ ssr = 0.0 # Sum of Squared Residuals
+- for taxID in true_abundance_rank.keys():
++ for taxID in list(true_abundance_rank.keys()):
+ assert taxID in calc_abundance_rank
+ ssr += (true_abundance_rank[taxID] - calc_abundance_rank[taxID]) ** 2
+- print >> abundance_file, "%s\t%.6f\t%.6f\t%s" % (taxID, true_abundance_rank[taxID], calc_abundance_rank[taxID], rank)
+- print >> sys.stderr, "%s) Sum of squared residuals: %.6f" % (rank, ssr)
++ print("%s\t%.6f\t%.6f\t%s" % (taxID, true_abundance_rank[taxID], calc_abundance_rank[taxID], rank), file=abundance_file)
++ print("%s) Sum of squared residuals: %.6f" % (rank, ssr), file=sys.stderr)
+ abundance_file.close()
+
+