--- a/centrifuge_evaluate_mason.py 2023-04-14 21:29:29.482568396 +0530 +++ b/centrifuge_evaluate_mason.py 2023-04-14 22:05:44.988504275 +0530 @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python3 import sys, os, subprocess, inspect import platform, multiprocessing @@ -27,7 +27,7 @@ higher_ranked = {} ancestors = set() - for tax_id in taxonomy_tree.keys(): + for tax_id in list(taxonomy_tree.keys()): if tax_id in ancestors: continue while True: @@ -82,7 +82,7 @@ fields = line.strip().split('\t') if len(fields) != 3: - print >> sys.stderr, "Warning: %s missing" % (line.strip()) + print("Warning: %s missing" % (line.strip()), file=sys.stderr) continue read_name, tax_id = fields[1:3] # Traverse up taxonomy tree to match the given rank parameter @@ -117,7 +117,7 @@ # print read_name raw_unique_classified = 0 - for read_name, maps in db_dic.items(): + for read_name, maps in list(db_dic.items()): if len(maps) == 1 and read_name not in higher_ranked: raw_unique_classified += 1 return classified, unique_classified, unclassified, len(db_dic), raw_unique_classified @@ -184,7 +184,7 @@ read_fname] if verbose: - print >> sys.stderr, ' '.join(centrifuge_cmd) + print(' '.join(centrifuge_cmd), file=sys.stderr) out_fname = "centrifuge.output" proc = subprocess.Popen(centrifuge_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE) @@ -208,12 +208,12 @@ # if rank == "strain": # assert num_cases == num_fragment - print >> sys.stderr, "\t\t%s" % rank - print >> sys.stderr, "\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(classified, num_cases, float(classified) / num_cases) - print >> sys.stderr, "\t\t\tprecision : {:,} / {:,} ({:.2%})".format(classified, raw_classified, float(classified) / raw_classified) - print >> sys.stderr, "\n\t\t\tfor uniquely classified " - print >> sys.stderr, "\t\t\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(unique_classified, num_cases, float(unique_classified) / num_cases) - print >> sys.stderr, "\t\t\t\t\tprecision : {:,} / {:,} ({:.2%})".format(unique_classified, raw_unique_classified, float(unique_classified) / raw_unique_classified) + print("\t\t%s" % rank, file=sys.stderr) + print("\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(classified, num_cases, float(classified) / num_cases), file=sys.stderr) + print("\t\t\tprecision : {:,} / {:,} ({:.2%})".format(classified, raw_classified, float(classified) / raw_classified), file=sys.stderr) + print("\n\t\t\tfor uniquely classified ", file=sys.stderr) + print("\t\t\t\t\tsensitivity: {:,} / {:,} ({:.2%})".format(unique_classified, num_cases, float(unique_classified) / num_cases), file=sys.stderr) + print("\t\t\t\t\tprecision : {:,} / {:,} ({:.2%})".format(unique_classified, raw_unique_classified, float(unique_classified) / raw_unique_classified), file=sys.stderr) # Calculate sum of squared residuals in abundance """ @@ -252,12 +252,12 @@ if rank_taxID not in true_abundance: true_abundance[rank_taxID] = 0.0 true_abundance[rank_taxID] += (reads / float(genomeSize)) - for taxID, reads in true_abundance.items(): + for taxID, reads in list(true_abundance.items()): true_abundance[taxID] /= total_sum - print >> sys.stderr, "number of genomes:", num_genomes - print >> sys.stderr, "number of species:", num_species - print >> sys.stderr, "number of uniq species:", len(true_abundance) + print("number of genomes:", num_genomes, file=sys.stderr) + print("number of species:", num_species, file=sys.stderr) + print("number of uniq species:", len(true_abundance), file=sys.stderr) read_fname = "centrifuge_data/bacteria_sim10M/bacteria_sim10M.fa" summary_fname = "centrifuge.summary" @@ -271,14 +271,14 @@ read_fname] if verbose: - print >> sys.stderr, ' '.join(centrifuge_cmd) + print(' '.join(centrifuge_cmd), file=sys.stderr) out_fname = "centrifuge.output" proc = subprocess.Popen(centrifuge_cmd, stdout=open(out_fname, "w"), stderr=subprocess.PIPE) proc.communicate() calc_abundance = {} - for taxID in true_abundance.keys(): + for taxID in list(true_abundance.keys()): calc_abundance[taxID] = 0.0 first = True for line in open(summary_fname): @@ -296,12 +296,12 @@ """ abundance_file = open("abundance.cmp", 'w') - print >> abundance_file, "taxID\ttrue\tcalc\trank" + print("taxID\ttrue\tcalc\trank", file=abundance_file) for rank in ranks: if rank == "strain": continue true_abundance_rank, calc_abundance_rank = {}, {} - for taxID in true_abundance.keys(): + for taxID in list(true_abundance.keys()): assert taxID in calc_abundance rank_taxID = taxID while True: @@ -322,11 +322,11 @@ calc_abundance_rank[rank_taxID] += calc_abundance[taxID] ssr = 0.0 # Sum of Squared Residuals - for taxID in true_abundance_rank.keys(): + for taxID in list(true_abundance_rank.keys()): assert taxID in calc_abundance_rank ssr += (true_abundance_rank[taxID] - calc_abundance_rank[taxID]) ** 2 - print >> abundance_file, "%s\t%.6f\t%.6f\t%s" % (taxID, true_abundance_rank[taxID], calc_abundance_rank[taxID], rank) - print >> sys.stderr, "%s) Sum of squared residuals: %.6f" % (rank, ssr) + print("%s\t%.6f\t%.6f\t%s" % (taxID, true_abundance_rank[taxID], calc_abundance_rank[taxID], rank), file=abundance_file) + print("%s) Sum of squared residuals: %.6f" % (rank, ssr), file=sys.stderr) abundance_file.close()