summarylogtreecommitdiffstats
path: root/fastaread.patch
blob: 44e80df4fda0d190b029b8a4e73305dc0017563a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
Description: Slighly more robust method to obtain taxid of fasta files to at
  least enable reading the provided data examples of autopkgtest
Bug-Debian: https://bugs.debian.org/889623
Forwarded: https://github.com/DerrickWood/kraken/issues/111
Author: Andreas Tille <tille@debian.org>
Last-Update: Wed, 07 Feb 2018 13:41:39 +0100

--- a/scan_fasta_file.pl
+++ b/scan_fasta_file.pl
@@ -32,6 +32,8 @@ while (<>) {
   # while (/.../g) needed because non-redundant DBs sometimes have multiple
   #   sequence IDs in the header; extra sequence IDs are prefixed by
   #   '\x01' characters (if downloaded in FASTA format from NCBI FTP directly).
+  s/^>gi\|/>/;
+  s/\| .*//;
   while (/(?:^>|\x01)(\S+)/g) {
     my $seqid = $1;
     my $taxid = kraken2lib::check_seqid($seqid);