summary | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Fabian Klötzl 2021-03-11 17:06:14 +0000
committer Fabian Klötzl 2021-03-11 17:06:14 +0000
commit a4b185e650b4f9d00f9abd29f904a91ae1f5741d (patch)
tree f1ba9d33c8f084ebf1666b078c23cc87c4606ac4
parent 353331833b52e72495fc374ee8d2f3aec1fe208f (diff)
download aur-mash.tar.gz
update to v2.3
-rw-r--r-- .SRCINFO 12
-rw-r--r-- PKGBUILD 14
-rw-r--r-- faster-revcomp.patch 59
-rw-r--r-- manpages.patch 848
4 files changed, 9 insertions, 924 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 36698496bd8d..550834973a57 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,19 +1,15 @@
pkgbase = mash
pkgdesc = Fast genome and metagenome distance estimation using MinHash
- pkgver = 2.2.2
- pkgrel = 2
+ pkgver = 2.3
+ pkgrel = 1
url = https://github.com/marbl/Mash/
arch = x86_64
license = BSD
makedepends = capnproto
- source = https://github.com/marbl/Mash/archive/v2.2.2.tar.gz
+ source = https://github.com/marbl/Mash/archive/v2.3.tar.gz
source = dynamic-capnp.patch
- source = faster-revcomp.patch
- source = manpages.patch
- sha256sums = e4c2d702fd0254f689256b2d8f7d3cc3a68db3ea45b60f0a662ce926a4f5fc22
+ sha256sums = f96cf7305e010012c3debed966ac83ceecac0351dbbfeaa6cd7ad7f068d87fe1
sha256sums = 61cd860e66e57f6cc3dac317cb19665263aaa1de9b8c487cb9133ccde2388d92
- sha256sums = d363504438f8e6472063bb6ded7f43c8e895e2ca5de279aec01b19a82503b68b
- sha256sums = afd4263820301de7a2eeea3c8f5dbbce838834d34de8dbafffdd0f2c7624f7ae
pkgname = mash
diff --git a/PKGBUILD b/PKGBUILD
index 5a6bde752bac..92a34f949bf4 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -1,24 +1,20 @@
# Maintainer: Fabian Klötzl <fabian-aur@kloetzl.info>
pkgname=mash
-pkgver=2.2.2
-pkgrel=2
+pkgver=2.3
+pkgrel=1
pkgdesc="Fast genome and metagenome distance estimation using MinHash"
url="https://github.com/marbl/Mash/"
license=("BSD")
arch=("x86_64")
makedepends=("capnproto")
source=("https://github.com/marbl/Mash/archive/v${pkgver}.tar.gz"
- "dynamic-capnp.patch" "faster-revcomp.patch" "manpages.patch")
-sha256sums=('e4c2d702fd0254f689256b2d8f7d3cc3a68db3ea45b60f0a662ce926a4f5fc22'
- '61cd860e66e57f6cc3dac317cb19665263aaa1de9b8c487cb9133ccde2388d92'
- 'd363504438f8e6472063bb6ded7f43c8e895e2ca5de279aec01b19a82503b68b'
- 'afd4263820301de7a2eeea3c8f5dbbce838834d34de8dbafffdd0f2c7624f7ae')
+ "dynamic-capnp.patch")
+sha256sums=('f96cf7305e010012c3debed966ac83ceecac0351dbbfeaa6cd7ad7f068d87fe1'
+ '61cd860e66e57f6cc3dac317cb19665263aaa1de9b8c487cb9133ccde2388d92')
prepare() {
cd "Mash-${pkgver}"
patch -R -p1 -i ../../dynamic-capnp.patch
- patch -p1 -i ../../faster-revcomp.patch
- patch -p1 -i ../../manpages.patch
}
check() {
diff --git a/faster-revcomp.patch b/faster-revcomp.patch
deleted file mode 100644
index 96f7241c7157..000000000000
--- a/faster-revcomp.patch
+++ /dev/null
@@ -1,59 +0,0 @@
-diff --git a/src/mash/Sketch.cpp b/src/mash/Sketch.cpp
-index b2329fa..a15d769 100644
---- a/src/mash/Sketch.cpp
-+++ b/src/mash/Sketch.cpp
-@@ -1061,22 +1061,42 @@ Sketch::SketchOutput * loadCapnp(Sketch::SketchInput * input)
- return output;
- }
-
-+
-+/* Array from 0..25 of DNA complement of A..Z */
-+const char complement[] = {
-+ 'T', // 'A' = A
-+ 'V', // 'B' = not A = C,T,G
-+ 'G', // 'C' = C
-+ 'H', // 'D' = not C = A,T,G
-+ 'N', // 'E' = .
-+ 'N', // 'F' = .
-+ 'C', // 'G' = G
-+ 'D', // 'H' = not G = A,C,T
-+ 'N', // 'I' = .
-+ 'N', // 'J' = .
-+ 'M', // 'K' = T,G = Keto
-+ 'N', // 'L' = .
-+ 'K', // 'M' = A,C = Amino
-+ 'N', // 'N' = A,C,T,G = uNkNowN
-+ 'N', // 'O' = .
-+ 'N', // 'P' = .
-+ 'N', // 'Q' = .
-+ 'Y', // 'R' = A,G = puRine
-+ 'S', // 'S' = G,C = Strong
-+ 'A', // 'T' = T
-+ 'A', // 'U' = T (RNA)
-+ 'B', // 'V' = not T = A,C,G
-+ 'W', // 'W' = A,T = Weak
-+ 'N', // 'X' = .
-+ 'R', // 'Y' = pYrimidine = C,T
-+ 'N', // 'Z' = .
-+};
-+
- void reverseComplement(const char * src, char * dest, int length)
- {
- for ( int i = 0; i < length; i++ )
- {
-- char base = src[i];
--
-- switch ( base )
-- {
-- case 'A': base = 'T'; break;
-- case 'C': base = 'G'; break;
-- case 'G': base = 'C'; break;
-- case 'T': base = 'A'; break;
-- default: break;
-- }
--
-- dest[length - i - 1] = base;
-+ dest[i] = complement[ (int) src[length-i-1] - (int) 'A' ];
- }
- }
-
diff --git a/manpages.patch b/manpages.patch
deleted file mode 100644
index 35dd24868a01..000000000000
--- a/manpages.patch
+++ /dev/null
@@ -1,848 +0,0 @@
-diff --git a/Makefile.in b/Makefile.in
-index 88ce384..019e394 100644
---- a/Makefile.in
-+++ b/Makefile.in
-@@ -57,7 +57,12 @@ src/mash/memcpyWrap.o : src/mash/memcpyWrap.c
- src/mash/capnp/MinHash.capnp.c++ src/mash/capnp/MinHash.capnp.h : src/mash/capnp/MinHash.capnp
- cd src/mash/capnp;export PATH=@capnp@/bin/:${PATH};capnp compile -I @capnp@/include -oc++ MinHash.capnp
-
--install : mash
-+.PHONY: install-man install
-+install-man:
-+ mkdir -p @prefix@/share/man/man1
-+ cp `pwd`/doc/man/*.1 @prefix@/share/man/man1
-+
-+install : mash install-man
- mkdir -p @prefix@/bin/
- mkdir -p @prefix@/lib/
- mkdir -p @prefix@/include/
-@@ -68,12 +73,15 @@ install : mash
- cp `pwd`/src/mash/*.h @prefix@/include/mash/
- cp `pwd`/src/mash/capnp/MinHash.capnp.h @prefix@/include/mash/capnp/
-
--.PHONY: uninstall
--uninstall:
-+.PHONY: uninstall uninstall-man
-+uninstall: uninstall-man
- rm -f @prefix@/bin/mash
- rm -f @prefix@/lib/libmash.a
- rm -rf @prefix@/include/mash
-
-+uninstall-man:
-+ rm -f @prefix@/share/man/man1/mash*.1
-+
- clean :
- -rm mash
- -rm libmash.a
-diff --git a/doc/man/mash-dist.1 b/doc/man/mash-dist.1
-new file mode 100644
-index 0000000..9f1ae60
---- /dev/null
-+++ b/doc/man/mash-dist.1
-@@ -0,0 +1,162 @@
-+'\" t
-+.\" Title: mash-dist
-+.\" Author: [see the "AUTHOR(S)" section]
-+.\" Generator: Asciidoctor 2.0.10
-+.\" Date: 2019-12-13
-+.\" Manual: \ \&
-+.\" Source: \ \&
-+.\" Language: English
-+.\"
-+.TH "MASH\-DIST" "1" "2019-12-13" "\ \&" "\ \&"
-+.ie \n(.g .ds Aq \(aq
-+.el .ds Aq '
-+.ss \n[.ss] 0
-+.nh
-+.ad l
-+.de URL
-+\fI\\$2\fP <\\$1>\\$3
-+..
-+.als MTO URL
-+.if \n[.g] \{\
-+. mso www.tmac
-+. am URL
-+. ad l
-+. .
-+. am MTO
-+. ad l
-+. .
-+. LINKSTYLE blue R < >
-+.\}
-+.SH "NAME"
-+mash\-dist \- estimate the distance of query sequences to references
-+.SH "SYNOPSIS"
-+.sp
-+\fBmash dist\fP [options] <reference> <query> [<query>] ...
-+.SH "DESCRIPTION"
-+.sp
-+Estimate the distance of each query sequence to the reference. Both the
-+reference and queries can be fasta or fastq, gzipped or not, or Mash sketch
-+files (.msh) with matching k\-mer sizes. Query files can also be files of file
-+names (see \fB\-l\fP). Whole files are compared by default (see \fB\-i\fP). The output
-+fields are [reference\-ID, query\-ID, distance, p\-value, shared\-hashes].
-+.SH "OPTIONS"
-+.sp
-+\fB\-h\fP
-+.RS 4
-+Help
-+.RE
-+.sp
-+\fB\-p\fP <int>
-+.RS 4
-+Parallelism. This many threads will be spawned for processing. [1]
-+.RE
-+.SS "Input"
-+.sp
-+\fB\-l\fP
-+.RS 4
-+List input. Each query file contains a list of sequence files, one
-+per line. The reference file is not affected.
-+.RE
-+.SS "Output"
-+.sp
-+\fB\-t\fP
-+.RS 4
-+Table output (will not report p\-values, but fields will be blank if
-+they do not meet the p\-value threshold).
-+.RE
-+.sp
-+\fB\-v\fP <num>
-+.RS 4
-+Maximum p\-value to report. (0\-1) [1.0]
-+.RE
-+.sp
-+\fB\-d\fP <num>
-+.RS 4
-+Maximum distance to report. (0\-1) [1.0]
-+.RE
-+.SS "Sketching"
-+.sp
-+\fB\-k\fP <int>
-+.RS 4
-+K\-mer size. Hashes will be based on strings of this many
-+nucleotides. Canonical nucleotides are used by default (see
-+Alphabet options below). (1\-32) [21]
-+.RE
-+.sp
-+\fB\-s\fP <int>
-+.RS 4
-+Sketch size. Each sketch will have at most this many non\-redundant
-+min\-hashes. [1000]
-+.RE
-+.sp
-+\fB\-i\fP
-+.RS 4
-+Sketch individual sequences, rather than whole files.
-+.RE
-+.sp
-+\fB\-w\fP <num>
-+.RS 4
-+Probability threshold for warning about low k\-mer size. (0\-1) [0.01]
-+.RE
-+.sp
-+\fB\-r\fP
-+.RS 4
-+Input is a read set. See Reads options below. Incompatible with \fB\-i\fP.
-+.RE
-+.SS "Sketching (reads)"
-+.sp
-+\fB\-b\fP <size>
-+.RS 4
-+Use a Bloom filter of this size (raw bytes or with K/M/G/T) to
-+filter out unique k\-mers. This is useful if exact filtering with \fB\-m\fP
-+uses too much memory. However, some unique k\-mers may pass
-+erroneously, and copies cannot be counted beyond 2. Implies \fB\-r\fP.
-+.RE
-+.sp
-+\fB\-m\fP <int>
-+.RS 4
-+Minimum copies of each k\-mer required to pass noise filter for
-+reads. Implies \fB\-r\fP. [1]
-+.RE
-+.sp
-+\fB\-c\fP <num>
-+.RS 4
-+Target coverage. Sketching will conclude if this coverage is
-+reached before the end of the input file (estimated by average
-+k\-mer multiplicity). Implies \fB\-r\fP.
-+.RE
-+.sp
-+\fB\-g\fP <size>
-+.RS 4
-+Genome size. If specified, will be used for p\-value calculation
-+instead of an estimated size from k\-mer content. Implies \fB\-r\fP.
-+.RE
-+.SS "Sketching (alphabet)"
-+.sp
-+\fB\-n\fP
-+.RS 4
-+Preserve strand (by default, strand is ignored by using canonical
-+DNA k\-mers, which are alphabetical minima of forward\-reverse
-+pairs). Implied if an alphabet is specified with \fB\-a\fP or \fB\-z\fP.
-+.RE
-+.sp
-+\fB\-a\fP
-+.RS 4
-+Use amino acid alphabet (A\-Z, except BJOUXZ). Implies \fB\-n\fP, \fB\-k\fP 9.
-+.RE
-+.sp
-+\fB\-z\fP <text>
-+.RS 4
-+Alphabet to base hashes on (case ignored by default; see \fB\-Z\fP).
-+K\-mers with other characters will be ignored. Implies \fB\-n\fP.
-+.RE
-+.sp
-+\fB\-Z\fP
-+.RS 4
-+Preserve case in k\-mers and alphabet (case is ignored by default).
-+Sequence letters whose case is not in the current alphabet will be
-+skipped when sketching.
-+.RE
-+.SH "SEE ALSO"
-+.sp
-+mash(1)
-\ No newline at end of file
-diff --git a/doc/man/mash-info.1 b/doc/man/mash-info.1
-new file mode 100644
-index 0000000..25eb2c1
---- /dev/null
-+++ b/doc/man/mash-info.1
-@@ -0,0 +1,69 @@
-+'\" t
-+.\" Title: mash-info
-+.\" Author: [see the "AUTHOR(S)" section]
-+.\" Generator: Asciidoctor 2.0.10
-+.\" Date: 2019-12-13
-+.\" Manual: \ \&
-+.\" Source: \ \&
-+.\" Language: English
-+.\"
-+.TH "MASH\-INFO" "1" "2019-12-13" "\ \&" "\ \&"
-+.ie \n(.g .ds Aq \(aq
-+.el .ds Aq '
-+.ss \n[.ss] 0
-+.nh
-+.ad l
-+.de URL
-+\fI\\$2\fP <\\$1>\\$3
-+..
-+.als MTO URL
-+.if \n[.g] \{\
-+. mso www.tmac
-+. am URL
-+. ad l
-+. .
-+. am MTO
-+. ad l
-+. .
-+. LINKSTYLE blue R < >
-+.\}
-+.SH "NAME"
-+mash\-info \- display information about sketch files
-+.SH "SYNOPSIS"
-+.sp
-+\fBmash info\fP [options] <sketch>
-+.SH "DESCRIPTION"
-+.sp
-+Displays information about sketch files.
-+.SH "OPTIONS"
-+.sp
-+\fB\-h\fP
-+.RS 4
-+Help
-+.RE
-+.sp
-+\fB\-H\fP
-+.RS 4
-+Only show header info. Do not list each sketch. Incompatible with \fB\-t\fP
-+and \fB\-c\fP.
-+.RE
-+.sp
-+\fB\-t\fP
-+.RS 4
-+Tabular output (rather than padded), with no header. Incompatible with
-+\fB\-H\fP and \fB\-c\fP.
-+.RE
-+.sp
-+\fB\-c\fP
-+.RS 4
-+Show hash count histograms for each sketch. Incompatible with \fB\-H\fP and
-+\fB\-t\fP.
-+.RE
-+.sp
-+\fB\-d\fP
-+.RS 4
-+Dump sketches in JSON format. Incompatible with \fB\-H\fP, \fB\-t\fP, and \fB\-c\fP.
-+.RE
-+.SH "SEE ALSO"
-+.sp
-+mash(1)
-\ No newline at end of file
-diff --git a/doc/man/mash-paste.1 b/doc/man/mash-paste.1
-new file mode 100644
-index 0000000..740e5ed
---- /dev/null
-+++ b/doc/man/mash-paste.1
-@@ -0,0 +1,51 @@
-+'\" t
-+.\" Title: mash-paste
-+.\" Author: [see the "AUTHOR(S)" section]
-+.\" Generator: Asciidoctor 2.0.10
-+.\" Date: 2019-12-13
-+.\" Manual: \ \&
-+.\" Source: \ \&
-+.\" Language: English
-+.\"
-+.TH "MASH\-PASTE" "1" "2019-12-13" "\ \&" "\ \&"
-+.ie \n(.g .ds Aq \(aq
-+.el .ds Aq '
-+.ss \n[.ss] 0
-+.nh
-+.ad l
-+.de URL
-+\fI\\$2\fP <\\$1>\\$3
-+..
-+.als MTO URL
-+.if \n[.g] \{\
-+. mso www.tmac
-+. am URL
-+. ad l
-+. .
-+. am MTO
-+. ad l
-+. .
-+. LINKSTYLE blue R < >
-+.\}
-+.SH "NAME"
-+mash\-paste \- create a single sketch file from multiple sketch files
-+.SH "SYNOPSIS"
-+.sp
-+\fBmash paste\fP [options] <out_prefix> <sketch> [<sketch>] ...
-+.SH "DESCRIPTION"
-+.sp
-+Create a single sketch file from multiple sketch files.
-+.SH "OPTIONS"
-+.sp
-+\fB\-h\fP
-+.RS 4
-+Help
-+.RE
-+.sp
-+\fB\-l\fP
-+.RS 4
-+Input files are lists of file names.
-+.RE
-+.SH "SEE ALSO"
-+.sp
-+mash(1)
-\ No newline at end of file
-diff --git a/doc/man/mash-screen.1 b/doc/man/mash-screen.1
-new file mode 100644
-index 0000000..afd7874
---- /dev/null
-+++ b/doc/man/mash-screen.1
-@@ -0,0 +1,81 @@
-+'\" t
-+.\" Title: mash-screen
-+.\" Author: [see the "AUTHOR(S)" section]
-+.\" Generator: Asciidoctor 2.0.10
-+.\" Date: 2019-12-13
-+.\" Manual: \ \&
-+.\" Source: \ \&
-+.\" Language: English
-+.\"
-+.TH "MASH\-SCREEN" "1" "2019-12-13" "\ \&" "\ \&"
-+.ie \n(.g .ds Aq \(aq
-+.el .ds Aq '
-+.ss \n[.ss] 0
-+.nh
-+.ad l
-+.de URL
-+\fI\\$2\fP <\\$1>\\$3
-+..
-+.als MTO URL
-+.if \n[.g] \{\
-+. mso www.tmac
-+. am URL
-+. ad l
-+. .
-+. am MTO
-+. ad l
-+. .
-+. LINKSTYLE blue R < >
-+.\}
-+.SH "NAME"
-+mash\-screen \- determine whether query sequences are within a larger pool of sequences
-+.SH "SYNOPSIS"
-+.sp
-+\fBmash screen\fP [options] <queries>.msh <pool> [<pool>] ...
-+.SH "DESCRIPTION"
-+.sp
-+Determine how well query sequences are contained within a pool of sequences.
-+The queries must be formatted as a single Mash sketch file (.msh), created
-+with the \f(CRmash sketch\fP command. The <pool> files can be contigs or reads, in
-+fasta or fastq, gzipped or not, and "\-" can be given for <pool> to read from
-+standard input. The <pool> sequences are assumed to be nucleotides, and will
-+be 6\-frame translated if the <queries> are amino acids. The output fields are
-+[identity, shared\-hashes, median\-multiplicity, p\-value, query\-ID, query\-comment],
-+where median\-multiplicity is computed for shared hashes, based on the number of
-+observations of those hashes within the pool.
-+.SH "OPTIONS"
-+.sp
-+\fB\-h\fP
-+.RS 4
-+Help
-+.RE
-+.sp
-+\fB\-p\fP <int>
-+.RS 4
-+Parallelism. This many threads will be spawned for processing.
-+.RE
-+.sp
-+\fB\-w\fP
-+.RS 4
-+Winner\-takes\-all strategy for identity estimates. After counting
-+hashes for each query, hashes that appear in multiple queries will
-+be removed from all except the one with the best identity (ties
-+broken by larger query), and other identities will be reduced. This
-+removes output redundancy, providing a rough compositional outline.
-+.RE
-+.SS "Output"
-+.sp
-+\fB\-i\fP <num>
-+.RS 4
-+Minimum identity to report. Inclusive unless set to zero, in which
-+case only identities greater than zero (i.e. with at least one
-+shared hash) will be reported. Set to \-1 to output everything.
-+.RE
-+.sp
-+\fB\-v\fP <num>
-+.RS 4
-+Maximum p\-value to report.
-+.RE
-+.SH "SEE ALSO"
-+.sp
-+mash(1)
-\ No newline at end of file
-diff --git a/doc/man/mash-sketch.1 b/doc/man/mash-sketch.1
-new file mode 100644
-index 0000000..96c329a
---- /dev/null
-+++ b/doc/man/mash-sketch.1
-@@ -0,0 +1,154 @@
-+'\" t
-+.\" Title: mash-sketch
-+.\" Author: [see the "AUTHOR(S)" section]
-+.\" Generator: Asciidoctor 2.0.10
-+.\" Date: 2019-12-13
-+.\" Manual: \ \&
-+.\" Source: \ \&
-+.\" Language: English
-+.\"
-+.TH "MASH\-SKETCH" "1" "2019-12-13" "\ \&" "\ \&"
-+.ie \n(.g .ds Aq \(aq
-+.el .ds Aq '
-+.ss \n[.ss] 0
-+.nh
-+.ad l
-+.de URL
-+\fI\\$2\fP <\\$1>\\$3
-+..
-+.als MTO URL
-+.if \n[.g] \{\
-+. mso www.tmac
-+. am URL
-+. ad l
-+. .
-+. am MTO
-+. ad l
-+. .
-+. LINKSTYLE blue R < >
-+.\}
-+.SH "NAME"
-+mash\-sketch \- create sketches (reduced representations for fast operations)
-+.SH "SYNOPSIS"
-+.sp
-+\fBmash sketch\fP [options] fast(a|q)[.gz] ...
-+.SH "DESCRIPTION"
-+.sp
-+Create a sketch file, which is a reduced representation of a sequence or set
-+of sequences (based on min\-hashes) that can be used for fast distance
-+estimations. Input can be fasta or fastq files (gzipped or not), and "\-" can
-+be given to read from standard input. Input files can also be files of file
-+names (see \fB\-l\fP). For output, one sketch file will be generated, but it can have
-+multiple sketches within it, divided by sequences or files (see \fB\-i\fP). By
-+default, the output file name will be the first input file with a \(aq.msh\(aq
-+extension, or \(aqstdin.msh\(aq if standard input is used (see \fB\-o\fP).
-+.SH "OPTIONS"
-+.sp
-+\fB\-h\fP
-+.RS 4
-+Help
-+.RE
-+.sp
-+\fB\-p\fP <int>
-+.RS 4
-+Parallelism. This many threads will be spawned for processing. [1]
-+.RE
-+.SS "Input"
-+.sp
-+\fB\-l\fP
-+.RS 4
-+List input. Each file contains a list of sequence files, one per line.
-+.RE
-+.SS "Output"
-+.sp
-+\fB\-o\fP <path>
-+.RS 4
-+Output prefix (first input file used if unspecified). The suffix
-+\(aq.msh\(aq will be appended.
-+.RE
-+.SS "Sketching"
-+.sp
-+\fB\-k\fP <int>
-+.RS 4
-+K\-mer size. Hashes will be based on strings of this many
-+nucleotides. Canonical nucleotides are used by default (see
-+Alphabet options below). (1\-32) [21]
-+.RE
-+.sp
-+\fB\-s\fP <int>
-+.RS 4
-+Sketch size. Each sketch will have at most this many non\-redundant
-+min\-hashes. [1000]
-+.RE
-+.sp
-+\fB\-i\fP
-+.RS 4
-+Sketch individual sequences, rather than whole files.
-+.RE
-+.sp
-+\fB\-w\fP <num>
-+.RS 4
-+Probability threshold for warning about low k\-mer size. (0\-1) [0.01]
-+.RE
-+.sp
-+\fB\-r\fP
-+.RS 4
-+Input is a read set. See Reads options below. Incompatible with \fB\-i\fP.
-+.RE
-+.SS "Sketching (reads)"
-+.sp
-+\fB\-b\fP <size>
-+.RS 4
-+Use a Bloom filter of this size (raw bytes or with K/M/G/T) to
-+filter out unique k\-mers. This is useful if exact filtering with \fB\-m\fP
-+uses too much memory. However, some unique k\-mers may pass
-+erroneously, and copies cannot be counted beyond 2. Implies \fB\-r\fP.
-+.RE
-+.sp
-+\fB\-m\fP <int>
-+.RS 4
-+Minimum copies of each k\-mer required to pass noise filter for
-+reads. Implies \fB\-r\fP. [1]
-+.RE
-+.sp
-+\fB\-c\fP <num>
-+.RS 4
-+Target coverage. Sketching will conclude if this coverage is
-+reached before the end of the input file (estimated by average
-+k\-mer multiplicity). Implies \fB\-r\fP.
-+.RE
-+.sp
-+\fB\-g\fP <size>
-+.RS 4
-+Genome size. If specified, will be used for p\-value calculation
-+instead of an estimated size from k\-mer content. Implies \fB\-r\fP.
-+.RE
-+.SS "Sketching (alphabet)"
-+.sp
-+\fB\-n\fP
-+.RS 4
-+Preserve strand (by default, strand is ignored by using canonical
-+DNA k\-mers, which are alphabetical minima of forward\-reverse
-+pairs). Implied if an alphabet is specified with \fB\-a\fP or \fB\-z\fP.
-+.RE
-+.sp
-+\fB\-a\fP
-+.RS 4
-+Use amino acid alphabet (A\-Z, except BJOUXZ). Implies \fB\-n\fP, \fB\-k\fP 9.
-+.RE
-+.sp
-+\fB\-z\fP <text>
-+.RS 4
-+Alphabet to base hashes on (case ignored by default; see \fB\-Z\fP).
-+K\-mers with other characters will be ignored. Implies \fB\-n\fP.
-+.RE
-+.sp
-+\fB\-Z\fP
-+.RS 4
-+Preserve case in k\-mers and alphabet (case is ignored by default).
-+Sequence letters whose case is not in the current alphabet will be
-+skipped when sketching.
-+.RE
-+.SH "SEE ALSO"
-+.sp
-+mash(1)
-\ No newline at end of file
-diff --git a/doc/man/mash-triangle.1 b/doc/man/mash-triangle.1
-new file mode 100644
-index 0000000..a20e027
---- /dev/null
-+++ b/doc/man/mash-triangle.1
-@@ -0,0 +1,169 @@
-+'\" t
-+.\" Title: mash-triangle
-+.\" Author: [see the "AUTHOR(S)" section]
-+.\" Generator: Asciidoctor 2.0.10
-+.\" Date: 2019-12-13
-+.\" Manual: \ \&
-+.\" Source: \ \&
-+.\" Language: English
-+.\"
-+.TH "MASH\-TRIANGLE" "1" "2019-12-13" "\ \&" "\ \&"
-+.ie \n(.g .ds Aq \(aq
-+.el .ds Aq '
-+.ss \n[.ss] 0
-+.nh
-+.ad l
-+.de URL
-+\fI\\$2\fP <\\$1>\\$3
-+..
-+.als MTO URL
-+.if \n[.g] \{\
-+. mso www.tmac
-+. am URL
-+. ad l
-+. .
-+. am MTO
-+. ad l
-+. .
-+. LINKSTYLE blue R < >
-+.\}
-+.SH "NAME"
-+mash\-triangle \- estimate a lower\-triangular distance matrix
-+.SH "SYNOPSIS"
-+.sp
-+\fBmash triangle\fP [options] <seq1> [<seq2>] ...
-+.SH "DESCRIPTION"
-+.sp
-+Estimate the distance of each input sequence to every other input
-+sequence. Outputs a lower\-triangular distance matrix in relaxed Phylip
-+format. The input sequences can be fasta or fastq, gzipped or not, or
-+Mash sketch files (.msh) with matching k\-mer sizes. Input files can also
-+be files of file names (see \-l). If more than one input file is provided,
-+whole files are compared by default (see \-i).
-+.SH "OPTIONS"
-+.sp
-+\fB\-h\fP
-+.RS 4
-+Help
-+.RE
-+.sp
-+\fB\-p\fP <int>
-+.RS 4
-+Parallelism. This many threads will be spawned for processing. [1]
-+.RE
-+.SS "Input"
-+.sp
-+\fB\-l\fP
-+.RS 4
-+List input. Each query file contains a list of sequence files, one
-+per line. The reference file is not affected.
-+.RE
-+.SS "Output"
-+.sp
-+\fB\-C\fP
-+.RS 4
-+Use comment fields for sequence names instead of IDs.
-+.RE
-+.sp
-+\fB\-E\fP
-+.RS 4
-+Output edge list instead of Phylip matrix, with fields [seq1, seq2,
-+dist, p\-val, shared\-hashes].
-+.RE
-+.sp
-+\fB\-v\fP <num>
-+.RS 4
-+Maximum p\-value to report in edge list. Implies \-E. (0\-1) [1.0]
-+.RE
-+.sp
-+\fB\-d\fP <num>
-+.RS 4
-+Maximum distance to report in edge list. Implies \-E. (0\-1) [1.0]
-+.RE
-+.SS "Sketching"
-+.sp
-+\fB\-k\fP <int>
-+.RS 4
-+K\-mer size. Hashes will be based on strings of this many
-+nucleotides. Canonical nucleotides are used by default (see
-+Alphabet options below). (1\-32) [21]
-+.RE
-+.sp
-+\fB\-s\fP <int>
-+.RS 4
-+Sketch size. Each sketch will have at most this many non\-redundant
-+min\-hashes. [1000]
-+.RE
-+.sp
-+\fB\-i\fP
-+.RS 4
-+Sketch individual sequences, rather than whole files, e.g. for
-+multi\-fastas of single\-chromosome genomes or pair\-wise gene comparisons.
-+.RE
-+.sp
-+\fB\-w\fP <num>
-+.RS 4
-+Probability threshold for warning about low k\-mer size. (0\-1) [0.01]
-+.RE
-+.sp
-+\fB\-r\fP
-+.RS 4
-+Input is a read set. See Reads options below. Incompatible with \fB\-i\fP.
-+.RE
-+.SS "Sketching (reads)"
-+.sp
-+\fB\-b\fP <size>
-+.RS 4
-+Use a Bloom filter of this size (raw bytes or with K/M/G/T) to
-+filter out unique k\-mers. This is useful if exact filtering with \fB\-m\fP
-+uses too much memory. However, some unique k\-mers may pass
-+erroneously, and copies cannot be counted beyond 2. Implies \fB\-r\fP.
-+.RE
-+.sp
-+\fB\-m\fP <int>
-+.RS 4
-+Minimum copies of each k\-mer required to pass noise filter for
-+reads. Implies \fB\-r\fP. [1]
-+.RE
-+.sp
-+\fB\-c\fP <num>
-+.RS 4
-+Target coverage. Sketching will conclude if this coverage is
-+reached before the end of the input file (estimated by average
-+k\-mer multiplicity). Implies \fB\-r\fP.
-+.RE
-+.sp
-+\fB\-g\fP <size>
-+.RS 4
-+Genome size. If specified, will be used for p\-value calculation
-+instead of an estimated size from k\-mer content. Implies \fB\-r\fP.
-+.RE
-+.SS "Sketching (alphabet)"
-+.sp
-+\fB\-n\fP
-+.RS 4
-+Preserve strand (by default, strand is ignored by using canonical
-+DNA k\-mers, which are alphabetical minima of forward\-reverse
-+pairs). Implied if an alphabet is specified with \fB\-a\fP or \fB\-z\fP.
-+.RE
-+.sp
-+\fB\-a\fP
-+.RS 4
-+Use amino acid alphabet (A\-Z, except BJOUXZ). Implies \fB\-n\fP, \fB\-k\fP 9.
-+.RE
-+.sp
-+\fB\-z\fP <text>
-+.RS 4
-+Alphabet to base hashes on (case ignored by default; see \fB\-Z\fP).
-+K\-mers with other characters will be ignored. Implies \fB\-n\fP.
-+.RE
-+.sp
-+\fB\-Z\fP
-+.RS 4
-+Preserve case in k\-mers and alphabet (case is ignored by default).
-+Sequence letters whose case is not in the current alphabet will be
-+skipped when sketching.
-+.RE
-+.SH "SEE ALSO"
-+.sp
-+mash(1)
-\ No newline at end of file
-diff --git a/doc/man/mash.1 b/doc/man/mash.1
-new file mode 100644
-index 0000000..b5e6d75
---- /dev/null
-+++ b/doc/man/mash.1
-@@ -0,0 +1,77 @@
-+'\" t
-+.\" Title: mash
-+.\" Author: [see the "AUTHOR(S)" section]
-+.\" Generator: Asciidoctor 2.0.10
-+.\" Date: 2019-12-13
-+.\" Manual: \ \&
-+.\" Source: \ \&
-+.\" Language: English
-+.\"
-+.TH "MASH" "1" "2019-12-13" "\ \&" "\ \&"
-+.ie \n(.g .ds Aq \(aq
-+.el .ds Aq '
-+.ss \n[.ss] 0
-+.nh
-+.ad l
-+.de URL
-+\fI\\$2\fP <\\$1>\\$3
-+..
-+.als MTO URL
-+.if \n[.g] \{\
-+. mso www.tmac
-+. am URL
-+. ad l
-+. .
-+. am MTO
-+. ad l
-+. .
-+. LINKSTYLE blue R < >
-+.\}
-+.SH "NAME"
-+mash \- fast genome and metagenome distance estimation using MinHash
-+.SH "SYNOPSIS"
-+.sp
-+\fBmash\fP <command> [options] [arguments ...]
-+.SH "DESCRIPTION"
-+.sp
-+\fBmash\fP is the main executable for the \fBMash\fP software. The actual
-+functionality is provided by the subtools (\(aqcommands\(aq):
-+.SS "Commands"
-+.sp
-+\fBbounds\fP
-+.RS 4
-+Print a table of Mash error bounds.
-+.RE
-+.sp
-+\fBdist\fP
-+.RS 4
-+Estimate the distance of query sequences to references.
-+.RE
-+.sp
-+\fBinfo\fP
-+.RS 4
-+Display information about sketch files.
-+.RE
-+.sp
-+\fBpaste\fP
-+.RS 4
-+Create a single sketch file from multiple sketch files.
-+.RE
-+.sp
-+\fBscreen\fP
-+.RS 4
-+Determine whether query sequences are within a larger pool of sequences.
-+.RE
-+.sp
-+\fBsketch\fP
-+.RS 4
-+Create sketches (reduced representations for fast operations).
-+.RE
-+.sp
-+\fBtriangle\fP
-+.RS 4
-+Estimate a lower\-triangular distance matrix.
-+.RE
-+.SH "SEE ALSO"
-+.sp
-+mash\-dist(1), mash\-info(1), mash\-paste(1), mash\-screen(1), mash\-sketch(1), mash\-triangle(1)
-\ No newline at end of file