summarylogtreecommitdiffstats
diff options
context:
space:
mode:
author: Fabian Klötzl, 2020-03-31 15:27:53 +0200
committer: Fabian Klötzl, 2020-03-31 15:27:53 +0200
commit: 353331833b52e72495fc374ee8d2f3aec1fe208f (patch)
tree: 904b423cbb09c69d1f732be581eeec12f3ff38f7
parent: 6e58aa6e37adc1513e301b8ccc3ae7e72b3eae11 (diff)
download: aur-353331833b52e72495fc374ee8d2f3aec1fe208f.tar.gz
add manpages
-rw-r--r-- .SRCINFO 6
-rw-r--r-- PKGBUILD 11
-rw-r--r-- faster-revcomp.patch 59
-rw-r--r-- manpages.patch 848
4 files changed, 920 insertions, 4 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 76eac7090ab6..36698496bd8d 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,15 +1,19 @@
pkgbase = mash
pkgdesc = Fast genome and metagenome distance estimation using MinHash
pkgver = 2.2.2
- pkgrel = 1
+ pkgrel = 2
url = https://github.com/marbl/Mash/
arch = x86_64
license = BSD
makedepends = capnproto
source = https://github.com/marbl/Mash/archive/v2.2.2.tar.gz
source = dynamic-capnp.patch
+ source = faster-revcomp.patch
+ source = manpages.patch
sha256sums = e4c2d702fd0254f689256b2d8f7d3cc3a68db3ea45b60f0a662ce926a4f5fc22
sha256sums = 61cd860e66e57f6cc3dac317cb19665263aaa1de9b8c487cb9133ccde2388d92
+ sha256sums = d363504438f8e6472063bb6ded7f43c8e895e2ca5de279aec01b19a82503b68b
+ sha256sums = afd4263820301de7a2eeea3c8f5dbbce838834d34de8dbafffdd0f2c7624f7ae
pkgname = mash
diff --git a/PKGBUILD b/PKGBUILD
index 09cd856591db..5a6bde752bac 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -1,19 +1,24 @@
# Maintainer: Fabian Klötzl <fabian-aur@kloetzl.info>
pkgname=mash
pkgver=2.2.2
-pkgrel=1
+pkgrel=2
pkgdesc="Fast genome and metagenome distance estimation using MinHash"
url="https://github.com/marbl/Mash/"
license=("BSD")
arch=("x86_64")
makedepends=("capnproto")
-source=("https://github.com/marbl/Mash/archive/v${pkgver}.tar.gz" "dynamic-capnp.patch")
+source=("https://github.com/marbl/Mash/archive/v${pkgver}.tar.gz"
+ "dynamic-capnp.patch" "faster-revcomp.patch" "manpages.patch")
sha256sums=('e4c2d702fd0254f689256b2d8f7d3cc3a68db3ea45b60f0a662ce926a4f5fc22'
- '61cd860e66e57f6cc3dac317cb19665263aaa1de9b8c487cb9133ccde2388d92')
+ '61cd860e66e57f6cc3dac317cb19665263aaa1de9b8c487cb9133ccde2388d92'
+ 'd363504438f8e6472063bb6ded7f43c8e895e2ca5de279aec01b19a82503b68b'
+ 'afd4263820301de7a2eeea3c8f5dbbce838834d34de8dbafffdd0f2c7624f7ae')
prepare() {
cd "Mash-${pkgver}"
patch -R -p1 -i ../../dynamic-capnp.patch
+ patch -p1 -i ../../faster-revcomp.patch
+ patch -p1 -i ../../manpages.patch
}
check() {
diff --git a/faster-revcomp.patch b/faster-revcomp.patch
new file mode 100644
index 000000000000..96f7241c7157
--- /dev/null
+++ b/faster-revcomp.patch
@@ -0,0 +1,59 @@
+diff --git a/src/mash/Sketch.cpp b/src/mash/Sketch.cpp
+index b2329fa..a15d769 100644
+--- a/src/mash/Sketch.cpp
++++ b/src/mash/Sketch.cpp
+@@ -1061,22 +1061,42 @@ Sketch::SketchOutput * loadCapnp(Sketch::SketchInput * input)
+ return output;
+ }
+
++
++/* Array from 0..25 of DNA complement of A..Z */
++const char complement[] = {
++ 'T', // 'A' = A
++ 'V', // 'B' = not A = C,T,G
++ 'G', // 'C' = C
++ 'H', // 'D' = not C = A,T,G
++ 'N', // 'E' = .
++ 'N', // 'F' = .
++ 'C', // 'G' = G
++ 'D', // 'H' = not G = A,C,T
++ 'N', // 'I' = .
++ 'N', // 'J' = .
++ 'M', // 'K' = T,G = Keto
++ 'N', // 'L' = .
++ 'K', // 'M' = A,C = Amino
++ 'N', // 'N' = A,C,T,G = uNkNowN
++ 'N', // 'O' = .
++ 'N', // 'P' = .
++ 'N', // 'Q' = .
++ 'Y', // 'R' = A,G = puRine
++ 'S', // 'S' = G,C = Strong
++ 'A', // 'T' = T
++ 'A', // 'U' = T (RNA)
++ 'B', // 'V' = not T = A,C,G
++ 'W', // 'W' = A,T = Weak
++ 'N', // 'X' = .
++ 'R', // 'Y' = pYrimidine = C,T
++ 'N', // 'Z' = .
++};
++
+void reverseComplement(const char * src, char * dest, int length)
+ {
+ for ( int i = 0; i < length; i++ )
+ {
+- char base = src[i];
+-
+- switch ( base )
+- {
+- case 'A': base = 'T'; break;
+- case 'C': base = 'G'; break;
+- case 'G': base = 'C'; break;
+- case 'T': base = 'A'; break;
+- default: break;
+- }
+-
+- dest[length - i - 1] = base;
++ const char base = src[length-i-1]; dest[i] = ( base >= 'A' && base <= 'Z' ) ? complement[ base - 'A' ] : base; // bytes outside 'A'..'Z' are copied unchanged: the table has only 26 entries, so indexing with them would read out of bounds
+ }
+ }
+
diff --git a/manpages.patch b/manpages.patch
new file mode 100644
index 000000000000..35dd24868a01
--- /dev/null
+++ b/manpages.patch
@@ -0,0 +1,848 @@
+diff --git a/Makefile.in b/Makefile.in
+index 88ce384..019e394 100644
+--- a/Makefile.in
++++ b/Makefile.in
+@@ -57,7 +57,12 @@ src/mash/memcpyWrap.o : src/mash/memcpyWrap.c
+ src/mash/capnp/MinHash.capnp.c++ src/mash/capnp/MinHash.capnp.h : src/mash/capnp/MinHash.capnp
+ cd src/mash/capnp;export PATH=@capnp@/bin/:${PATH};capnp compile -I @capnp@/include -oc++ MinHash.capnp
+
+-install : mash
++.PHONY: install-man install
++install-man:
++ mkdir -p @prefix@/share/man/man1
++ cp `pwd`/doc/man/*.1 @prefix@/share/man/man1
++
++install : mash install-man
+ mkdir -p @prefix@/bin/
+ mkdir -p @prefix@/lib/
+ mkdir -p @prefix@/include/
+@@ -68,12 +73,15 @@ install : mash
+ cp `pwd`/src/mash/*.h @prefix@/include/mash/
+ cp `pwd`/src/mash/capnp/MinHash.capnp.h @prefix@/include/mash/capnp/
+
+-.PHONY: uninstall
+-uninstall:
++.PHONY: uninstall uninstall-man
++uninstall: uninstall-man
+ rm -f @prefix@/bin/mash
+ rm -f @prefix@/lib/libmash.a
+ rm -rf @prefix@/include/mash
+
++uninstall-man:
++ rm -f @prefix@/share/man/man1/mash*.1
++
+ clean :
+ -rm mash
+ -rm libmash.a
+diff --git a/doc/man/mash-dist.1 b/doc/man/mash-dist.1
+new file mode 100644
+index 0000000..9f1ae60
+--- /dev/null
++++ b/doc/man/mash-dist.1
+@@ -0,0 +1,162 @@
++'\" t
++.\" Title: mash-dist
++.\" Author: [see the "AUTHOR(S)" section]
++.\" Generator: Asciidoctor 2.0.10
++.\" Date: 2019-12-13
++.\" Manual: \ \&
++.\" Source: \ \&
++.\" Language: English
++.\"
++.TH "MASH\-DIST" "1" "2019-12-13" "\ \&" "\ \&"
++.ie \n(.g .ds Aq \(aq
++.el .ds Aq '
++.ss \n[.ss] 0
++.nh
++.ad l
++.de URL
++\fI\\$2\fP <\\$1>\\$3
++..
++.als MTO URL
++.if \n[.g] \{\
++. mso www.tmac
++. am URL
++. ad l
++. .
++. am MTO
++. ad l
++. .
++. LINKSTYLE blue R < >
++.\}
++.SH "NAME"
++mash\-dist \- estimate the distance of query sequences to references
++.SH "SYNOPSIS"
++.sp
++\fBmash dist\fP [options] <reference> <query> [<query>] ...
++.SH "DESCRIPTION"
++.sp
++Estimate the distance of each query sequence to the reference. Both the
++reference and queries can be fasta or fastq, gzipped or not, or Mash sketch
++files (.msh) with matching k\-mer sizes. Query files can also be files of file
++names (see \fB\-l\fP). Whole files are compared by default (see \fB\-i\fP). The output
++fields are [reference\-ID, query\-ID, distance, p\-value, shared\-hashes].
++.SH "OPTIONS"
++.sp
++\fB\-h\fP
++.RS 4
++Help
++.RE
++.sp
++\fB\-p\fP <int>
++.RS 4
++Parallelism. This many threads will be spawned for processing. [1]
++.RE
++.SS "Input"
++.sp
++\fB\-l\fP
++.RS 4
++List input. Each query file contains a list of sequence files, one
++per line. The reference file is not affected.
++.RE
++.SS "Output"
++.sp
++\fB\-t\fP
++.RS 4
++Table output (will not report p\-values, but fields will be blank if
++they do not meet the p\-value threshold).
++.RE
++.sp
++\fB\-v\fP <num>
++.RS 4
++Maximum p\-value to report. (0\-1) [1.0]
++.RE
++.sp
++\fB\-d\fP <num>
++.RS 4
++Maximum distance to report. (0\-1) [1.0]
++.RE
++.SS "Sketching"
++.sp
++\fB\-k\fP <int>
++.RS 4
++K\-mer size. Hashes will be based on strings of this many
++nucleotides. Canonical nucleotides are used by default (see
++Alphabet options below). (1\-32) [21]
++.RE
++.sp
++\fB\-s\fP <int>
++.RS 4
++Sketch size. Each sketch will have at most this many non\-redundant
++min\-hashes. [1000]
++.RE
++.sp
++\fB\-i\fP
++.RS 4
++Sketch individual sequences, rather than whole files.
++.RE
++.sp
++\fB\-w\fP <num>
++.RS 4
++Probability threshold for warning about low k\-mer size. (0\-1) [0.01]
++.RE
++.sp
++\fB\-r\fP
++.RS 4
++Input is a read set. See Reads options below. Incompatible with \fB\-i\fP.
++.RE
++.SS "Sketching (reads)"
++.sp
++\fB\-b\fP <size>
++.RS 4
++Use a Bloom filter of this size (raw bytes or with K/M/G/T) to
++filter out unique k\-mers. This is useful if exact filtering with \fB\-m\fP
++uses too much memory. However, some unique k\-mers may pass
++erroneously, and copies cannot be counted beyond 2. Implies \fB\-r\fP.
++.RE
++.sp
++\fB\-m\fP <int>
++.RS 4
++Minimum copies of each k\-mer required to pass noise filter for
++reads. Implies \fB\-r\fP. [1]
++.RE
++.sp
++\fB\-c\fP <num>
++.RS 4
++Target coverage. Sketching will conclude if this coverage is
++reached before the end of the input file (estimated by average
++k\-mer multiplicity). Implies \fB\-r\fP.
++.RE
++.sp
++\fB\-g\fP <size>
++.RS 4
++Genome size. If specified, will be used for p\-value calculation
++instead of an estimated size from k\-mer content. Implies \fB\-r\fP.
++.RE
++.SS "Sketching (alphabet)"
++.sp
++\fB\-n\fP
++.RS 4
++Preserve strand (by default, strand is ignored by using canonical
++DNA k\-mers, which are alphabetical minima of forward\-reverse
++pairs). Implied if an alphabet is specified with \fB\-a\fP or \fB\-z\fP.
++.RE
++.sp
++\fB\-a\fP
++.RS 4
++Use amino acid alphabet (A\-Z, except BJOUXZ). Implies \fB\-n\fP, \fB\-k\fP 9.
++.RE
++.sp
++\fB\-z\fP <text>
++.RS 4
++Alphabet to base hashes on (case ignored by default; see \fB\-Z\fP).
++K\-mers with other characters will be ignored. Implies \fB\-n\fP.
++.RE
++.sp
++\fB\-Z\fP
++.RS 4
++Preserve case in k\-mers and alphabet (case is ignored by default).
++Sequence letters whose case is not in the current alphabet will be
++skipped when sketching.
++.RE
++.SH "SEE ALSO"
++.sp
++mash(1)
+\ No newline at end of file
+diff --git a/doc/man/mash-info.1 b/doc/man/mash-info.1
+new file mode 100644
+index 0000000..25eb2c1
+--- /dev/null
++++ b/doc/man/mash-info.1
+@@ -0,0 +1,69 @@
++'\" t
++.\" Title: mash-info
++.\" Author: [see the "AUTHOR(S)" section]
++.\" Generator: Asciidoctor 2.0.10
++.\" Date: 2019-12-13
++.\" Manual: \ \&
++.\" Source: \ \&
++.\" Language: English
++.\"
++.TH "MASH\-INFO" "1" "2019-12-13" "\ \&" "\ \&"
++.ie \n(.g .ds Aq \(aq
++.el .ds Aq '
++.ss \n[.ss] 0
++.nh
++.ad l
++.de URL
++\fI\\$2\fP <\\$1>\\$3
++..
++.als MTO URL
++.if \n[.g] \{\
++. mso www.tmac
++. am URL
++. ad l
++. .
++. am MTO
++. ad l
++. .
++. LINKSTYLE blue R < >
++.\}
++.SH "NAME"
++mash\-info \- display information about sketch files
++.SH "SYNOPSIS"
++.sp
++\fBmash info\fP [options] <sketch>
++.SH "DESCRIPTION"
++.sp
++Displays information about sketch files.
++.SH "OPTIONS"
++.sp
++\fB\-h\fP
++.RS 4
++Help
++.RE
++.sp
++\fB\-H\fP
++.RS 4
++Only show header info. Do not list each sketch. Incompatible with \fB\-t\fP
++and \fB\-c\fP.
++.RE
++.sp
++\fB\-t\fP
++.RS 4
++Tabular output (rather than padded), with no header. Incompatible with
++\fB\-H\fP and \fB\-c\fP.
++.RE
++.sp
++\fB\-c\fP
++.RS 4
++Show hash count histograms for each sketch. Incompatible with \fB\-H\fP and
++\fB\-t\fP.
++.RE
++.sp
++\fB\-d\fP
++.RS 4
++Dump sketches in JSON format. Incompatible with \fB\-H\fP, \fB\-t\fP, and \fB\-c\fP.
++.RE
++.SH "SEE ALSO"
++.sp
++mash(1)
+\ No newline at end of file
+diff --git a/doc/man/mash-paste.1 b/doc/man/mash-paste.1
+new file mode 100644
+index 0000000..740e5ed
+--- /dev/null
++++ b/doc/man/mash-paste.1
+@@ -0,0 +1,51 @@
++'\" t
++.\" Title: mash-paste
++.\" Author: [see the "AUTHOR(S)" section]
++.\" Generator: Asciidoctor 2.0.10
++.\" Date: 2019-12-13
++.\" Manual: \ \&
++.\" Source: \ \&
++.\" Language: English
++.\"
++.TH "MASH\-PASTE" "1" "2019-12-13" "\ \&" "\ \&"
++.ie \n(.g .ds Aq \(aq
++.el .ds Aq '
++.ss \n[.ss] 0
++.nh
++.ad l
++.de URL
++\fI\\$2\fP <\\$1>\\$3
++..
++.als MTO URL
++.if \n[.g] \{\
++. mso www.tmac
++. am URL
++. ad l
++. .
++. am MTO
++. ad l
++. .
++. LINKSTYLE blue R < >
++.\}
++.SH "NAME"
++mash\-paste \- create a single sketch file from multiple sketch files
++.SH "SYNOPSIS"
++.sp
++\fBmash paste\fP [options] <out_prefix> <sketch> [<sketch>] ...
++.SH "DESCRIPTION"
++.sp
++Create a single sketch file from multiple sketch files.
++.SH "OPTIONS"
++.sp
++\fB\-h\fP
++.RS 4
++Help
++.RE
++.sp
++\fB\-l\fP
++.RS 4
++Input files are lists of file names.
++.RE
++.SH "SEE ALSO"
++.sp
++mash(1)
+\ No newline at end of file
+diff --git a/doc/man/mash-screen.1 b/doc/man/mash-screen.1
+new file mode 100644
+index 0000000..afd7874
+--- /dev/null
++++ b/doc/man/mash-screen.1
+@@ -0,0 +1,81 @@
++'\" t
++.\" Title: mash-screen
++.\" Author: [see the "AUTHOR(S)" section]
++.\" Generator: Asciidoctor 2.0.10
++.\" Date: 2019-12-13
++.\" Manual: \ \&
++.\" Source: \ \&
++.\" Language: English
++.\"
++.TH "MASH\-SCREEN" "1" "2019-12-13" "\ \&" "\ \&"
++.ie \n(.g .ds Aq \(aq
++.el .ds Aq '
++.ss \n[.ss] 0
++.nh
++.ad l
++.de URL
++\fI\\$2\fP <\\$1>\\$3
++..
++.als MTO URL
++.if \n[.g] \{\
++. mso www.tmac
++. am URL
++. ad l
++. .
++. am MTO
++. ad l
++. .
++. LINKSTYLE blue R < >
++.\}
++.SH "NAME"
++mash\-screen \- determine whether query sequences are within a larger pool of sequences
++.SH "SYNOPSIS"
++.sp
++\fBmash screen\fP [options] <queries>.msh <pool> [<pool>] ...
++.SH "DESCRIPTION"
++.sp
++Determine how well query sequences are contained within a pool of sequences.
++The queries must be formatted as a single Mash sketch file (.msh), created
++with the \f(CRmash sketch\fP command. The <pool> files can be contigs or reads, in
++fasta or fastq, gzipped or not, and "\-" can be given for <pool> to read from
++standard input. The <pool> sequences are assumed to be nucleotides, and will
++be 6\-frame translated if the <queries> are amino acids. The output fields are
++[identity, shared\-hashes, median\-multiplicity, p\-value, query\-ID, query\-comment],
++where median\-multiplicity is computed for shared hashes, based on the number of
++observations of those hashes within the pool.
++.SH "OPTIONS"
++.sp
++\fB\-h\fP
++.RS 4
++Help
++.RE
++.sp
++\fB\-p\fP <int>
++.RS 4
++Parallelism. This many threads will be spawned for processing.
++.RE
++.sp
++\fB\-w\fP
++.RS 4
++Winner\-takes\-all strategy for identity estimates. After counting
++hashes for each query, hashes that appear in multiple queries will
++be removed from all except the one with the best identity (ties
++broken by larger query), and other identities will be reduced. This
++removes output redundancy, providing a rough compositional outline.
++.RE
++.SS "Output"
++.sp
++\fB\-i\fP <num>
++.RS 4
++Minimum identity to report. Inclusive unless set to zero, in which
++case only identities greater than zero (i.e. with at least one
++shared hash) will be reported. Set to \-1 to output everything.
++.RE
++.sp
++\fB\-v\fP <num>
++.RS 4
++Maximum p\-value to report.
++.RE
++.SH "SEE ALSO"
++.sp
++mash(1)
+\ No newline at end of file
+diff --git a/doc/man/mash-sketch.1 b/doc/man/mash-sketch.1
+new file mode 100644
+index 0000000..96c329a
+--- /dev/null
++++ b/doc/man/mash-sketch.1
+@@ -0,0 +1,154 @@
++'\" t
++.\" Title: mash-sketch
++.\" Author: [see the "AUTHOR(S)" section]
++.\" Generator: Asciidoctor 2.0.10
++.\" Date: 2019-12-13
++.\" Manual: \ \&
++.\" Source: \ \&
++.\" Language: English
++.\"
++.TH "MASH\-SKETCH" "1" "2019-12-13" "\ \&" "\ \&"
++.ie \n(.g .ds Aq \(aq
++.el .ds Aq '
++.ss \n[.ss] 0
++.nh
++.ad l
++.de URL
++\fI\\$2\fP <\\$1>\\$3
++..
++.als MTO URL
++.if \n[.g] \{\
++. mso www.tmac
++. am URL
++. ad l
++. .
++. am MTO
++. ad l
++. .
++. LINKSTYLE blue R < >
++.\}
++.SH "NAME"
++mash\-sketch \- create sketches (reduced representations for fast operations)
++.SH "SYNOPSIS"
++.sp
++\fBmash sketch\fP [options] fast(a|q)[.gz] ...
++.SH "DESCRIPTION"
++.sp
++Create a sketch file, which is a reduced representation of a sequence or set
++of sequences (based on min\-hashes) that can be used for fast distance
++estimations. Input can be fasta or fastq files (gzipped or not), and "\-" can
++be given to read from standard input. Input files can also be files of file
++names (see \fB\-l\fP). For output, one sketch file will be generated, but it can have
++multiple sketches within it, divided by sequences or files (see \fB\-i\fP). By
++default, the output file name will be the first input file with a \(aq.msh\(aq
++extension, or \(aqstdin.msh\(aq if standard input is used (see \fB\-o\fP).
++.SH "OPTIONS"
++.sp
++\fB\-h\fP
++.RS 4
++Help
++.RE
++.sp
++\fB\-p\fP <int>
++.RS 4
++Parallelism. This many threads will be spawned for processing. [1]
++.RE
++.SS "Input"
++.sp
++\fB\-l\fP
++.RS 4
++List input. Each file contains a list of sequence files, one per line.
++.RE
++.SS "Output"
++.sp
++\fB\-o\fP <path>
++.RS 4
++Output prefix (first input file used if unspecified). The suffix
++\(aq.msh\(aq will be appended.
++.RE
++.SS "Sketching"
++.sp
++\fB\-k\fP <int>
++.RS 4
++K\-mer size. Hashes will be based on strings of this many
++nucleotides. Canonical nucleotides are used by default (see
++Alphabet options below). (1\-32) [21]
++.RE
++.sp
++\fB\-s\fP <int>
++.RS 4
++Sketch size. Each sketch will have at most this many non\-redundant
++min\-hashes. [1000]
++.RE
++.sp
++\fB\-i\fP
++.RS 4
++Sketch individual sequences, rather than whole files.
++.RE
++.sp
++\fB\-w\fP <num>
++.RS 4
++Probability threshold for warning about low k\-mer size. (0\-1) [0.01]
++.RE
++.sp
++\fB\-r\fP
++.RS 4
++Input is a read set. See Reads options below. Incompatible with \fB\-i\fP.
++.RE
++.SS "Sketching (reads)"
++.sp
++\fB\-b\fP <size>
++.RS 4
++Use a Bloom filter of this size (raw bytes or with K/M/G/T) to
++filter out unique k\-mers. This is useful if exact filtering with \fB\-m\fP
++uses too much memory. However, some unique k\-mers may pass
++erroneously, and copies cannot be counted beyond 2. Implies \fB\-r\fP.
++.RE
++.sp
++\fB\-m\fP <int>
++.RS 4
++Minimum copies of each k\-mer required to pass noise filter for
++reads. Implies \fB\-r\fP. [1]
++.RE
++.sp
++\fB\-c\fP <num>
++.RS 4
++Target coverage. Sketching will conclude if this coverage is
++reached before the end of the input file (estimated by average
++k\-mer multiplicity). Implies \fB\-r\fP.
++.RE
++.sp
++\fB\-g\fP <size>
++.RS 4
++Genome size. If specified, will be used for p\-value calculation
++instead of an estimated size from k\-mer content. Implies \fB\-r\fP.
++.RE
++.SS "Sketching (alphabet)"
++.sp
++\fB\-n\fP
++.RS 4
++Preserve strand (by default, strand is ignored by using canonical
++DNA k\-mers, which are alphabetical minima of forward\-reverse
++pairs). Implied if an alphabet is specified with \fB\-a\fP or \fB\-z\fP.
++.RE
++.sp
++\fB\-a\fP
++.RS 4
++Use amino acid alphabet (A\-Z, except BJOUXZ). Implies \fB\-n\fP, \fB\-k\fP 9.
++.RE
++.sp
++\fB\-z\fP <text>
++.RS 4
++Alphabet to base hashes on (case ignored by default; see \fB\-Z\fP).
++K\-mers with other characters will be ignored. Implies \fB\-n\fP.
++.RE
++.sp
++\fB\-Z\fP
++.RS 4
++Preserve case in k\-mers and alphabet (case is ignored by default).
++Sequence letters whose case is not in the current alphabet will be
++skipped when sketching.
++.RE
++.SH "SEE ALSO"
++.sp
++mash(1)
+\ No newline at end of file
+diff --git a/doc/man/mash-triangle.1 b/doc/man/mash-triangle.1
+new file mode 100644
+index 0000000..a20e027
+--- /dev/null
++++ b/doc/man/mash-triangle.1
+@@ -0,0 +1,169 @@
++'\" t
++.\" Title: mash-triangle
++.\" Author: [see the "AUTHOR(S)" section]
++.\" Generator: Asciidoctor 2.0.10
++.\" Date: 2019-12-13
++.\" Manual: \ \&
++.\" Source: \ \&
++.\" Language: English
++.\"
++.TH "MASH\-TRIANGLE" "1" "2019-12-13" "\ \&" "\ \&"
++.ie \n(.g .ds Aq \(aq
++.el .ds Aq '
++.ss \n[.ss] 0
++.nh
++.ad l
++.de URL
++\fI\\$2\fP <\\$1>\\$3
++..
++.als MTO URL
++.if \n[.g] \{\
++. mso www.tmac
++. am URL
++. ad l
++. .
++. am MTO
++. ad l
++. .
++. LINKSTYLE blue R < >
++.\}
++.SH "NAME"
++mash\-triangle \- estimate a lower\-triangular distance matrix
++.SH "SYNOPSIS"
++.sp
++\fBmash triangle\fP [options] <seq1> [<seq2>] ...
++.SH "DESCRIPTION"
++.sp
++Estimate the distance of each input sequence to every other input
++sequence. Outputs a lower\-triangular distance matrix in relaxed Phylip
++format. The input sequences can be fasta or fastq, gzipped or not, or
++Mash sketch files (.msh) with matching k\-mer sizes. Input files can also
+be files of file names (see \fB\-l\fP). If more than one input file is provided,
+whole files are compared by default (see \fB\-i\fP).
++.SH "OPTIONS"
++.sp
++\fB\-h\fP
++.RS 4
++Help
++.RE
++.sp
++\fB\-p\fP <int>
++.RS 4
++Parallelism. This many threads will be spawned for processing. [1]
++.RE
++.SS "Input"
++.sp
++\fB\-l\fP
++.RS 4
++List input. Each query file contains a list of sequence files, one
++per line. The reference file is not affected.
++.RE
++.SS "Output"
++.sp
++\fB\-C\fP
++.RS 4
++Use comment fields for sequence names instead of IDs.
++.RE
++.sp
++\fB\-E\fP
++.RS 4
++Output edge list instead of Phylip matrix, with fields [seq1, seq2,
++dist, p\-val, shared\-hashes].
++.RE
++.sp
++\fB\-v\fP <num>
++.RS 4
+Maximum p\-value to report in edge list. Implies \fB\-E\fP. (0\-1) [1.0]
++.RE
++.sp
++\fB\-d\fP <num>
++.RS 4
+Maximum distance to report in edge list. Implies \fB\-E\fP. (0\-1) [1.0]
++.RE
++.SS "Sketching"
++.sp
++\fB\-k\fP <int>
++.RS 4
++K\-mer size. Hashes will be based on strings of this many
++nucleotides. Canonical nucleotides are used by default (see
++Alphabet options below). (1\-32) [21]
++.RE
++.sp
++\fB\-s\fP <int>
++.RS 4
++Sketch size. Each sketch will have at most this many non\-redundant
++min\-hashes. [1000]
++.RE
++.sp
++\fB\-i\fP
++.RS 4
++Sketch individual sequences, rather than whole files, e.g. for
++multi\-fastas of single\-chromosome genomes or pair\-wise gene comparisons.
++.RE
++.sp
++\fB\-w\fP <num>
++.RS 4
++Probability threshold for warning about low k\-mer size. (0\-1) [0.01]
++.RE
++.sp
++\fB\-r\fP
++.RS 4
++Input is a read set. See Reads options below. Incompatible with \fB\-i\fP.
++.RE
++.SS "Sketching (reads)"
++.sp
++\fB\-b\fP <size>
++.RS 4
++Use a Bloom filter of this size (raw bytes or with K/M/G/T) to
++filter out unique k\-mers. This is useful if exact filtering with \fB\-m\fP
++uses too much memory. However, some unique k\-mers may pass
++erroneously, and copies cannot be counted beyond 2. Implies \fB\-r\fP.
++.RE
++.sp
++\fB\-m\fP <int>
++.RS 4
++Minimum copies of each k\-mer required to pass noise filter for
++reads. Implies \fB\-r\fP. [1]
++.RE
++.sp
++\fB\-c\fP <num>
++.RS 4
++Target coverage. Sketching will conclude if this coverage is
++reached before the end of the input file (estimated by average
++k\-mer multiplicity). Implies \fB\-r\fP.
++.RE
++.sp
++\fB\-g\fP <size>
++.RS 4
++Genome size. If specified, will be used for p\-value calculation
++instead of an estimated size from k\-mer content. Implies \fB\-r\fP.
++.RE
++.SS "Sketching (alphabet)"
++.sp
++\fB\-n\fP
++.RS 4
++Preserve strand (by default, strand is ignored by using canonical
++DNA k\-mers, which are alphabetical minima of forward\-reverse
++pairs). Implied if an alphabet is specified with \fB\-a\fP or \fB\-z\fP.
++.RE
++.sp
++\fB\-a\fP
++.RS 4
++Use amino acid alphabet (A\-Z, except BJOUXZ). Implies \fB\-n\fP, \fB\-k\fP 9.
++.RE
++.sp
++\fB\-z\fP <text>
++.RS 4
++Alphabet to base hashes on (case ignored by default; see \fB\-Z\fP).
++K\-mers with other characters will be ignored. Implies \fB\-n\fP.
++.RE
++.sp
++\fB\-Z\fP
++.RS 4
++Preserve case in k\-mers and alphabet (case is ignored by default).
++Sequence letters whose case is not in the current alphabet will be
++skipped when sketching.
++.RE
++.SH "SEE ALSO"
++.sp
++mash(1)
+\ No newline at end of file
+diff --git a/doc/man/mash.1 b/doc/man/mash.1
+new file mode 100644
+index 0000000..b5e6d75
+--- /dev/null
++++ b/doc/man/mash.1
+@@ -0,0 +1,77 @@
++'\" t
++.\" Title: mash
++.\" Author: [see the "AUTHOR(S)" section]
++.\" Generator: Asciidoctor 2.0.10
++.\" Date: 2019-12-13
++.\" Manual: \ \&
++.\" Source: \ \&
++.\" Language: English
++.\"
++.TH "MASH" "1" "2019-12-13" "\ \&" "\ \&"
++.ie \n(.g .ds Aq \(aq
++.el .ds Aq '
++.ss \n[.ss] 0
++.nh
++.ad l
++.de URL
++\fI\\$2\fP <\\$1>\\$3
++..
++.als MTO URL
++.if \n[.g] \{\
++. mso www.tmac
++. am URL
++. ad l
++. .
++. am MTO
++. ad l
++. .
++. LINKSTYLE blue R < >
++.\}
++.SH "NAME"
++mash \- fast genome and metagenome distance estimation using MinHash
++.SH "SYNOPSIS"
++.sp
++\fBmash\fP <command> [options] [arguments ...]
++.SH "DESCRIPTION"
++.sp
++\fBmash\fP is the main executable for the \fBMash\fP software. The actual
++functionality is provided by the subtools (\(aqcommands\(aq):
++.SS "Commands"
++.sp
++\fBbounds\fP
++.RS 4
++Print a table of Mash error bounds.
++.RE
++.sp
++\fBdist\fP
++.RS 4
++Estimate the distance of query sequences to references.
++.RE
++.sp
++\fBinfo\fP
++.RS 4
++Display information about sketch files.
++.RE
++.sp
++\fBpaste\fP
++.RS 4
++Create a single sketch file from multiple sketch files.
++.RE
++.sp
++\fBscreen\fP
++.RS 4
++Determine whether query sequences are within a larger pool of sequences.
++.RE
++.sp
++\fBsketch\fP
++.RS 4
++Create sketches (reduced representations for fast operations).
++.RE
++.sp
++\fBtriangle\fP
++.RS 4
++Estimate a lower\-triangular distance matrix.
++.RE
++.SH "SEE ALSO"
++.sp
++mash\-dist(1), mash\-info(1), mash\-paste(1), mash\-screen(1), mash\-sketch(1), mash\-triangle(1)
+\ No newline at end of file