diff options
author | Yishen Miao | 2017-04-26 12:36:28 +0000 |
---|---|---|
committer | Yishen Miao | 2017-04-26 12:36:28 +0000 |
commit | 3d2c209b72a0908dad5122c848e1dacebb2c477f (patch) | |
tree | 35c54079838498c1497306ac7e9ecd73d803327e | |
parent | fd0fc8f1222a2635ab9d92b20835efaeb83713ff (diff) | |
download | aur-3d2c209b72a0908dad5122c848e1dacebb2c477f.tar.gz |
Update to 1.1.0e
Update to 1.1.0e.
modified: .SRCINFO
modified: PKGBUILD
modified: ca-dir.patch
deleted: no-rpath.patch
new file: openssl__1.1.0_chacha20_poly1305.patch
deleted: openssl__chacha20_poly1305_draft_and_rfc_ossl102j.patch
deleted: ssl3-test-failure.patch
-rw-r--r-- | .SRCINFO | 20 | ||||
-rw-r--r-- | PKGBUILD | 26 | ||||
-rw-r--r-- | ca-dir.patch | 44 | ||||
-rw-r--r-- | no-rpath.patch | 11 | ||||
-rw-r--r-- | openssl__1.1.0_chacha20_poly1305.patch | 60 | ||||
-rw-r--r-- | openssl__chacha20_poly1305_draft_and_rfc_ossl102j.patch | 4718 | ||||
-rw-r--r-- | ssl3-test-failure.patch | 26 |
7 files changed, 97 insertions, 4808 deletions
@@ -1,6 +1,6 @@ pkgbase = openssl-chacha20 pkgdesc = The Open Source toolkit for Secure Sockets Layer and Transport Layer Security with Chacha20 cipher - pkgver = 1.0.2.k + pkgver = 1.1.0.e pkgrel = 1 url = https://www.openssl.org arch = i686 @@ -8,23 +8,19 @@ pkgbase = openssl-chacha20 license = custom:BSD depends = perl optdepends = ca-certificates - provides = openssl=1.0.2.k + provides = openssl=1.1.0.e conflicts = openssl options = !makeflags backup = etc/ssl/openssl.cnf - source = https://www.openssl.org/source/openssl-1.0.2k.tar.gz - source = https://www.openssl.org/source/openssl-1.0.2k.tar.gz.asc - source = no-rpath.patch - source = ssl3-test-failure.patch + source = https://www.openssl.org/source/openssl-1.1.0e.tar.gz + source = https://www.openssl.org/source/openssl-1.1.0e.tar.gz.asc source = ca-dir.patch - source = openssl__chacha20_poly1305_draft_and_rfc_ossl102j.patch + source = openssl__1.1.0_chacha20_poly1305.patch validpgpkeys = 8657ABB260F056B1E5190839D9C4D26D0E604491 - sha256sums = 6b3977c61f2aedf0f96367dcfb5c6e578cf37e7b8d913b4ecb6643c3cb88d8c0 + sha256sums = 57be8618979d80c910728cfc99369bf97b2a1abd8f366ab6ebdee8975ad3874c sha256sums = SKIP - sha256sums = 754d6107a306311e15a1db6a1cc031b81691c8b9865e8809ac60ca6f184c957c - sha256sums = c54ae87c602eaa1530a336ab7c6e22e12898e1941012349c153e52553df64a13 - sha256sums = 9e8126f3a748f4c1d6fe34d4436de72b16a40e97a6d18234d2e88caa179d50c4 - sha256sums = d6f9427d5cb63c7299563c201cd8708c7166e0f8c98b57a1fee69767362bf0f7 + sha256sums = 90c7411fed0157116f2df8f4be755aaf5a26e8484351b4e6a79492805d5f2790 + sha256sums = 3c1b39f8d17dc384486ebe61aa783cc4a649ed9d7b633c02f36693b8af265160 pkgname = openssl-chacha20 @@ -3,7 +3,7 @@ _pkgname=openssl pkgname=${_pkgname}-chacha20 -_ver=1.0.2k +_ver=1.1.0e # use a pacman compatible version scheme pkgver=${_ver/[a-z]/.${_ver//[0-9.]/}} #pkgver=$_ver @@ -20,33 +20,23 @@ conflicts=('openssl') provides=("openssl=${pkgver}") source=("https://www.openssl.org/source/${_pkgname}-${_ver}.tar.gz" "https://www.openssl.org/source/${_pkgname}-${_ver}.tar.gz.asc" - 'no-rpath.patch' - 'ssl3-test-failure.patch' 'ca-dir.patch' - 'openssl__chacha20_poly1305_draft_and_rfc_ossl102j.patch') -sha256sums=('6b3977c61f2aedf0f96367dcfb5c6e578cf37e7b8d913b4ecb6643c3cb88d8c0' + 'openssl__1.1.0_chacha20_poly1305.patch') +sha256sums=('57be8618979d80c910728cfc99369bf97b2a1abd8f366ab6ebdee8975ad3874c' 'SKIP' - '754d6107a306311e15a1db6a1cc031b81691c8b9865e8809ac60ca6f184c957c' - 'c54ae87c602eaa1530a336ab7c6e22e12898e1941012349c153e52553df64a13' - '9e8126f3a748f4c1d6fe34d4436de72b16a40e97a6d18234d2e88caa179d50c4' - 'd6f9427d5cb63c7299563c201cd8708c7166e0f8c98b57a1fee69767362bf0f7') + '90c7411fed0157116f2df8f4be755aaf5a26e8484351b4e6a79492805d5f2790' + '3c1b39f8d17dc384486ebe61aa783cc4a649ed9d7b633c02f36693b8af265160') validpgpkeys=('8657ABB260F056B1E5190839D9C4D26D0E604491') prepare() { cd $srcdir/$_pkgname-$_ver - # remove rpath: http://bugs.archlinux.org/task/14367 - patch -p0 -i $srcdir/no-rpath.patch - - # disable a test that fails when ssl3 is disabled - patch -p1 -i $srcdir/ssl3-test-failure.patch - # set ca dir to /etc/ssl by default patch -p0 -i $srcdir/ca-dir.patch # Cloudflare patch - # https://github.com/cloudflare/sslconfig/blob/master/patches/openssl__chacha20_poly1305_draft_and_rfc_ossl102j.patch - patch -p1 -i $srcdir/openssl__chacha20_poly1305_draft_and_rfc_ossl102j.patch + # https://raw.githubusercontent.com/cloudflare/sslconfig/master/patches/openssl__1.1.0_chacha20_poly1305.patch + patch -p1 -i $srcdir/openssl__1.1.0_chacha20_poly1305.patch } build() { @@ -81,6 +71,6 @@ check() { package() { cd $srcdir/$_pkgname-$_ver - make INSTALL_PREFIX=$pkgdir MANDIR=/usr/share/man MANSUFFIX=ssl install + make DESTDIR=$pkgdir MANDIR=/usr/share/man MANSUFFIX=ssl install_sw install_ssldirs install_man_docs install -D -m644 LICENSE $pkgdir/usr/share/licenses/$_pkgname/LICENSE } diff --git a/ca-dir.patch b/ca-dir.patch index 41d1386d3d06..1daba849b4ca 100644 --- a/ca-dir.patch +++ b/ca-dir.patch @@ -1,27 +1,16 @@ ---- apps/CA.pl.in 2006-04-28 02:30:49.000000000 +0200 -+++ apps/CA.pl.in 2010-04-01 00:35:02.600553509 +0200 -@@ -53,7 +53,7 @@ - $X509="$openssl x509"; - $PKCS12="$openssl pkcs12"; +--- apps/CA.pl.in 2016-09-26 11:46:04.000000000 +0200 ++++ apps/CA.pl.in 2016-11-01 16:02:16.709616823 +0100 +@@ -33,7 +33,7 @@ + my $PKCS12 = "$openssl pkcs12"; --$CATOP="./demoCA"; -+$CATOP="/etc/ssl"; - $CAKEY="cakey.pem"; - $CAREQ="careq.pem"; - $CACERT="cacert.pem"; ---- apps/CA.sh 2009-10-15 19:27:47.000000000 +0200 -+++ apps/CA.sh 2010-04-01 00:35:02.600553509 +0200 -@@ -68,7 +68,7 @@ - X509="$OPENSSL x509" - PKCS12="openssl pkcs12" - --if [ -z "$CATOP" ] ; then CATOP=./demoCA ; fi -+if [ -z "$CATOP" ] ; then CATOP=/etc/ssl ; fi - CAKEY=./cakey.pem - CAREQ=./careq.pem - CACERT=./cacert.pem ---- apps/openssl.cnf 2009-04-04 20:09:43.000000000 +0200 -+++ apps/openssl.cnf 2010-04-01 00:35:02.607220681 +0200 + # default openssl.cnf file has setup as per the following +-my $CATOP = "./demoCA"; ++my $CATOP = "/etc/ssl"; + my $CAKEY = "cakey.pem"; + my $CAREQ = "careq.pem"; + my $CACERT = "cacert.pem"; +--- apps/openssl.cnf 2016-09-26 11:46:04.000000000 +0200 ++++ apps/openssl.cnf 2016-11-01 16:02:48.378503427 +0100 @@ -39,7 +39,7 @@ #################################################################### [ CA_default ] @@ -31,3 +20,12 @@ certs = $dir/certs # Where the issued certs are kept crl_dir = $dir/crl # Where the issued crl are kept database = $dir/index.txt # database index file. +@@ -323,7 +323,7 @@ + [ tsa_config1 ] + + # These are used by the TSA reply generation only. +-dir = ./demoCA # TSA root directory ++dir = /etc/ssl # TSA root directory + serial = $dir/tsaserial # The current serial number (mandatory) + crypto_device = builtin # OpenSSL engine to use for signing + signer_cert = $dir/tsacert.pem # The TSA signing certificate diff --git a/no-rpath.patch b/no-rpath.patch deleted file mode 100644 index ebd95e23d397..000000000000 --- a/no-rpath.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- Makefile.shared.no-rpath 2005-06-23 22:47:54.000000000 +0200 -+++ Makefile.shared 2005-11-16 22:35:37.000000000 +0100 -@@ -153,7 +153,7 @@ - NOALLSYMSFLAGS='-Wl,--no-whole-archive'; \ - SHAREDFLAGS="$(CFLAGS) $(SHARED_LDFLAGS) -shared -Wl,-Bsymbolic -Wl,-soname=$$SHLIB$$SHLIB_SOVER$$SHLIB_SUFFIX" - --DO_GNU_APP=LDFLAGS="$(CFLAGS) -Wl,-rpath,$(LIBRPATH)" -+DO_GNU_APP=LDFLAGS="$(CFLAGS)" - - #This is rather special. It's a special target with which one can link - #applications without bothering with any features that have anything to diff --git a/openssl__1.1.0_chacha20_poly1305.patch b/openssl__1.1.0_chacha20_poly1305.patch new file mode 100644 index 000000000000..34da57b4af1d --- /dev/null +++ b/openssl__1.1.0_chacha20_poly1305.patch @@ -0,0 +1,60 @@ +diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c +index e94ee83..3cd7e3a 100644 +--- a/ssl/s3_lib.c ++++ b/ssl/s3_lib.c +@@ -3582,6 +3582,7 @@ const SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, + STACK_OF(SSL_CIPHER) *prio, *allow; + int i, ii, ok; + unsigned long alg_k, alg_a, mask_k, mask_a; ++ int use_chacha = 0; + + /* Let's see which ciphers we can support */ + +@@ -3610,13 +3611,20 @@ const SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, + fprintf(stderr, "%p:%s\n", (void *)c, c->name); + } + #endif +- ++retry: + if (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE || tls1_suiteb(s)) { + prio = srvr; + allow = clnt; ++ /* Use ChaCha20+Poly1305 if it's client's most preferred cipher suite */ ++ if (sk_SSL_CIPHER_num(clnt) > 0) { ++ c = sk_SSL_CIPHER_value(clnt, 0); ++ if (c->algorithm_enc == SSL_CHACHA20POLY1305) ++ use_chacha = 1; ++ } + } else { + prio = clnt; + allow = srvr; ++ use_chacha = 1; + } + + tls1_set_cert_validity(s); +@@ -3634,6 +3642,10 @@ const SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, + DTLS_VERSION_GT(s->version, c->max_dtls))) + continue; + ++ /* Skip ChaCha unless top client priority */ ++ if (c->algorithm_enc == SSL_CHACHA20POLY1305 && !use_chacha) ++ continue; ++ + mask_k = s->s3->tmp.mask_k; + mask_a = s->s3->tmp.mask_a; + #ifndef OPENSSL_NO_SRP +@@ -3687,6 +3699,14 @@ const SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, + break; + } + } ++ ++ if (ret == NULL && !use_chacha) { ++ /* If no shared cipher was found due to some unusual preferences, try ++ * again with CHACHA enabled even if not top priority */ ++ use_chacha = 1; ++ goto retry; ++ } ++ + return (ret); + } + diff --git a/openssl__chacha20_poly1305_draft_and_rfc_ossl102j.patch b/openssl__chacha20_poly1305_draft_and_rfc_ossl102j.patch deleted file mode 100644 index cdb767379f85..000000000000 --- a/openssl__chacha20_poly1305_draft_and_rfc_ossl102j.patch +++ /dev/null @@ -1,4718 +0,0 @@ -From dcf9b5698b8658c9248327b3fdb280090c5c78ec Mon Sep 17 00:00:00 2001 -From: vkrasnov <vlad@cloudflare.com> -Date: Tue, 4 Oct 2016 15:47:32 -0700 -Subject: [PATCH] ChaCha20-Poly1305 draft and RFC cipher suites for OpenSSL - 1.0.2j - ---- - Configure | 44 +- - Makefile.org | 4 +- - crypto/chacha20_poly1305/Makefile | 89 + - .../asm/chacha20_poly1305_x86_64.pl | 2299 ++++++++++++++++++++ - crypto/chacha20_poly1305/asm/chacha20_x86_64.pl | 415 ++++ - crypto/chacha20_poly1305/asm/poly1305_x86_64.pl | 280 +++ - crypto/chacha20_poly1305/chacha20.c | 142 ++ - crypto/chacha20_poly1305/chacha20poly1305.h | 64 + - crypto/chacha20_poly1305/poly1305.c | 355 +++ - crypto/evp/Makefile | 8 +- - crypto/evp/c_allc.c | 5 + - crypto/evp/e_chacha20_poly1305.c | 362 +++ - crypto/evp/evp.h | 5 + - crypto/objects/obj_dat.h | 13 +- - crypto/objects/obj_mac.h | 8 + - crypto/objects/obj_mac.num | 2 + - crypto/objects/objects.txt | 2 + - ssl/s3_lib.c | 128 +- - ssl/ssl.h | 2 + - ssl/ssl_ciph.c | 31 +- - ssl/ssl_locl.h | 2 + - ssl/tls1.h | 26 + - 22 files changed, 4260 insertions(+), 26 deletions(-) - create mode 100644 crypto/chacha20_poly1305/Makefile - create mode 100755 crypto/chacha20_poly1305/asm/chacha20_poly1305_x86_64.pl - create mode 100644 crypto/chacha20_poly1305/asm/chacha20_x86_64.pl - create mode 100644 crypto/chacha20_poly1305/asm/poly1305_x86_64.pl - create mode 100644 crypto/chacha20_poly1305/chacha20.c - create mode 100644 crypto/chacha20_poly1305/chacha20poly1305.h - create mode 100644 crypto/chacha20_poly1305/poly1305.c - create mode 100644 crypto/evp/e_chacha20_poly1305.c - -diff --git a/Configure b/Configure -index c39f71a..f5f7c06 100755 ---- a/Configure -+++ b/Configure -@@ -150,25 +150,25 @@ my $tlib="-lnsl -lsocket"; - my $bits1="THIRTY_TWO_BIT "; - my $bits2="SIXTY_FOUR_BIT "; - --my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o::des-586.o crypt586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o:"; -+my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o x86-gf2m.o::des-586.o crypt586.o:aes-586.o vpaes-x86.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o:ghash-x86.o::"; - - my $x86_elf_asm="$x86_asm:elf"; - --my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o rsaz_exp.o rsaz-x86_64.o rsaz-avx2.o:ecp_nistz256.o ecp_nistz256-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o aesni-sha256-x86_64.o aesni-mb-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o sha1-mb-x86_64.o sha256-mb-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o aesni-gcm-x86_64.o:"; --my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void"; --my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o::des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o::void"; --my $sparcv8_asm=":sparcv8.o::des_enc-sparc.o fcrypt_b.o:::::::::::::void"; --my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o::::::sha1-alpha.o:::::::ghash-alpha.o::void"; --my $mips64_asm=":bn-mips.o mips-mont.o:::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; -+my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o rsaz_exp.o rsaz-x86_64.o rsaz-avx2.o:ecp_nistz256.o ecp_nistz256-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o aesni-sha256-x86_64.o aesni-mb-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o sha1-mb-x86_64.o sha256-mb-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o aesni-gcm-x86_64.o:chacha20_poly1305_x86_64.o poly1305_x86_64.o chacha20_x86_64.o:"; -+my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o:::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o:::void"; -+my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o::des_enc-sparc.o fcrypt_b.o dest4-sparcv9.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o:::void"; -+my $sparcv8_asm=":sparcv8.o::des_enc-sparc.o fcrypt_b.o::::::::::::::void"; -+my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o::::::sha1-alpha.o:::::::ghash-alpha.o:::void"; -+my $mips64_asm=":bn-mips.o mips-mont.o:::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o:::::::::"; - my $mips32_asm=$mips64_asm; $mips32_asm =~ s/\s*sha512\-mips\.o//; --my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o:::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:"; --my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o:::aes_cbc.o aes-armv4.o bsaes-armv7.o aesv8-armx.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o ghashv8-armx.o::void"; --my $aarch64_asm="armcap.o arm64cpuid.o mem_clr.o::::aes_core.o aes_cbc.o aesv8-armx.o:::sha1-armv8.o sha256-armv8.o sha512-armv8.o:::::::ghashv8-armx.o:"; --my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o:::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32"; --my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o:::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64"; --my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o:::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:"; -+my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o:::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o::"; -+my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o:::aes_cbc.o aes-armv4.o bsaes-armv7.o aesv8-armx.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o ghashv8-armx.o:::void"; -+my $aarch64_asm="armcap.o arm64cpuid.o mem_clr.o::::aes_core.o aes_cbc.o aesv8-armx.o:::sha1-armv8.o sha256-armv8.o sha512-armv8.o:::::::ghashv8-armx.o::"; -+my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o:::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:::32"; -+my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o:::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o:::64"; -+my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o:::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o::"; - my $ppc32_asm=$ppc64_asm; --my $no_asm="::::::::::::::::void"; -+my $no_asm=":::::::::::::::::void"; - - # As for $BSDthreads. Idea is to maintain "collective" set of flags, - # which would cover all BSD flavors. -pthread applies to them all, -@@ -179,7 +179,7 @@ my $no_asm="::::::::::::::::void"; - # seems to be sufficient? - my $BSDthreads="-pthread -D_THREAD_SAFE -D_REENTRANT"; - --#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $bn_ops : $cpuid_obj : $bn_obj : $ec_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $modes_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $multilib -+#config-string $cc : $cflags : $unistd : $thread_cflag : $sys_id : $lflags : $bn_ops : $cpuid_obj : $bn_obj : $ec_obj : $des_obj : $aes_obj : $bf_obj : $md5_obj : $sha1_obj : $cast_obj : $rc4_obj : $rmd160_obj : $rc5_obj : $wp_obj : $cmll_obj : $modes_obj : $chapoly_obj : $engines_obj : $dso_scheme : $shared_target : $shared_cflag : $shared_ldflag : $shared_extension : $ranlib : $arflags : $multilib - - my %table=( - # File 'TABLE' (created by 'make TABLE') contains the data from this list, -@@ -713,6 +713,7 @@ my $idx_rc5_obj = $idx++; - my $idx_wp_obj = $idx++; - my $idx_cmll_obj = $idx++; - my $idx_modes_obj = $idx++; -+my $idx_chapoly_obj = $idx++; - my $idx_engines_obj = $idx++; - my $idx_perlasm_scheme = $idx++; - my $idx_dso_scheme = $idx++; -@@ -1239,6 +1240,7 @@ my $rc5_obj = $fields[$idx_rc5_obj]; - my $wp_obj = $fields[$idx_wp_obj]; - my $cmll_obj = $fields[$idx_cmll_obj]; - my $modes_obj = $fields[$idx_modes_obj]; -+my $chapoly_obj= $fields[$idx_chapoly_obj]; - my $engines_obj = $fields[$idx_engines_obj]; - my $perlasm_scheme = $fields[$idx_perlasm_scheme]; - my $dso_scheme = $fields[$idx_dso_scheme]; -@@ -1407,7 +1409,8 @@ if ($no_asm) - { - $cpuid_obj=$bn_obj=$ec_obj= - $des_obj=$aes_obj=$bf_obj=$cast_obj=$rc4_obj=$rc5_obj=$cmll_obj= -- $modes_obj=$sha1_obj=$md5_obj=$rmd160_obj=$wp_obj=$engines_obj=""; -+ $modes_obj=$sha1_obj=$md5_obj=$rmd160_obj=$wp_obj=$engines_obj= -+ $chapoly_obj=""; - } - - if (!$no_shared) -@@ -1622,6 +1625,10 @@ if ($ec_obj =~ /ecp_nistz256/) - { - $cflags.=" -DECP_NISTZ256_ASM"; - } -+if ($chapoly_obj =~ /chacha20_poly1305/) -+ { -+ $cflags.=" -DCHAPOLY_ASM"; -+ } - - # "Stringify" the C flags string. This permits it to be made part of a string - # and works as well on command lines. -@@ -1751,6 +1758,7 @@ while (<IN>) - s/^WP_ASM_OBJ=.*$/WP_ASM_OBJ= $wp_obj/; - s/^CMLL_ENC=.*$/CMLL_ENC= $cmll_obj/; - s/^MODES_ASM_OBJ.=*$/MODES_ASM_OBJ= $modes_obj/; -+ s/^CHAPOLY_ASM=.*$/CHAPOLY_ASM= $chapoly_obj/; - s/^ENGINES_ASM_OBJ.=*$/ENGINES_ASM_OBJ= $engines_obj/; - s/^PERLASM_SCHEME=.*$/PERLASM_SCHEME= $perlasm_scheme/; - s/^PROCESSOR=.*/PROCESSOR= $processor/; -@@ -1812,6 +1820,7 @@ print "SHA1_OBJ_ASM =$sha1_obj\n"; - print "RMD160_OBJ_ASM=$rmd160_obj\n"; - print "CMLL_ENC =$cmll_obj\n"; - print "MODES_OBJ =$modes_obj\n"; -+print "CHAPOLY_ASM =$chapoly_obj\n"; - print "ENGINES_OBJ =$engines_obj\n"; - print "PROCESSOR =$processor\n"; - print "RANLIB =$ranlib\n"; -@@ -2211,7 +2220,7 @@ sub print_table_entry - my ($cc, $cflags, $unistd, $thread_cflag, $sys_id, $lflags, - $bn_ops, $cpuid_obj, $bn_obj, $ec_obj, $des_obj, $aes_obj, $bf_obj, - $md5_obj, $sha1_obj, $cast_obj, $rc4_obj, $rmd160_obj, -- $rc5_obj, $wp_obj, $cmll_obj, $modes_obj, $engines_obj, -+ $rc5_obj, $wp_obj, $cmll_obj, $modes_obj, $chapoly_obj, $engines_obj, - $perlasm_scheme, $dso_scheme, $shared_target, $shared_cflag, - $shared_ldflag, $shared_extension, $ranlib, $arflags, $multilib)= - split(/\s*:\s*/,$table{$target} . ":" x 30 , -1); -@@ -2241,6 +2250,7 @@ sub print_table_entry - \$wp_obj = $wp_obj - \$cmll_obj = $cmll_obj - \$modes_obj = $modes_obj -+\$chapoly_obj = $chapoly_obj - \$engines_obj = $engines_obj - \$perlasm_scheme = $perlasm_scheme - \$dso_scheme = $dso_scheme -diff --git a/Makefile.org b/Makefile.org -index 2377f50..1f20a61 100644 ---- a/Makefile.org -+++ b/Makefile.org -@@ -103,6 +103,7 @@ WP_ASM_OBJ= - CMLL_ENC= - MODES_ASM_OBJ= - ENGINES_ASM_OBJ= -+CHAPOLY_ASM= - PERLASM_SCHEME= - - # KRB5 stuff -@@ -149,7 +150,7 @@ SDIRS= \ - bn ec rsa dsa ecdsa dh ecdh dso engine \ - buffer bio stack lhash rand err \ - evp asn1 pem x509 x509v3 conf txt_db pkcs7 pkcs12 comp ocsp ui krb5 \ -- cms pqueue ts jpake srp store cmac -+ cms pqueue ts jpake srp store cmac chacha20_poly1305 - # keep in mind that the above list is adjusted by ./Configure - # according to no-xxx arguments... - -@@ -240,6 +241,7 @@ BUILDENV= LC_ALL=C PLATFORM='$(PLATFORM)' PROCESSOR='$(PROCESSOR)'\ - FIPSLIBDIR='${FIPSLIBDIR}' \ - FIPSDIR='${FIPSDIR}' \ - FIPSCANLIB="$${FIPSCANLIB:-$(FIPSCANLIB)}" \ -+ CHAPOLY_ASM='$(CHAPOLY_ASM)' \ - THIS=$${THIS:-$@} MAKEFILE=Makefile MAKEOVERRIDES= - # MAKEOVERRIDES= effectively "equalizes" GNU-ish and SysV-ish make flavors, - # which in turn eliminates ambiguities in variable treatment with -e. -diff --git a/crypto/chacha20_poly1305/Makefile b/crypto/chacha20_poly1305/Makefile -new file mode 100644 -index 0000000..87f4ba3 ---- /dev/null -+++ b/crypto/chacha20_poly1305/Makefile -@@ -0,0 +1,89 @@ -+#
-+# crypto/chacha20poly1305/Makefile
-+#
-+
-+DIR= chacha20poly1305
-+TOP= ../..
-+CC= cc
-+INCLUDES= -I.. -I$(TOP) -I../../include
-+CFLAG=-g
-+MAKEFILE= Makefile
-+AR= ar r
-+
-+CFLAGS= $(INCLUDES) $(CFLAG)
-+ASFLAGS= $(INCLUDES) $(ASFLAG)
-+AFLAGS= $(ASFLAGS)
-+
-+GENERAL=Makefile
-+TEST=
-+APPS=
-+
-+LIB=$(TOP)/libcrypto.a
-+LIBSRC= chacha20.c poly1305.c
-+LIBOBJ= chacha20.o poly1305.o $(CHAPOLY_ASM)
-+
-+SRC= $(LIBSRC)
-+
-+EXHEADER= chacha20poly1305.h
-+HEADER= $(EXHEADER)
-+
-+ALL= $(GENERAL) $(SRC) $(HEADER)
-+
-+top:
-+ (cd ../..; $(MAKE) DIRS=crypto SDIRS=$(DIR) sub_all)
-+
-+all: lib
-+
-+lib: $(LIBOBJ)
-+ $(AR) $(LIB) $(LIBOBJ)
-+ $(RANLIB) $(LIB) || echo Never mind.
-+ @touch lib
-+
-+chacha20_poly1305_x86_64.s: asm/chacha20_poly1305_x86_64.pl
-+ $(PERL) asm/chacha20_poly1305_x86_64.pl $(PERLASM_SCHEME) > $@
-+
-+poly1305_x86_64.s: asm/poly1305_x86_64.pl
-+ $(PERL) asm/poly1305_x86_64.pl $(PERLASM_SCHEME) > $@
-+
-+chacha20_x86_64.s: asm/chacha20_x86_64.pl
-+ $(PERL) asm/chacha20_x86_64.pl $(PERLASM_SCHEME) > $@
-+
-+files:
-+ $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
-+
-+links:
-+ @$(PERL) $(TOP)/util/mklink.pl ../../include/openssl $(EXHEADER)
-+ @$(PERL) $(TOP)/util/mklink.pl ../../test $(TEST)
-+ @$(PERL) $(TOP)/util/mklink.pl ../../apps $(APPS)
-+
-+install:
-+ @[ -n "$(INSTALLTOP)" ] # should be set by top Makefile...
-+ @headerlist="$(EXHEADER)"; for i in $$headerlist ; \
-+ do \
-+ (cp $$i $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i; \
-+ chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \
-+ done;
-+
-+tags:
-+ ctags $(SRC)
-+
-+tests:
-+
-+lint:
-+ lint -DLINT $(INCLUDES) $(SRC)>fluff
-+
-+depend:
-+ @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
-+ $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
-+
-+dclean:
-+ $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new
-+ mv -f Makefile.new $(MAKEFILE)
-+
-+clean:
-+ rm -f *.s *.o *.obj lib tags core .pure .nfs* *.old *.bak fluff
-+
-+# DO NOT DELETE THIS LINE -- make depend depends on it.
-+
-+chacha20.o: ../../include/openssl/chacha20poly1305.h chacha20.c
-+poly1305.o: ../../include/openssl/chacha20poly1305.h poly1305.c
-diff --git a/crypto/chacha20_poly1305/asm/chacha20_poly1305_x86_64.pl b/crypto/chacha20_poly1305/asm/chacha20_poly1305_x86_64.pl -new file mode 100755 -index 0000000..ef90831 ---- /dev/null -+++ b/crypto/chacha20_poly1305/asm/chacha20_poly1305_x86_64.pl -@@ -0,0 +1,2299 @@ -+#!/usr/bin/env perl -+ -+############################################################################## -+# # -+# Copyright 2016 CloudFlare LTD # -+# # -+# Licensed under the Apache License, Version 2.0 (the "License"); # -+# you may not use this file except in compliance with the License. # -+# You may obtain a copy of the License at # -+# # -+# http://www.apache.org/licenses/LICENSE-2.0 # -+# # -+# Unless required by applicable law or agreed to in writing, software # -+# distributed under the License is distributed on an "AS IS" BASIS, # -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # -+# See the License for the specific language governing permissions and # -+# limitations under the License. # -+# # -+############################################################################## -+# # -+# Author: Vlad Krasnov # -+# # -+############################################################################## -+ -+$flavour = shift; -+$output = shift; -+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } -+ -+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); -+ -+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -+die "can't locate x86_64-xlate.pl"; -+ -+open OUT,"| \"$^X\" $xlate $flavour $output"; -+*STDOUT=*OUT; -+ -+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` -+ =~ /GNU assembler version ([2-9]\.[0-9]+)/) { -+ $avx = ($1>=2.19) + ($1>=2.22); -+} -+ -+if ($win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && -+ `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { -+ $avx = ($1>=2.09) + ($1>=2.10); -+} -+ -+if ($win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && -+ `ml64 2>&1` =~ /Version ([0-9]+)\./) { -+ $avx = ($1>=10) + ($1>=11); -+} -+ -+if (`$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9])\.([0-9]+)/) { -+ my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 -+ $avx = ($ver>=3.0) + ($ver>=3.01); -+} -+ -+$code.=<<___; -+.text -+.extern OPENSSL_ia32cap_P -+.align 64 -+.chacha20_consts: -+.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' -+.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' -+.rol8: -+.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 -+.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 -+.rol16: -+.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 -+.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 -+.avx2_init: -+.long 0,0,0,0 -+.sse_inc: -+.long 1,0,0,0 -+.avx2_inc: -+.long 2,0,0,0,2,0,0,0 -+.clamp: -+.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC -+.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF -+.align 16 -+.and_masks: -+.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 -+.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 -+.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 -+.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 -+.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 -+.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 -+.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 -+.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 -+.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 -+.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 -+.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 -+.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 -+.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 -+.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 -+.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 -+___ -+ -+my ($oup,$inp,$inl,$adp,$keyp,$itr1,$itr2)=("%rdi","%rsi","%rbx","%rcx","%r9","%rcx","%r8"); -+my ($acc0,$acc1,$acc2)=map("%r$_",(10..12)); -+my ($t0,$t1,$t2,$t3)=("%r13","%r14","%r15","%r9"); -+my ($A0,$A1,$A2,$A3,$B0,$B1,$B2,$B3,$C0,$C1,$C2,$C3,$D0,$D1,$D2,$D3)=map("%xmm$_",(0..15)); -+my ($T0,$T1,$T2,$T3)=($A3,$B3,$C3,$D3); -+my $r_store="0*16(%rbp)"; -+my $s_store="1*16(%rbp)"; -+my $len_store="2*16(%rbp)"; -+my $state1_store="3*16(%rbp)"; -+my $state2_store="4*16(%rbp)"; -+my $tmp_store="5*16(%rbp)"; -+my $ctr0_store="6*16(%rbp)"; -+my $ctr1_store="7*16(%rbp)"; -+my $ctr2_store="8*16(%rbp)"; -+my $ctr3_store="9*16(%rbp)"; -+ -+sub chacha_qr { -+my ($a,$b,$c,$d,$t,$dir)=@_; -+$code.="movdqa $t, $tmp_store\n" if ($dir =~ /store/); -+$code.="paddd $b, $a -+ pxor $a, $d -+ pshufb .rol16(%rip), $d -+ paddd $d, $c -+ pxor $c, $b -+ movdqa $b, $t -+ pslld \$12, $t -+ psrld \$20, $b -+ pxor $t, $b -+ paddd $b, $a -+ pxor $a, $d -+ pshufb .rol8(%rip), $d -+ paddd $d, $c -+ pxor $c, $b -+ movdqa $b, $t -+ pslld \$7, $t -+ psrld \$25, $b -+ pxor $t, $b\n"; -+$code.="palignr \$4, $b, $b -+ palignr \$8, $c, $c -+ palignr \$12, $d, $d\n" if ($dir =~ /left/); -+$code.="palignr \$12, $b, $b -+ palignr \$8, $c, $c -+ palignr \$4, $d, $d\n" if ($dir =~ /right/); -+$code.="movdqa $tmp_store, $t\n" if ($dir =~ /load/); -+} -+ -+sub poly_add { -+my ($src)=@_; -+$code.="add $src, $acc0 -+ adc 8+$src, $acc1 -+ adc \$1, $acc2\n"; -+} -+ -+sub poly_stage1 { -+$code.="mov 0+$r_store, %rax -+ mov %rax, $t2 -+ mul $acc0 -+ mov %rax, $t0 -+ mov %rdx, $t1 -+ mov 0+$r_store, %rax -+ mul $acc1 -+ imul $acc2, $t2 -+ add %rax, $t1 -+ adc %rdx, $t2\n"; -+} -+ -+sub poly_stage2 { -+$code.="mov 8+$r_store, %rax -+ mov %rax, $t3 -+ mul $acc0 -+ add %rax, $t1 -+ adc \$0, %rdx -+ mov %rdx, $acc0 -+ mov 8+$r_store, %rax -+ mul $acc1 -+ add %rax, $t2 -+ adc \$0, %rdx\n"; -+} -+ -+sub poly_stage3 { -+$code.="imul $acc2, $t3 -+ add $acc0, $t2 -+ adc %rdx, $t3\n"; -+} -+ -+sub poly_reduce_stage { -+$code.="mov $t0, $acc0 -+ mov $t1, $acc1 -+ mov $t2, $acc2 -+ and \$3, $acc2 -+ mov $t2, $t0 -+ and \$-4, $t0 -+ mov $t3, $t1 -+ shrd \$2, $t3, $t2 -+ shr \$2, $t3 -+ add $t0, $acc0 -+ adc $t1, $acc1 -+ adc \$0, $acc2 -+ add $t2, $acc0 -+ adc $t3, $acc1 -+ adc \$0, $acc2\n"; -+} -+ -+sub poly_mul { -+ &poly_stage1(); -+ &poly_stage2(); -+ &poly_stage3(); -+ &poly_reduce_stage(); -+} -+ -+sub prep_state { -+my ($n)=@_; -+$code.="movdqa .chacha20_consts(%rip), $A0 -+ movdqa $state1_store, $B0 -+ movdqa $state2_store, $C0\n"; -+$code.="movdqa $A0, $A1 -+ movdqa $B0, $B1 -+ movdqa $C0, $C1\n" if ($n ge 2); -+$code.="movdqa $A0, $A2 -+ movdqa $B0, $B2 -+ movdqa $C0, $C2\n" if ($n ge 3); -+$code.="movdqa $A0, $A3 -+ movdqa $B0, $B3 -+ movdqa $C0, $C3\n" if ($n ge 4); -+$code.="movdqa $ctr0_store, $D0 -+ paddd .sse_inc(%rip), $D0 -+ movdqa $D0, $ctr0_store\n" if ($n eq 1); -+$code.="movdqa $ctr0_store, $D1 -+ paddd .sse_inc(%rip), $D1 -+ movdqa $D1, $D0 -+ paddd .sse_inc(%rip), $D0 -+ movdqa $D0, $ctr0_store -+ movdqa $D1, $ctr1_store\n" if ($n eq 2); -+$code.="movdqa $ctr0_store, $D2 -+ paddd .sse_inc(%rip), $D2 -+ movdqa $D2, $D1 -+ paddd .sse_inc(%rip), $D1 -+ movdqa $D1, $D0 -+ paddd .sse_inc(%rip), $D0 -+ movdqa $D0, $ctr0_store -+ movdqa $D1, $ctr1_store -+ movdqa $D2, $ctr2_store\n" if ($n eq 3); -+$code.="movdqa $ctr0_store, $D3 -+ paddd .sse_inc(%rip), $D3 -+ movdqa $D3, $D2 -+ paddd .sse_inc(%rip), $D2 -+ movdqa $D2, $D1 -+ paddd .sse_inc(%rip), $D1 -+ movdqa $D1, $D0 -+ paddd .sse_inc(%rip), $D0 -+ movdqa $D0, $ctr0_store -+ movdqa $D1, $ctr1_store -+ movdqa $D2, $ctr2_store -+ movdqa $D3, $ctr3_store\n" if ($n eq 4); -+} -+ -+sub finalize_state { -+my ($n)=@_; -+$code.="paddd .chacha20_consts(%rip), $A3 -+ paddd $state1_store, $B3 -+ paddd $state2_store, $C3 -+ paddd $ctr3_store, $D3\n" if ($n eq 4); -+$code.="paddd .chacha20_consts(%rip), $A2 -+ paddd $state1_store, $B2 -+ paddd $state2_store, $C2 -+ paddd $ctr2_store, $D2\n" if ($n ge 3); -+$code.="paddd .chacha20_consts(%rip), $A1 -+ paddd $state1_store, $B1 -+ paddd $state2_store, $C1 -+ paddd $ctr1_store, $D1\n" if ($n ge 2); -+$code.="paddd .chacha20_consts(%rip), $A0 -+ paddd $state1_store, $B0 -+ paddd $state2_store, $C0 -+ paddd $ctr0_store, $D0\n"; -+} -+ -+sub xor_stream { -+my ($A, $B, $C, $D, $offset)=@_; -+$code.="movdqu 0*16 + $offset($inp), $A3 -+ movdqu 1*16 + $offset($inp), $B3 -+ movdqu 2*16 + $offset($inp), $C3 -+ movdqu 3*16 + $offset($inp), $D3 -+ pxor $A3, $A -+ pxor $B3, $B -+ pxor $C3, $C -+ pxor $D, $D3 -+ movdqu $A, 0*16 + $offset($oup) -+ movdqu $B, 1*16 + $offset($oup) -+ movdqu $C, 2*16 + $offset($oup) -+ movdqu $D3, 3*16 + $offset($oup)\n"; -+} -+ -+sub xor_stream_using_temp { -+my ($A, $B, $C, $D, $offset, $temp)=@_; -+$code.="movdqa $temp, $tmp_store -+ movdqu 0*16 + $offset($inp), $temp -+ pxor $A, $temp -+ movdqu $temp, 0*16 + $offset($oup) -+ movdqu 1*16 + $offset($inp), $temp -+ pxor $B, $temp -+ movdqu $temp, 1*16 + $offset($oup) -+ movdqu 2*16 + $offset($inp), $temp -+ pxor $C, $temp -+ movdqu $temp, 2*16 + $offset($oup) -+ movdqu 3*16 + $offset($inp), $temp -+ pxor $D, $temp -+ movdqu $temp, 3*16 + $offset($oup)\n"; -+} -+ -+sub gen_chacha_round { -+my ($rot1, $rot2, $shift)=@_; -+my $round=""; -+$round.="movdqa $C0, $tmp_store\n" if ($rot1 eq 20); -+$round.="movdqa $rot2, $C0 -+ paddd $B3, $A3 -+ paddd $B2, $A2 -+ paddd $B1, $A1 -+ paddd $B0, $A0 -+ pxor $A3, $D3 -+ pxor $A2, $D2 -+ pxor $A1, $D1 -+ pxor $A0, $D0 -+ pshufb $C0, $D3 -+ pshufb $C0, $D2 -+ pshufb $C0, $D1 -+ pshufb $C0, $D0 -+ movdqa $tmp_store, $C0 -+ paddd $D3, $C3 -+ paddd $D2, $C2 -+ paddd $D1, $C1 -+ paddd $D0, $C0 -+ pxor $C3, $B3 -+ pxor $C2, $B2 -+ pxor $C1, $B1 -+ pxor $C0, $B0 -+ movdqa $C0, $tmp_store -+ movdqa $B3, $C0 -+ psrld \$$rot1, $C0 -+ pslld \$32-$rot1, $B3 -+ pxor $C0, $B3 -+ movdqa $B2, $C0 -+ psrld \$$rot1, $C0 -+ pslld \$32-$rot1, $B2 -+ pxor $C0, $B2 -+ movdqa $B1, $C0 -+ psrld \$$rot1, $C0 -+ pslld \$32-$rot1, $B1 -+ pxor $C0, $B1 -+ movdqa $B0, $C0 -+ psrld \$$rot1, $C0 -+ pslld \$32-$rot1, $B0 -+ pxor $C0, $B0\n"; -+($s1,$s2,$s3)=(4,8,12) if ($shift =~ /left/); -+($s1,$s2,$s3)=(12,8,4) if ($shift =~ /right/); -+$round.="movdqa $tmp_store, $C0 -+ palignr \$$s1, $B3, $B3 -+ palignr \$$s2, $C3, $C3 -+ palignr \$$s3, $D3, $D3 -+ palignr \$$s1, $B2, $B2 -+ palignr \$$s2, $C2, $C2 -+ palignr \$$s3, $D2, $D2 -+ palignr \$$s1, $B1, $B1 -+ palignr \$$s2, $C1, $C1 -+ palignr \$$s3, $D1, $D1 -+ palignr \$$s1, $B0, $B0 -+ palignr \$$s2, $C0, $C0 -+ palignr \$$s3, $D0, $D0\n" -+if (($shift =~ /left/) || ($shift =~ /right/)); -+return $round; -+}; -+ -+$chacha_body = &gen_chacha_round(20, ".rol16(%rip)") . -+ &gen_chacha_round(25, ".rol8(%rip)", "left") . -+ &gen_chacha_round(20, ".rol16(%rip)") . -+ &gen_chacha_round(25, ".rol8(%rip)", "right"); -+ -+my @loop_body = split /\n/, $chacha_body; -+ -+sub emit_body { -+my ($n)=@_; -+ for (my $i=0; $i < $n; $i++) { -+ $code=$code.shift(@loop_body)."\n"; -+ }; -+} -+ -+{ -+################################################################################ -+# void poly_hash_ad_internal(); -+$code.=" -+.type poly_hash_ad_internal,\@function,2 -+.align 64 -+poly_hash_ad_internal: -+ xor $acc0, $acc0 -+ xor $acc1, $acc1 -+ xor $acc2, $acc2 -+ cmp \$13, $itr2 -+ jne hash_ad_loop -+poly_fast_tls_ad: -+ # Special treatment for the TLS case of 13 bytes -+ mov ($adp), $acc0 -+ mov 5($adp), $acc1 -+ shr \$24, $acc1 -+ mov \$1, $acc2\n"; -+ &poly_mul(); $code.=" -+ ret -+hash_ad_loop: -+ # Hash in 16 byte chunk -+ cmp \$16, $itr2 -+ jb hash_ad_tail\n"; -+ &poly_add("0($adp)"); -+ &poly_mul(); $code.=" -+ lea (1*16)($adp), $adp -+ sub \$16, $itr2 -+ jmp hash_ad_loop -+hash_ad_tail: -+ cmp \$0, $itr2 -+ je 1f -+ # Hash last < 16 byte tail -+ xor $t0, $t0 -+ xor $t1, $t1 -+ xor $t2, $t2 -+ add $itr2, $adp -+hash_ad_tail_loop: -+ shld \$8, $t0, $t1 -+ shl \$8, $t0 -+ movzxb -1($adp), $t2 -+ xor $t2, $t0 -+ dec $adp -+ dec $itr2 -+ jne hash_ad_tail_loop -+ -+ add $t0, $acc0 -+ adc $t1, $acc1 -+ adc \$1, $acc2\n"; -+ &poly_mul(); $code.=" -+ # Finished AD -+1: -+ ret -+.size poly_hash_ad_internal, .-poly_hash_ad_internal\n"; -+} -+ -+{ -+################################################################################ -+# int chacha20_poly1305_open(uint8_t *pt, uint8_t *ct, size_t len_in, uint8_t *ad, size_t len_ad, uint8_t *keyp); -+$code.=" -+.globl chacha20_poly1305_open -+.type chacha20_poly1305_open,\@function,2 -+.align 64 -+chacha20_poly1305_open: -+ push %rbp -+ push %rbx -+ push %r12 -+ push %r13 -+ push %r14 -+ push %r15 -+ sub \$288 + 32, %rsp -+ lea 32(%rsp), %rbp -+ and \$-32, %rbp -+ mov %rdx, 8+$len_store -+ mov %r8, 0+$len_store -+ mov %rdx, $inl\n"; $code.=" -+ mov OPENSSL_ia32cap_P+8(%rip), %eax -+ test \$`1<<5`, %eax -+ jnz chacha20_poly1305_open_avx2\n" if ($avx>1); -+$code.=" -+ cmp \$128, $inl -+ jbe open_sse_128 -+ # For long buffers, prepare the poly key first -+ movdqa .chacha20_consts(%rip), $A0 -+ movdqu 0*16($keyp), $B0 -+ movdqu 1*16($keyp), $C0 -+ movdqu 2*16($keyp), $D0 -+ movdqa $D0, $T1 -+ # Store on stack, to free keyp -+ movdqa $B0, $state1_store -+ movdqa $C0, $state2_store -+ movdqa $D0, $ctr0_store -+ mov \$10, $acc0 -+1: \n"; -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"right"); $code.=" -+ dec $acc0 -+ jne 1b -+ # A0|B0 hold the Poly1305 32-byte key, C0,D0 can be discarded -+ paddd .chacha20_consts(%rip), $A0 -+ paddd $state1_store, $B0 -+ # Clamp and store the key -+ pand .clamp(%rip), $A0 -+ movdqa $A0, $r_store -+ movdqa $B0, $s_store -+ # Hash -+ mov %r8, $itr2 -+ call poly_hash_ad_internal -+open_sse_main_loop: -+ cmp \$16*16, $inl -+ jb 2f -+ # Load state, increment counter blocks\n"; -+ &prep_state(4); $code.=" -+ # There are 10 ChaCha20 iterations of 2QR each, so for 6 iterations we -+ # hash 2 blocks, and for the remaining 4 only 1 block - for a total of 16 -+ mov \$4, $itr1 -+ mov $inp, $itr2 -+1: \n"; -+ &emit_body(20); -+ &poly_add("0($itr2)"); $code.=" -+ lea 2*8($itr2), $itr2\n"; -+ &emit_body(20); -+ &poly_stage1(); -+ &emit_body(20); -+ &poly_stage2(); -+ &emit_body(20); -+ &poly_stage3(); -+ &emit_body(20); -+ &poly_reduce_stage(); -+ foreach $l (@loop_body) {$code.=$l."\n";} -+ @loop_body = split /\n/, $chacha_body; $code.=" -+ dec $itr1 -+ jge 1b\n"; -+ &poly_add("0($itr2)"); -+ &poly_mul(); $code.=" -+ lea 2*8($itr2), $itr2 -+ cmp \$-6, $itr1 -+ jg 1b\n"; -+ &finalize_state(4); -+ &xor_stream_using_temp($A3, $B3, $C3, $D3, "0*16", $D0); -+ &xor_stream($A2, $B2, $C2, $D2, "4*16"); -+ &xor_stream($A1, $B1, $C1, $D1, "8*16"); -+ &xor_stream($A0, $B0, $C0, $tmp_store, "12*16"); $code.=" -+ lea 16*16($inp), $inp -+ lea 16*16($oup), $oup -+ sub \$16*16, $inl -+ jmp open_sse_main_loop -+2: -+ # Handle the various tail sizes efficiently -+ test $inl, $inl -+ jz open_sse_finalize -+ cmp \$4*16, $inl -+ ja 3f\n"; -+############################################################################### -+ # At most 64 bytes are left -+ &prep_state(1); $code.=" -+ xor $itr2, $itr2 -+ mov $inl, $itr1 -+ cmp \$16, $itr1 -+ jb 2f -+1: \n"; -+ &poly_add("0($inp, $itr2)"); -+ &poly_mul(); $code.=" -+ sub \$16, $itr1 -+2: -+ add \$16, $itr2\n"; -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"right"); $code.=" -+ cmp \$16, $itr1 -+ jae 1b -+ cmp \$10*16, $itr2 -+ jne 2b\n"; -+ &finalize_state(1); $code.=" -+ jmp open_sse_tail_64_dec_loop -+3: -+ cmp \$8*16, $inl -+ ja 3f\n"; -+############################################################################### -+ # 65 - 128 bytes are left -+ &prep_state(2); $code.=" -+ mov $inl, $itr1 -+ and \$-16, $itr1 -+ xor $itr2, $itr2 -+1: \n"; -+ &poly_add("0($inp, $itr2)"); -+ &poly_mul(); $code.=" -+2: -+ add \$16, $itr2\n"; -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr($A1,$B1,$C1,$D1,$T0,"left"); -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"right"); -+ &chacha_qr($A1,$B1,$C1,$D1,$T0,"right");$code.=" -+ cmp $itr1, $itr2 -+ jb 1b -+ cmp \$10*16, $itr2 -+ jne 2b\n"; -+ &finalize_state(2); -+ &xor_stream($A1, $B1, $C1, $D1, "0*16"); $code.=" -+ sub \$4*16, $inl -+ lea 4*16($inp), $inp -+ lea 4*16($oup), $oup -+ jmp open_sse_tail_64_dec_loop -+3: -+ cmp \$12*16, $inl -+ ja 3f\n"; -+############################################################################### -+ # 129 - 192 bytes are left -+ &prep_state(3); $code.=" -+ mov $inl, $itr1 -+ mov \$10*16, $itr2 -+ cmp \$10*16, $itr1 -+ cmovg $itr2, $itr1 -+ and \$-16, $itr1 -+ xor $itr2, $itr2 -+1: \n"; -+ &poly_add("0($inp, $itr2)"); -+ &poly_mul(); $code.=" -+2: -+ add \$16, $itr2\n"; -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr($A1,$B1,$C1,$D1,$T0,"left"); -+ &chacha_qr($A2,$B2,$C2,$D2,$T0,"left"); -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"right"); -+ &chacha_qr($A1,$B1,$C1,$D1,$T0,"right"); -+ &chacha_qr($A2,$B2,$C2,$D2,$T0,"right"); $code.=" -+ cmp $itr1, $itr2 -+ jb 1b -+ cmp \$10*16, $itr2 -+ jne 2b -+ cmp \$11*16, $inl -+ jb 1f\n"; -+ &poly_add("10*16($inp)"); -+ &poly_mul(); $code.=" -+ cmp \$12*16, $inl -+ jb 1f\n"; -+ &poly_add("11*16($inp)"); -+ &poly_mul(); $code.=" -+1: \n"; -+ &finalize_state(3); -+ &xor_stream($A2, $B2, $C2, $D2, "0*16"); -+ &xor_stream($A1, $B1, $C1, $D1, "4*16"); $code.=" -+ sub \$8*16, $inl -+ lea 8*16($inp), $inp -+ lea 8*16($oup), $oup -+ jmp open_sse_tail_64_dec_loop -+3: -+###############################################################################\n"; -+ # 193 - 255 bytes are left -+ &prep_state(4); $code.=" -+ xor $itr2, $itr2 -+1: \n"; -+ &poly_add("0($inp, $itr2)"); -+ &chacha_qr($A0,$B0,$C0,$D0,$C3,"store_left"); -+ &chacha_qr($A1,$B1,$C1,$D1,$C3,"left"); -+ &chacha_qr($A2,$B2,$C2,$D2,$C3,"left_load"); -+ &poly_stage1(); -+ &chacha_qr($A3,$B3,$C3,$D3,$C1,"store_left_load"); -+ &poly_stage2(); -+ &chacha_qr($A0,$B0,$C0,$D0,$C3,"store_right"); -+ &chacha_qr($A1,$B1,$C1,$D1,$C3,"right"); -+ &poly_stage3(); -+ &chacha_qr($A2,$B2,$C2,$D2,$C3,"right_load"); -+ &poly_reduce_stage(); -+ &chacha_qr($A3,$B3,$C3,$D3,$C1,"store_right_load"); $code.=" -+ add \$16, $itr2 -+ cmp \$10*16, $itr2 -+ jb 1b -+ mov $inl, $itr1 -+ and \$-16, $itr1 -+1: \n"; -+ &poly_add("0($inp, $itr2)"); -+ &poly_mul(); $code.=" -+ add \$16, $itr2 -+ cmp $itr1, $itr2 -+ jb 1b\n"; -+ &finalize_state(4); -+ &xor_stream_using_temp($A3, $B3, $C3, $D3, "0*16", $D0); -+ &xor_stream($A2, $B2, $C2, $D2, "4*16"); -+ &xor_stream($A1, $B1, $C1, $D1, "8*16"); $code.=" -+ movdqa $tmp_store, $D0 -+ sub \$12*16, $inl -+ lea 12*16($inp), $inp -+ lea 12*16($oup), $oup -+############################################################################### -+ # Decrypt the remaining data, 16B at a time, using existing stream -+open_sse_tail_64_dec_loop: -+ cmp \$16, $inl -+ jb 1f -+ sub \$16, $inl -+ movdqu ($inp), $T0 -+ pxor $T0, $A0 -+ movdqu $A0, ($oup) -+ lea 16($inp), $inp -+ lea 16($oup), $oup -+ movdqa $B0, $A0 -+ movdqa $C0, $B0 -+ movdqa $D0, $C0 -+ jmp open_sse_tail_64_dec_loop -+1: -+ movdqa $A0, $A1 -+ # Decrypt up to 16B -+open_sse_tail_16: -+ test $inl, $inl -+ jz open_sse_finalize -+ # We can safely load the CT from the end, because it is padded with the MAC -+ mov $inl, $itr2 -+ shl \$4, $itr2 -+ lea .and_masks(%rip), $t0 -+ movdqu ($inp), $T0 -+ add $inl, $inp -+ pand -16($t0, $itr2), $T0 -+ movq $T0, $t0 -+ pextrq \$1, $T0, $t1 -+ pxor $A1, $T0 -+ # We can only store 1 byte at a time, since plaintext can be shorter than 16 bytes -+2: -+ pextrb \$0, $T0, ($oup) -+ psrldq \$1, $T0 -+ inc $oup -+ dec $inl -+ jne 2b -+ -+ add $t0, $acc0 -+ adc $t1, $acc1 -+ adc \$1, $acc2\n"; -+ &poly_mul(); $code.=" -+ -+open_sse_finalize:\n"; -+ &poly_add($len_store); -+ &poly_mul(); $code.=" -+ # Final reduce -+ mov $acc0, $t0 -+ mov $acc1, $t1 -+ mov $acc2, $t2 -+ sub \$-5, $acc0 -+ sbb \$-1, $acc1 -+ sbb \$3, $acc2 -+ cmovc $t0, $acc0 -+ cmovc $t1, $acc1 -+ cmovc $t2, $acc2 -+ # Add in s part of the key -+ add 0+$s_store, $acc0 -+ adc 8+$s_store, $acc1 -+ # Constant time compare -+ xor %rax, %rax -+ mov \$1, %rdx -+ xor 0*8($inp), $acc0 -+ xor 1*8($inp), $acc1 -+ or $acc1, $acc0 -+ cmovz %rdx, %rax -+ -+ add \$288 + 32, %rsp -+ pop %r15 -+ pop %r14 -+ pop %r13 -+ pop %r12 -+ pop %rbx -+ pop %rbp -+ ret -+############################################################################### -+open_sse_128: -+ movdqu .chacha20_consts(%rip), $A0\nmovdqa $A0, $A1\nmovdqa $A0, $A2 -+ movdqu 0*16($keyp), $B0\nmovdqa $B0, $B1\nmovdqa $B0, $B2 -+ movdqu 1*16($keyp), $C0\nmovdqa $C0, $C1\nmovdqa $C0, $C2 -+ movdqu 2*16($keyp), $D0 -+ movdqa $D0, $D1\npaddd .sse_inc(%rip), $D1 -+ movdqa $D1, $D2\npaddd .sse_inc(%rip), $D2 -+ movdqa $B0, $T1\nmovdqa $C0, $T2\nmovdqa $D1, $T3 -+ mov \$10, $acc0 -+1: \n"; -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr($A1,$B1,$C1,$D1,$T0,"left"); -+ &chacha_qr($A2,$B2,$C2,$D2,$T0,"left"); -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"right"); -+ &chacha_qr($A1,$B1,$C1,$D1,$T0,"right"); -+ &chacha_qr($A2,$B2,$C2,$D2,$T0,"right"); $code.=" -+ dec $acc0 -+ jnz 1b -+ paddd .chacha20_consts(%rip), $A0 -+ paddd .chacha20_consts(%rip), $A1 -+ paddd .chacha20_consts(%rip), $A2 -+ paddd $T1, $B0\npaddd $T1, $B1\npaddd $T1, $B2 -+ paddd $T2, $C1\npaddd $T2, $C2 -+ paddd $T3, $D1 -+ paddd .sse_inc(%rip), $T3 -+ paddd $T3, $D2 -+ # Clamp and store the key -+ pand .clamp(%rip), $A0 -+ movdqa $A0, $r_store -+ movdqa $B0, $s_store -+ # Hash -+ mov %r8, $itr2 -+ call poly_hash_ad_internal -+1: -+ cmp \$16, $inl -+ jb open_sse_tail_16 -+ sub \$16, $inl\n"; -+ # Load for hashing -+ &poly_add("0*8($inp)"); $code.=" -+ # Load for decryption -+ movdqu 0*16($inp), $T0 -+ pxor $T0, $A1 -+ movdqu $A1, 0*16($oup) -+ lea 1*16($inp), $inp -+ lea 1*16($oup), $oup\n"; -+ &poly_mul(); $code.=" -+ # Shift the stream left -+ movdqa $B1, $A1 -+ movdqa $C1, $B1 -+ movdqa $D1, $C1 -+ movdqa $A2, $D1 -+ movdqa $B2, $A2 -+ movdqa $C2, $B2 -+ movdqa $D2, $C2 -+ jmp 1b -+ jmp open_sse_tail_16 -+.size chacha20_poly1305_open, .-chacha20_poly1305_open -+################################################################################ -+################################################################################ -+# void chacha20_poly1305_seal(uint8_t *pt, uint8_t *ct, size_t len_in, uint8_t *ad, size_t len_ad, uint8_t *keyp); -+.globl chacha20_poly1305_seal -+.type chacha20_poly1305_seal,\@function,2 -+.align 64 -+chacha20_poly1305_seal: -+ push %rbp -+ push %rbx -+ push %r12 -+ push %r13 -+ push %r14 -+ push %r15 -+ sub \$288 + 32, %rsp -+ lea 32(%rsp), %rbp -+ and \$-32, %rbp -+ mov %rdx, 8+$len_store -+ mov %r8, 0+$len_store -+ mov %rdx, $inl\n"; $code.=" -+ mov OPENSSL_ia32cap_P+8(%rip), %eax -+ test \$`1<<5`, %eax -+ jnz chacha20_poly1305_seal_avx2\n" if ($avx>1); -+$code.=" -+ cmp \$128, $inl -+ jbe seal_sse_128 -+ # For longer buffers, prepare the poly key + some stream -+ movdqa .chacha20_consts(%rip), $A0 -+ movdqu 0*16($keyp), $B0 -+ movdqu 1*16($keyp), $C0 -+ movdqu 2*16($keyp), $D0 -+ movdqa $A0, $A1 -+ movdqa $A0, $A2 -+ movdqa $A0, $A3 -+ movdqa $B0, $B1 -+ movdqa $B0, $B2 -+ movdqa $B0, $B3 -+ movdqa $C0, $C1 -+ movdqa $C0, $C2 -+ movdqa $C0, $C3 -+ movdqa $D0, $D3 -+ paddd .sse_inc(%rip), $D0 -+ movdqa $D0, $D2 -+ paddd .sse_inc(%rip), $D0 -+ movdqa $D0, $D1 -+ paddd .sse_inc(%rip), $D0 -+ # Store on stack -+ movdqa $B0, $state1_store -+ movdqa $C0, $state2_store -+ movdqa $D0, $ctr0_store -+ movdqa $D1, $ctr1_store -+ movdqa $D2, $ctr2_store -+ movdqa $D3, $ctr3_store -+ mov \$10, $acc0 -+1: \n"; -+ foreach $l (@loop_body) {$code.=$l."\n";} -+ @loop_body = split /\n/, $chacha_body; $code.=" -+ dec $acc0 -+ jnz 1b\n"; -+ &finalize_state(4); $code.=" -+ # Clamp and store the key -+ pand .clamp(%rip), $A3 -+ movdqa $A3, $r_store -+ movdqa $B3, $s_store -+ # Hash -+ mov %r8, $itr2 -+ call poly_hash_ad_internal\n"; -+ &xor_stream($A2,$B2,$C2,$D2,"0*16"); -+ &xor_stream($A1,$B1,$C1,$D1,"4*16"); $code.=" -+ cmp \$12*16, $inl -+ ja 1f -+ mov \$8*16, $itr1 -+ sub \$8*16, $inl -+ lea 8*16($inp), $inp -+ jmp seal_sse_128_seal_hash -+1: \n"; -+ &xor_stream($A0, $B0, $C0, $D0, "8*16"); $code.=" -+ mov \$12*16, $itr1 -+ sub \$12*16, $inl -+ lea 12*16($inp), $inp -+ mov \$2, $itr1 -+ mov \$8, $itr2 -+ cmp \$4*16, $inl -+ jbe seal_sse_tail_64 -+ cmp \$8*16, $inl -+ jbe seal_sse_tail_128 -+ cmp \$12*16, $inl -+ jbe seal_sse_tail_192 -+ -+1: \n"; -+ # The main loop -+ &prep_state(4); $code.=" -+2: \n"; -+ &emit_body(20); -+ &poly_add("0($oup)"); -+ &emit_body(20); -+ &poly_stage1(); -+ &emit_body(20); -+ &poly_stage2(); -+ &emit_body(20); -+ &poly_stage3(); -+ &emit_body(20); -+ &poly_reduce_stage(); -+ foreach $l (@loop_body) {$code.=$l."\n";} -+ @loop_body = split /\n/, $chacha_body; $code.=" -+ lea 16($oup), $oup -+ dec $itr2 -+ jge 2b\n"; -+ &poly_add("0*8($oup)"); -+ &poly_mul(); $code.=" -+ lea 16($oup), $oup -+ dec $itr1 -+ jg 2b\n"; -+ -+ &finalize_state(4);$code.=" -+ movdqa $D2, $tmp_store\n"; -+ &xor_stream_using_temp($A3,$B3,$C3,$D3,0*16,$D2); $code.=" -+ movdqa $tmp_store, $D2\n"; -+ &xor_stream($A2,$B2,$C2,$D2, 4*16); -+ &xor_stream($A1,$B1,$C1,$D1, 8*16); $code.=" -+ cmp \$16*16, $inl -+ ja 3f -+ -+ mov \$12*16, $itr1 -+ sub \$12*16, $inl -+ lea 12*16($inp), $inp -+ jmp seal_sse_128_seal_hash -+3: \n"; -+ &xor_stream($A0,$B0,$C0,$D0,"12*16"); $code.=" -+ lea 16*16($inp), $inp -+ sub \$16*16, $inl -+ mov \$6, $itr1 -+ mov \$4, $itr2 -+ cmp \$12*16, $inl -+ jg 1b -+ mov $inl, $itr1 -+ test $inl, $inl -+ je seal_sse_128_seal_hash -+ mov \$6, $itr1 -+ cmp \$4*16, $inl -+ jg 3f -+############################################################################### -+seal_sse_tail_64:\n"; -+ &prep_state(1); $code.=" -+1: \n"; -+ &poly_add("0($oup)"); -+ &poly_mul(); $code.=" -+ lea 16($oup), $oup -+2: \n"; -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"right"); -+ &poly_add("0($oup)"); -+ &poly_mul(); $code.=" -+ lea 16($oup), $oup -+ dec $itr1 -+ jg 1b -+ dec $itr2 -+ jge 2b\n"; -+ &finalize_state(1); $code.=" -+ jmp seal_sse_128_seal -+3: -+ cmp \$8*16, $inl -+ jg 3f -+############################################################################### -+seal_sse_tail_128:\n"; -+ &prep_state(2); $code.=" -+1: \n"; -+ &poly_add("0($oup)"); -+ &poly_mul(); $code.=" -+ lea 16($oup), $oup -+2: \n"; -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr($A1,$B1,$C1,$D1,$T0,"left"); -+ &poly_add("0($oup)"); -+ &poly_mul(); -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"right"); -+ &chacha_qr($A1,$B1,$C1,$D1,$T0,"right"); $code.=" -+ lea 16($oup), $oup -+ dec $itr1 -+ jg 1b -+ dec $itr2 -+ jge 2b\n"; -+ &finalize_state(2); -+ &xor_stream($A1,$B1,$C1,$D1,0*16); $code.=" -+ mov \$4*16, $itr1 -+ sub \$4*16, $inl -+ lea 4*16($inp), $inp -+ jmp seal_sse_128_seal_hash -+3: -+############################################################################### -+seal_sse_tail_192:\n"; -+ &prep_state(3); $code.=" -+1: \n"; -+ &poly_add("0($oup)"); -+ &poly_mul(); $code.=" -+ lea 16($oup), $oup -+2: \n"; -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr($A1,$B1,$C1,$D1,$T0,"left"); -+ &chacha_qr($A2,$B2,$C2,$D2,$T0,"left"); -+ &poly_add("0($oup)"); -+ &poly_mul(); -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"right"); -+ &chacha_qr($A1,$B1,$C1,$D1,$T0,"right"); -+ &chacha_qr($A2,$B2,$C2,$D2,$T0,"right"); $code.=" -+ lea 16($oup), $oup -+ dec $itr1 -+ jg 1b -+ dec $itr2 -+ jge 2b\n"; -+ &finalize_state(3); -+ &xor_stream($A2,$B2,$C2,$D2,0*16); -+ &xor_stream($A1,$B1,$C1,$D1,4*16); $code.=" -+ mov \$8*16, $itr1 -+ sub \$8*16, $inl -+ lea 8*16($inp), $inp -+############################################################################### -+seal_sse_128_seal_hash: -+ cmp \$16, $itr1 -+ jb seal_sse_128_seal\n"; -+ &poly_add("0($oup)"); -+ &poly_mul(); $code.=" -+ sub \$16, $itr1 -+ lea 16($oup), $oup -+ jmp seal_sse_128_seal_hash -+ -+seal_sse_128_seal: -+ cmp \$16, $inl -+ jb seal_sse_tail_16 -+ sub \$16, $inl -+ # Load for decryption -+ movdqu 0*16($inp), $T0 -+ pxor $T0, $A0 -+ movdqu $A0, 0*16($oup) -+ # Then hash -+ add 0*8($oup), $acc0 -+ adc 1*8($oup), $acc1 -+ adc \$1, $acc2 -+ lea 1*16($inp), $inp -+ lea 1*16($oup), $oup\n"; -+ &poly_mul(); $code.=" -+ # Shift the stream left -+ movdqa $B0, $A0 -+ movdqa $C0, $B0 -+ movdqa $D0, $C0 -+ movdqa $A1, $D0 -+ movdqa $B1, $A1 -+ movdqa $C1, $B1 -+ movdqa $D1, $C1 -+ jmp seal_sse_128_seal -+ -+seal_sse_tail_16: -+ test $inl, $inl -+ jz seal_sse_finalize -+ # We can only load the PT one byte at a time to avoid buffer overread -+ mov $inl, $itr2 -+ shl \$4, $itr2 -+ lea .and_masks(%rip), $t0 -+ mov $inl, $itr1 -+ lea -1($inp, $inl), $inp -+ pxor $T3, $T3 -+1: -+ pslldq \$1, $T3 -+ pinsrb \$0, ($inp), $T3 -+ lea -1($inp), $inp -+ dec $itr1 -+ jne 1b -+ pxor $A0, $T3 -+ movdqu $T3, ($oup) -+ pand -16($t0, $itr2), $T3 -+ movq $T3, $t0 -+ pextrq \$1, $T3, $t1 -+ add $t0, $acc0 -+ adc $t1, $acc1 -+ adc \$1, $acc2 -+ lea ($inl, $oup), $oup\n"; -+ &poly_mul(); $code.=" -+seal_sse_finalize:\n"; -+ &poly_add($len_store); -+ &poly_mul(); $code.=" -+ # Final reduce -+ mov $acc0, $t0 -+ mov $acc1, $t1 -+ mov $acc2, $t2 -+ sub \$-5, $acc0 -+ sbb \$-1, $acc1 -+ sbb \$3, $acc2 -+ cmovc $t0, $acc0 -+ cmovc $t1, $acc1 -+ cmovc $t2, $acc2 -+ # Add in s part of the key -+ add 0+$s_store, $acc0 -+ adc 8+$s_store, $acc1 -+ mov $acc0, 0*8($oup) -+ mov $acc1, 1*8($oup) -+ add \$288 + 32, %rsp -+ pop %r15 -+ pop %r14 -+ pop %r13 -+ pop %r12 -+ pop %rbx -+ pop %rbp -+ ret -+################################################################################ -+seal_sse_128: -+ movdqu .chacha20_consts(%rip), $A0\nmovdqa $A0, $A1\nmovdqa $A0, $A2 -+ movdqu 0*16($keyp), $B0\nmovdqa $B0, $B1\nmovdqa $B0, $B2 -+ movdqu 1*16($keyp), $C0\nmovdqa $C0, $C1\nmovdqa $C0, $C2 -+ movdqu 2*16($keyp), $D2 -+ movdqa $D2, $D0\npaddd .sse_inc(%rip), $D0 -+ movdqa $D0, $D1\npaddd .sse_inc(%rip), $D1 -+ movdqa $B0, $T1\nmovdqa $C0, $T2\nmovdqa $D0, $T3 -+ mov \$10, $acc0 -+1:\n"; -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr($A1,$B1,$C1,$D1,$T0,"left"); -+ &chacha_qr($A2,$B2,$C2,$D2,$T0,"left"); -+ &chacha_qr($A0,$B0,$C0,$D0,$T0,"right"); -+ &chacha_qr($A1,$B1,$C1,$D1,$T0,"right"); -+ &chacha_qr($A2,$B2,$C2,$D2,$T0,"right"); $code.=" -+ dec $acc0 -+ jnz 1b -+ paddd .chacha20_consts(%rip), $A0 -+ paddd .chacha20_consts(%rip), $A1 -+ paddd .chacha20_consts(%rip), $A2 -+ paddd $T1, $B0\npaddd $T1, $B1\npaddd $T1, $B2 -+ paddd $T2, $C0\npaddd $T2, $C1 -+ paddd $T3, $D0 -+ paddd .sse_inc(%rip), $T3 -+ paddd $T3, $D1 -+ # Clamp and store the key -+ pand .clamp(%rip), $A2 -+ movdqa $A2, $r_store -+ movdqa $B2, $s_store -+ # Hash -+ mov %r8, $itr2 -+ call poly_hash_ad_internal -+ jmp seal_sse_128_seal -+.size chacha20_poly1305_seal, .-chacha20_poly1305_seal\n"; -+} -+ -+if ($avx>1) { -+ -+($A0,$A1,$A2,$A3,$B0,$B1,$B2,$B3,$C0,$C1,$C2,$C3,$D0,$D1,$D2,$D3)=map("%ymm$_",(0..15)); -+my ($A0x,$A1x,$A2x,$A3x,$B0x,$B1x,$B2x,$B3x,$C0x,$C1x,$C2x,$C3x,$D0x,$D1x,$D2x,$D3x)=map("%xmm$_",(0..15)); -+($T0,$T1,$T2,$T3)=($A3,$B3,$C3,$D3); -+$state1_store="2*32(%rbp)"; -+$state2_store="3*32(%rbp)"; -+$tmp_store="4*32(%rbp)"; -+$ctr0_store="5*32(%rbp)"; -+$ctr1_store="6*32(%rbp)"; -+$ctr2_store="7*32(%rbp)"; -+$ctr3_store="8*32(%rbp)"; -+ -+sub chacha_qr_avx2 { -+my ($a,$b,$c,$d,$t,$dir)=@_; -+$code.=<<___ if ($dir =~ /store/); -+ vmovdqa $t, $tmp_store -+___ -+$code.=<<___; -+ vpaddd $b, $a, $a -+ vpxor $a, $d, $d -+ vpshufb .rol16(%rip), $d, $d -+ vpaddd $d, $c, $c -+ vpxor $c, $b, $b -+ vpsrld \$20, $b, $t -+ vpslld \$12, $b, $b -+ vpxor $t, $b, $b -+ vpaddd $b, $a, $a -+ vpxor $a, $d, $d -+ vpshufb .rol8(%rip), $d, $d -+ vpaddd $d, $c, $c -+ vpxor $c, $b, $b -+ vpslld \$7, $b, $t -+ vpsrld \$25, $b, $b -+ vpxor $t, $b, $b -+___ -+$code.=<<___ if ($dir =~ /left/); -+ vpalignr \$12, $d, $d, $d -+ vpalignr \$8, $c, $c, $c -+ vpalignr \$4, $b, $b, $b -+___ -+$code.=<<___ if ($dir =~ /right/); -+ vpalignr \$4, $d, $d, $d -+ vpalignr \$8, $c, $c, $c -+ vpalignr \$12, $b, $b, $b -+___ -+$code.=<<___ if ($dir =~ /load/); -+ vmovdqa $tmp_store, $t -+___ -+} -+ -+sub prep_state_avx2 { -+my ($n)=@_; -+$code.=<<___; -+ vmovdqa .chacha20_consts(%rip), $A0 -+ vmovdqa $state1_store, $B0 -+ vmovdqa $state2_store, $C0 -+___ -+$code.=<<___ if ($n ge 2); -+ vmovdqa $A0, $A1 -+ vmovdqa $B0, $B1 -+ vmovdqa $C0, $C1 -+___ -+$code.=<<___ if ($n ge 3); -+ vmovdqa $A0, $A2 -+ vmovdqa $B0, $B2 -+ vmovdqa $C0, $C2 -+___ -+$code.=<<___ if ($n ge 4); -+ vmovdqa $A0, $A3 -+ vmovdqa $B0, $B3 -+ vmovdqa $C0, $C3 -+___ -+$code.=<<___ if ($n eq 1); -+ vmovdqa .avx2_inc(%rip), $D0 -+ vpaddd $ctr0_store, $D0, $D0 -+ vmovdqa $D0, $ctr0_store -+___ -+$code.=<<___ if ($n eq 2); -+ vmovdqa .avx2_inc(%rip), $D0 -+ vpaddd $ctr0_store, $D0, $D1 -+ vpaddd $D1, $D0, $D0 -+ vmovdqa $D0, $ctr0_store -+ vmovdqa $D1, $ctr1_store -+___ -+$code.=<<___ if ($n eq 3); -+ vmovdqa .avx2_inc(%rip), $D0 -+ vpaddd $ctr0_store, $D0, $D2 -+ vpaddd $D2, $D0, $D1 -+ vpaddd $D1, $D0, $D0 -+ vmovdqa $D0, $ctr0_store -+ vmovdqa $D1, $ctr1_store -+ vmovdqa $D2, $ctr2_store -+___ -+$code.=<<___ if ($n eq 4); -+ vmovdqa .avx2_inc(%rip), $D0 -+ vpaddd $ctr0_store, $D0, $D3 -+ vpaddd $D3, $D0, $D2 -+ vpaddd $D2, $D0, $D1 -+ vpaddd $D1, $D0, $D0 -+ vmovdqa $D3, $ctr3_store -+ vmovdqa $D2, $ctr2_store -+ vmovdqa $D1, $ctr1_store -+ vmovdqa $D0, $ctr0_store -+___ -+} -+ -+sub finalize_state_avx2 { -+my ($n)=@_; -+$code.=<<___ if ($n eq 4); -+ vpaddd .chacha20_consts(%rip), $A3, $A3 -+ vpaddd $state1_store, $B3, $B3 -+ vpaddd $state2_store, $C3, $C3 -+ vpaddd $ctr3_store, $D3, $D3 -+___ -+$code.=<<___ if ($n ge 3); -+ vpaddd .chacha20_consts(%rip), $A2, $A2 -+ vpaddd $state1_store, $B2, $B2 -+ vpaddd $state2_store, $C2, $C2 -+ vpaddd $ctr2_store, $D2, $D2 -+___ -+$code.=<<___ if ($n ge 2); -+ vpaddd .chacha20_consts(%rip), $A1, $A1 -+ vpaddd $state1_store, $B1, $B1 -+ vpaddd $state2_store, $C1, $C1 -+ vpaddd $ctr1_store, $D1, $D1 -+___ -+$code.=<<___; -+ vpaddd .chacha20_consts(%rip), $A0, $A0 -+ vpaddd $state1_store, $B0, $B0 -+ vpaddd $state2_store, $C0, $C0 -+ vpaddd $ctr0_store, $D0, $D0 -+___ -+} -+ -+sub xor_stream_avx2 { -+my ($A, $B, $C, $D, $offset, $hlp)=@_; -+$code.=<<___; -+ vperm2i128 \$0x02, $A, $B, $hlp -+ vperm2i128 \$0x13, $A, $B, $B -+ vperm2i128 \$0x02, $C, $D, $A -+ vperm2i128 \$0x13, $C, $D, $C -+ vpxor 0*32+$offset($inp), $hlp, $hlp -+ vpxor 1*32+$offset($inp), $A, $A -+ vpxor 2*32+$offset($inp), $B, $B -+ vpxor 3*32+$offset($inp), $C, $C -+ vmovdqu $hlp, 0*32+$offset($oup) -+ vmovdqu $A, 1*32+$offset($oup) -+ vmovdqu $B, 2*32+$offset($oup) -+ vmovdqu $C, 3*32+$offset($oup) -+___ -+} -+ -+sub finish_stream_avx2 { -+my ($A, $B, $C, $D, $hlp)=@_; -+$code.=<<___; -+ vperm2i128 \$0x13, $A, $B, $hlp -+ vperm2i128 \$0x02, $A, $B, $A -+ vperm2i128 \$0x02, $C, $D, $B -+ vperm2i128 \$0x13, $C, $D, $D -+ vmovdqa $hlp, $C -+___ -+} -+ -+sub poly_stage1_mulx { -+$code.=<<___; -+ mov 0+$r_store, %rdx -+ mov %rdx, $t2 -+ mulx $acc0, $t0, $t1 -+ mulx $acc1, %rax, %rdx -+ imul $acc2, $t2 -+ add %rax, $t1 -+ adc %rdx, $t2 -+___ -+} -+ -+sub poly_stage2_mulx { -+$code.=<<___; -+ mov 8+$r_store, %rdx -+ mulx $acc0, $acc0, %rax -+ add $acc0, $t1 -+ mulx $acc1, $acc1, $t3 -+ adc $acc1, $t2 -+ adc \$0, $t3 -+ imul $acc2, %rdx -+___ -+} -+ -+sub poly_stage3_mulx { -+$code.=<<___; -+ add %rax, $t2 -+ adc %rdx, $t3 -+___ -+} -+ -+sub poly_mul_mulx { -+ &poly_stage1_mulx(); -+ &poly_stage2_mulx(); -+ &poly_stage3_mulx(); -+ &poly_reduce_stage(); -+} -+ -+sub gen_chacha_round_avx2 { -+my ($rot1, $rot2, $shift)=@_; -+my $round=""; -+$round=$round ."vmovdqa $C0, $tmp_store\n" if ($rot1 eq 20); -+$round=$round ."vmovdqa $rot2, $C0 -+ vpaddd $B3, $A3, $A3 -+ vpaddd $B2, $A2, $A2 -+ vpaddd $B1, $A1, $A1 -+ vpaddd $B0, $A0, $A0 -+ vpxor $A3, $D3, $D3 -+ vpxor $A2, $D2, $D2 -+ vpxor $A1, $D1, $D1 -+ vpxor $A0, $D0, $D0 -+ vpshufb $C0, $D3, $D3 -+ vpshufb $C0, $D2, $D2 -+ vpshufb $C0, $D1, $D1 -+ vpshufb $C0, $D0, $D0 -+ vmovdqa $tmp_store, $C0 -+ vpaddd $D3, $C3, $C3 -+ vpaddd $D2, $C2, $C2 -+ vpaddd $D1, $C1, $C1 -+ vpaddd $D0, $C0, $C0 -+ vpxor $C3, $B3, $B3 -+ vpxor $C2, $B2, $B2 -+ vpxor $C1, $B1, $B1 -+ vpxor $C0, $B0, $B0 -+ vmovdqa $C0, $tmp_store -+ vpsrld \$$rot1, $B3, $C0 -+ vpslld \$32-$rot1, $B3, $B3 -+ vpxor $C0, $B3, $B3 -+ vpsrld \$$rot1, $B2, $C0 -+ vpslld \$32-$rot1, $B2, $B2 -+ vpxor $C0, $B2, $B2 -+ vpsrld \$$rot1, $B1, $C0 -+ vpslld \$32-$rot1, $B1, $B1 -+ vpxor $C0, $B1, $B1 -+ vpsrld \$$rot1, $B0, $C0 -+ vpslld \$32-$rot1, $B0, $B0 -+ vpxor $C0, $B0, $B0\n"; -+($s1,$s2,$s3)=(4,8,12) if ($shift =~ /left/); -+($s1,$s2,$s3)=(12,8,4) if ($shift =~ /right/); -+$round=$round ."vmovdqa $tmp_store, $C0 -+ vpalignr \$$s1, $B3, $B3, $B3 -+ vpalignr \$$s2, $C3, $C3, $C3 -+ vpalignr \$$s3, $D3, $D3, $D3 -+ vpalignr \$$s1, $B2, $B2, $B2 -+ vpalignr \$$s2, $C2, $C2, $C2 -+ vpalignr \$$s3, $D2, $D2, $D2 -+ vpalignr \$$s1, $B1, $B1, $B1 -+ vpalignr \$$s2, $C1, $C1, $C1 -+ vpalignr \$$s3, $D1, $D1, $D1 -+ vpalignr \$$s1, $B0, $B0, $B0 -+ vpalignr \$$s2, $C0, $C0, $C0 -+ vpalignr \$$s3, $D0, $D0, $D0\n" -+if (($shift =~ /left/) || ($shift =~ /right/)); -+return $round; -+}; -+ -+$chacha_body = &gen_chacha_round_avx2(20, ".rol16(%rip)") . -+ &gen_chacha_round_avx2(25, ".rol8(%rip)", "left") . -+ &gen_chacha_round_avx2(20, ".rol16(%rip)") . -+ &gen_chacha_round_avx2(25, ".rol8(%rip)", "right"); -+ -+@loop_body = split /\n/, $chacha_body; -+ -+$code.=" -+############################################################################### -+.type chacha20_poly1305_open_avx2,\@function,2 -+.align 64 -+chacha20_poly1305_open_avx2: -+ vzeroupper -+ vmovdqa .chacha20_consts(%rip), $A0 -+ vbroadcasti128 0*16($keyp), $B0 -+ vbroadcasti128 1*16($keyp), $C0 -+ vbroadcasti128 2*16($keyp), $D0 -+ vpaddd .avx2_init(%rip), $D0, $D0 -+ cmp \$6*32, $inl -+ jbe open_avx2_192 -+ cmp \$10*32, $inl -+ jbe open_avx2_320 -+ -+ vmovdqa $B0, $state1_store -+ vmovdqa $C0, $state2_store -+ vmovdqa $D0, $ctr0_store -+ mov \$10, $acc0 -+1: \n"; -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"right"); $code.=" -+ dec $acc0 -+ jne 1b -+ vpaddd .chacha20_consts(%rip), $A0, $A0 -+ vpaddd $state1_store, $B0, $B0 -+ vpaddd $state2_store, $C0, $C0 -+ vpaddd $ctr0_store, $D0, $D0 -+ -+ vperm2i128 \$0x02, $A0, $B0, $T0 -+ # Clamp and store key -+ vpand .clamp(%rip), $T0, $T0 -+ vmovdqa $T0, $r_store -+ # Stream for the first 64 bytes -+ vperm2i128 \$0x13, $A0, $B0, $A0 -+ vperm2i128 \$0x13, $C0, $D0, $B0 -+ # Hash AD + first 64 bytes -+ mov %r8, $itr2 -+ call poly_hash_ad_internal -+ xor $itr1, $itr1 -+ # Hash first 64 bytes -+1: \n"; -+ &poly_add("0($inp, $itr1)"); -+ &poly_mul(); $code.=" -+ add \$16, $itr1 -+ cmp \$2*32, $itr1 -+ jne 1b -+ # Decrypt first 64 bytes -+ vpxor 0*32($inp), $A0, $A0 -+ vpxor 1*32($inp), $B0, $B0 -+ vmovdqu $A0, 0*32($oup) -+ vmovdqu $B0, 1*32($oup) -+ lea 2*32($inp), $inp -+ lea 2*32($oup), $oup -+ sub \$2*32, $inl -+1: -+ # Hash and decrypt 512 bytes each iteration -+ cmp \$16*32, $inl -+ jb 3f\n"; -+ &prep_state_avx2(4); $code.=" -+ xor $itr1, $itr1 -+2: \n"; -+ &poly_add("0*8($inp, $itr1)"); -+ &emit_body(10); -+ &poly_stage1_mulx(); -+ &emit_body(9); -+ &poly_stage2_mulx(); -+ &emit_body(12); -+ &poly_stage3_mulx(); -+ &emit_body(10); -+ &poly_reduce_stage(); -+ &emit_body(9); -+ &poly_add("2*8($inp, $itr1)"); -+ &emit_body(8); -+ &poly_stage1_mulx(); -+ &emit_body(18); -+ &poly_stage2_mulx(); -+ &emit_body(18); -+ &poly_stage3_mulx(); -+ &emit_body(9); -+ &poly_reduce_stage(); -+ &emit_body(8); -+ &poly_add("4*8($inp, $itr1)"); $code.=" -+ lea 6*8($itr1), $itr1\n"; -+ &emit_body(18); -+ &poly_stage1_mulx(); -+ &emit_body(8); -+ &poly_stage2_mulx(); -+ &emit_body(8); -+ &poly_stage3_mulx(); -+ &emit_body(18); -+ &poly_reduce_stage(); -+ foreach $l (@loop_body) {$code.=$l."\n";} -+ @loop_body = split /\n/, $chacha_body; $code.=" -+ cmp \$10*6*8, $itr1 -+ jne 2b\n"; -+ &finalize_state_avx2(4); $code.=" -+ vmovdqa $A0, $tmp_store\n"; -+ &poly_add("10*6*8($inp)"); -+ &xor_stream_avx2($A3, $B3, $C3, $D3, 0*32, $A0); $code.=" -+ vmovdqa $tmp_store, $A0\n"; -+ &poly_mul(); -+ &xor_stream_avx2($A2, $B2, $C2, $D2, 4*32, $A3); -+ &poly_add("10*6*8+2*8($inp)"); -+ &xor_stream_avx2($A1, $B1, $C1, $D1, 8*32, $A3); -+ &poly_mul(); -+ &xor_stream_avx2($A0, $B0, $C0, $D0, 12*32, $A3); $code.=" -+ lea 16*32($inp), $inp -+ lea 16*32($oup), $oup -+ sub \$16*32, $inl -+ jmp 1b -+3: -+ test $inl, $inl -+ vzeroupper -+ je open_sse_finalize -+3: -+ cmp \$4*32, $inl -+ ja 3f\n"; -+############################################################################### -+ # 1-128 bytes left -+ &prep_state_avx2(1); $code.=" -+ xor $itr2, $itr2 -+ mov $inl, $itr1 -+ and \$-16, $itr1 -+ test $itr1, $itr1 -+ je 2f -+1: \n"; -+ &poly_add("0*8($inp, $itr2)"); -+ &poly_mul(); $code.=" -+2: -+ add \$16, $itr2\n"; -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"right"); $code.=" -+ cmp $itr1, $itr2 -+ jb 1b -+ cmp \$160, $itr2 -+ jne 2b\n"; -+ &finalize_state_avx2(1); -+ &finish_stream_avx2($A0,$B0,$C0,$D0,$T0); $code.=" -+ jmp open_avx2_tail_loop -+3: -+ cmp \$8*32, $inl -+ ja 3f\n"; -+############################################################################### -+ # 129-256 bytes left -+ &prep_state_avx2(2); $code.=" -+ mov $inl, $tmp_store -+ mov $inl, $itr1 -+ sub \$4*32, $itr1 -+ shr \$4, $itr1 -+ mov \$10, $itr2 -+ cmp \$10, $itr1 -+ cmovg $itr2, $itr1 -+ mov $inp, $inl -+ xor $itr2, $itr2 -+1: \n"; -+ &poly_add("0*8($inl)"); -+ &poly_mul_mulx(); $code.=" -+ lea 16($inl), $inl -+2: \n"; -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"left"); $code.=" -+ inc $itr2\n"; -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"right"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"right"); -+ &chacha_qr_avx2($A2,$B2,$C2,$D2,$T0,"right"); $code.=" -+ cmp $itr1, $itr2 -+ jb 1b -+ cmp \$10, $itr2 -+ jne 2b -+ mov $inl, $itr2 -+ sub $inp, $inl -+ mov $inl, $itr1 -+ mov $tmp_store, $inl -+1: -+ add \$16, $itr1 -+ cmp $inl, $itr1 -+ jg 1f\n"; -+ &poly_add("0*8($itr2)"); -+ &poly_mul_mulx(); $code.=" -+ lea 16($itr2), $itr2 -+ jmp 1b -+1: \n"; -+ &finalize_state_avx2(2); -+ &xor_stream_avx2($A1, $B1, $C1, $D1, 0*32, $T0); -+ &finish_stream_avx2($A0, $B0, $C0, $D0, $T0); $code.=" -+ lea 4*32($inp), $inp -+ lea 4*32($oup), $oup -+ sub \$4*32, $inl -+ jmp open_avx2_tail_loop -+3: -+ cmp \$12*32, $inl -+ ja 3f\n"; -+############################################################################### -+ # 257-383 bytes left -+ &prep_state_avx2(3); $code.=" -+ mov $inl, $tmp_store -+ mov $inl, $itr1 -+ sub \$8*32, $itr1 -+ shr \$4, $itr1 -+ add \$6, $itr1 -+ mov \$10, $itr2 -+ cmp \$10, $itr1 -+ cmovg $itr2, $itr1 -+ mov $inp, $inl -+ xor $itr2, $itr2 -+1: \n"; -+ &poly_add("0*8($inl)"); -+ &poly_mul_mulx(); $code.=" -+ lea 16($inl), $inl -+2: \n"; -+ &chacha_qr_avx2($A2,$B2,$C2,$D2,$T0,"left"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"left"); -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"left"); -+ &poly_add("0*8($inl)"); -+ &poly_mul(); $code.=" -+ lea 16($inl), $inl -+ inc $itr2\n"; -+ &chacha_qr_avx2($A2,$B2,$C2,$D2,$T0,"right"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"right"); -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"right"); $code.=" -+ cmp $itr1, $itr2 -+ jb 1b -+ cmp \$10, $itr2 -+ jne 2b -+ mov $inl, $itr2 -+ sub $inp, $inl -+ mov $inl, $itr1 -+ mov $tmp_store, $inl -+1: -+ add \$16, $itr1 -+ cmp $inl, $itr1 -+ jg 1f\n"; -+ &poly_add("0*8($itr2)"); -+ &poly_mul_mulx(); $code.=" -+ lea 16($itr2), $itr2 -+ jmp 1b -+1: \n"; -+ &finalize_state_avx2(3); -+ &xor_stream_avx2($A2, $B2, $C2, $D2, 0*32, $T0); -+ &xor_stream_avx2($A1, $B1, $C1, $D1, 4*32, $T0); -+ &finish_stream_avx2($A0, $B0, $C0, $D0, $T0); $code.=" -+ lea 8*32($inp), $inp -+ lea 8*32($oup), $oup -+ sub \$8*32, $inl -+ jmp open_avx2_tail_loop -+3: \n"; -+############################################################################### -+ # 384-512 bytes left -+ &prep_state_avx2(4); $code.=" -+ xor $itr1, $itr1 -+ mov $inp, $itr2 -+1: \n"; -+ &poly_add("0*8($itr2)"); -+ &poly_mul(); $code.=" -+ lea 2*8($itr2), $itr2 -+2: \n"; -+ &emit_body(37); -+ &poly_add("0*8($itr2)"); -+ &poly_mul_mulx(); -+ &emit_body(48); -+ &poly_add("2*8($itr2)"); -+ &poly_mul_mulx(); $code.=" -+ lea 4*8($itr2), $itr2\n"; -+ foreach $l (@loop_body) {$code.=$l."\n";} -+ @loop_body = split /\n/, $chacha_body; $code.=" -+ inc $itr1 -+ cmp \$4, $itr1 -+ jl 1b -+ cmp \$10, $itr1 -+ jne 2b -+ mov $inl, $itr1 -+ sub \$12*32, $itr1 -+ and \$-16, $itr1 -+1: -+ test $itr1, $itr1 -+ je 1f\n"; -+ &poly_add("0*8($itr2)"); -+ &poly_mul_mulx(); $code.=" -+ lea 2*8($itr2), $itr2 -+ sub \$2*8, $itr1 -+ jmp 1b -+1: \n"; -+ &finalize_state_avx2(4); $code.=" -+ vmovdqa $A0, $tmp_store\n"; -+ &xor_stream_avx2($A3, $B3, $C3, $D3, 0*32, $A0); $code.=" -+ vmovdqa $tmp_store, $A0\n"; -+ &xor_stream_avx2($A2, $B2, $C2, $D2, 4*32, $A3); -+ &xor_stream_avx2($A1, $B1, $C1, $D1, 8*32, $A3); -+ &finish_stream_avx2($A0, $B0, $C0, $D0, $A3); $code.=" -+ lea 12*32($inp), $inp -+ lea 12*32($oup), $oup -+ sub \$12*32, $inl -+open_avx2_tail_loop: -+ cmp \$32, $inl -+ jb open_avx2_tail -+ sub \$32, $inl -+ vpxor ($inp), $A0, $A0 -+ vmovdqu $A0, ($oup) -+ lea 1*32($inp), $inp -+ lea 1*32($oup), $oup -+ vmovdqa $B0, $A0 -+ vmovdqa $C0, $B0 -+ vmovdqa $D0, $C0 -+ jmp open_avx2_tail_loop -+open_avx2_tail: -+ cmp \$16, $inl -+ vmovdqa $A0x, $A1x -+ jb 1f -+ sub \$16, $inl -+ #load for decryption -+ vpxor ($inp), $A0x, $A1x -+ vmovdqu $A1x, ($oup) -+ lea 1*16($inp), $inp -+ lea 1*16($oup), $oup -+ vperm2i128 \$0x11, $A0, $A0, $A0 -+ vmovdqa $A0x, $A1x -+1: -+ vzeroupper -+ jmp open_sse_tail_16 -+############################################################################### -+open_avx2_192: -+ vmovdqa $A0, $A1 -+ vmovdqa $A0, $A2 -+ vmovdqa $B0, $B1 -+ vmovdqa $B0, $B2 -+ vmovdqa $C0, $C1 -+ vmovdqa $C0, $C2 -+ vpaddd .avx2_inc(%rip), $D0, $D1 -+ vmovdqa $D0, $T2 -+ vmovdqa $D1, $T3 -+ mov \$10, $acc0 -+1: \n"; -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"left"); -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"right"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"right"); $code.=" -+ dec $acc0 -+ jne 1b -+ vpaddd $A2, $A0, $A0 -+ vpaddd $A2, $A1, $A1 -+ vpaddd $B2, $B0, $B0 -+ vpaddd $B2, $B1, $B1 -+ vpaddd $C2, $C0, $C0 -+ vpaddd $C2, $C1, $C1 -+ vpaddd $T2, $D0, $D0 -+ vpaddd $T3, $D1, $D1 -+ vperm2i128 \$0x02, $A0, $B0, $T0 -+ # Clamp and store the key -+ vpand .clamp(%rip), $T0, $T0 -+ vmovdqa $T0, $r_store -+ # Stream for up to 192 bytes -+ vperm2i128 \$0x13, $A0, $B0, $A0 -+ vperm2i128 \$0x13, $C0, $D0, $B0 -+ vperm2i128 \$0x02, $A1, $B1, $C0 -+ vperm2i128 \$0x02, $C1, $D1, $D0 -+ vperm2i128 \$0x13, $A1, $B1, $A1 -+ vperm2i128 \$0x13, $C1, $D1, $B1 -+open_avx2_short: -+ mov %r8, $itr2 -+ call poly_hash_ad_internal -+open_avx2_hash_and_xor_loop: -+ cmp \$32, $inl -+ jb open_avx2_short_tail_32 -+ sub \$32, $inl\n"; -+ # Load + hash -+ &poly_add("0*8($inp)"); -+ &poly_mul(); -+ &poly_add("2*8($inp)"); -+ &poly_mul(); $code.=" -+ # Load + decrypt -+ vpxor ($inp), $A0, $A0 -+ vmovdqu $A0, ($oup) -+ lea 1*32($inp), $inp -+ lea 1*32($oup), $oup -+ # Shift stream -+ vmovdqa $B0, $A0 -+ vmovdqa $C0, $B0 -+ vmovdqa $D0, $C0 -+ vmovdqa $A1, $D0 -+ vmovdqa $B1, $A1 -+ vmovdqa $C1, $B1 -+ vmovdqa $D1, $C1 -+ vmovdqa $A2, $D1 -+ vmovdqa $B2, $A2 -+ jmp open_avx2_hash_and_xor_loop -+open_avx2_short_tail_32: -+ cmp \$16, $inl -+ vmovdqa $A0x, $A1x -+ jb 1f -+ sub \$16, $inl\n"; -+ &poly_add("0*8($inp)"); -+ &poly_mul(); $code.=" -+ vpxor ($inp), $A0x, $A3x -+ vmovdqu $A3x, ($oup) -+ lea 1*16($inp), $inp -+ lea 1*16($oup), $oup -+ vextracti128 \$1, $A0, $A1x -+1: -+ vzeroupper -+ jmp open_sse_tail_16 -+############################################################################### -+open_avx2_320: -+ vmovdqa $A0, $A1 -+ vmovdqa $A0, $A2 -+ vmovdqa $B0, $B1 -+ vmovdqa $B0, $B2 -+ vmovdqa $C0, $C1 -+ vmovdqa $C0, $C2 -+ vpaddd .avx2_inc(%rip), $D0, $D1 -+ vpaddd .avx2_inc(%rip), $D1, $D2 -+ vmovdqa $B0, $T1 -+ vmovdqa $C0, $T2 -+ vmovdqa $D0, $ctr0_store -+ vmovdqa $D1, $ctr1_store -+ vmovdqa $D2, $ctr2_store -+ mov \$10, $acc0 -+1: \n"; -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"left"); -+ &chacha_qr_avx2($A2,$B2,$C2,$D2,$T0,"left"); -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"right"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"right"); -+ &chacha_qr_avx2($A2,$B2,$C2,$D2,$T0,"right"); $code.=" -+ dec $acc0 -+ jne 1b -+ vpaddd .chacha20_consts(%rip), $A0, $A0 -+ vpaddd .chacha20_consts(%rip), $A1, $A1 -+ vpaddd .chacha20_consts(%rip), $A2, $A2 -+ vpaddd $T1, $B0, $B0 -+ vpaddd $T1, $B1, $B1 -+ vpaddd $T1, $B2, $B2 -+ vpaddd $T2, $C0, $C0 -+ vpaddd $T2, $C1, $C1 -+ vpaddd $T2, $C2, $C2 -+ vpaddd $ctr0_store, $D0, $D0 -+ vpaddd $ctr1_store, $D1, $D1 -+ vpaddd $ctr2_store, $D2, $D2 -+ vperm2i128 \$0x02, $A0, $B0, $T0 -+ # Clamp and store the key -+ vpand .clamp(%rip), $T0, $T0 -+ vmovdqa $T0, $r_store -+ # Stream for up to 320 bytes -+ vperm2i128 \$0x13, $A0, $B0, $A0 -+ vperm2i128 \$0x13, $C0, $D0, $B0 -+ vperm2i128 \$0x02, $A1, $B1, $C0 -+ vperm2i128 \$0x02, $C1, $D1, $D0 -+ vperm2i128 \$0x13, $A1, $B1, $A1 -+ vperm2i128 \$0x13, $C1, $D1, $B1 -+ vperm2i128 \$0x02, $A2, $B2, $C1 -+ vperm2i128 \$0x02, $C2, $D2, $D1 -+ vperm2i128 \$0x13, $A2, $B2, $A2 -+ vperm2i128 \$0x13, $C2, $D2, $B2 -+ jmp open_avx2_short -+.size chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2 -+############################################################################### -+############################################################################### -+.type chacha20_poly1305_seal_avx2,\@function,2 -+.align 64 -+chacha20_poly1305_seal_avx2: -+ vzeroupper -+ vmovdqa .chacha20_consts(%rip), $A0 -+ vbroadcasti128 0*16($keyp), $B0 -+ vbroadcasti128 1*16($keyp), $C0 -+ vbroadcasti128 2*16($keyp), $D0 -+ vpaddd .avx2_init(%rip), $D0, $D0 -+ cmp \$6*32, $inl -+ jbe seal_avx2_192 -+ cmp \$10*32, $inl -+ jbe seal_avx2_320 -+ vmovdqa $A0, $A1 -+ vmovdqa $A0, $A2 -+ vmovdqa $A0, $A3 -+ vmovdqa $B0, $B1 -+ vmovdqa $B0, $B2 -+ vmovdqa $B0, $B3 -+ vmovdqa $B0, $state1_store -+ vmovdqa $C0, $C1 -+ vmovdqa $C0, $C2 -+ vmovdqa $C0, $C3 -+ vmovdqa $C0, $state2_store -+ vmovdqa $D0, $D3 -+ vpaddd .avx2_inc(%rip), $D3, $D2 -+ vpaddd .avx2_inc(%rip), $D2, $D1 -+ vpaddd .avx2_inc(%rip), $D1, $D0 -+ vmovdqa $D0, $ctr0_store -+ vmovdqa $D1, $ctr1_store -+ vmovdqa $D2, $ctr2_store -+ vmovdqa $D3, $ctr3_store -+ mov \$10, $acc0 -+1: \n"; -+ foreach $l (@loop_body) {$code.=$l."\n";} -+ @loop_body = split /\n/, $chacha_body; $code.=" -+ dec $acc0 -+ jnz 1b\n"; -+ &finalize_state_avx2(4); $code.=" -+ vperm2i128 \$0x13, $C3, $D3, $C3 -+ vperm2i128 \$0x02, $A3, $B3, $D3 -+ vperm2i128 \$0x13, $A3, $B3, $A3 -+ vpand .clamp(%rip), $D3, $D3 -+ vmovdqa $D3, $r_store -+ mov %r8, $itr2 -+ call poly_hash_ad_internal -+ # Safely store 320 bytes (otherwise would handle with optimized call) -+ vpxor 0*32($inp), $A3, $A3 -+ vpxor 1*32($inp), $C3, $C3 -+ vmovdqu $A3, 0*32($oup) -+ vmovdqu $C3, 1*32($oup)\n"; -+ &xor_stream_avx2($A2,$B2,$C2,$D2,2*32,$T3); -+ &xor_stream_avx2($A1,$B1,$C1,$D1,6*32,$T3); -+ &finish_stream_avx2($A0,$B0,$C0,$D0,$T3); $code.=" -+ lea 10*32($inp), $inp -+ sub \$10*32, $inl -+ mov \$10*32, $itr1 -+ cmp \$4*32, $inl -+ jbe seal_avx2_hash -+ vpxor 0*32($inp), $A0, $A0 -+ vpxor 1*32($inp), $B0, $B0 -+ vpxor 2*32($inp), $C0, $C0 -+ vpxor 3*32($inp), $D0, $D0 -+ vmovdqu $A0, 10*32($oup) -+ vmovdqu $B0, 11*32($oup) -+ vmovdqu $C0, 12*32($oup) -+ vmovdqu $D0, 13*32($oup) -+ lea 4*32($inp), $inp -+ sub \$4*32, $inl -+ mov \$8, $itr1 -+ mov \$2, $itr2 -+ cmp \$4*32, $inl -+ jbe seal_avx2_tail_128 -+ cmp \$8*32, $inl -+ jbe seal_avx2_tail_256 -+ cmp \$12*32, $inl -+ jbe seal_avx2_tail_384 -+ cmp \$16*32, $inl -+ jbe seal_avx2_tail_512\n"; -+ # We have 448 bytes to hash, but main loop hashes 512 bytes at a time - perform some rounds, before the main loop -+ &prep_state_avx2(4); -+ foreach $l (@loop_body) {$code.=$l."\n";} -+ @loop_body = split /\n/, $chacha_body; -+ &emit_body(41); -+ @loop_body = split /\n/, $chacha_body; $code.=" -+ sub \$16, $oup -+ mov \$9, $itr1 -+ jmp 4f -+1: \n"; -+ &prep_state_avx2(4); $code.=" -+ mov \$10, $itr1 -+2: \n"; -+ &poly_add("0*8($oup)"); -+ &emit_body(10); -+ &poly_stage1_mulx(); -+ &emit_body(9); -+ &poly_stage2_mulx(); -+ &emit_body(12); -+ &poly_stage3_mulx(); -+ &emit_body(10); -+ &poly_reduce_stage(); $code.=" -+4: \n"; -+ &emit_body(9); -+ &poly_add("2*8($oup)"); -+ &emit_body(8); -+ &poly_stage1_mulx(); -+ &emit_body(18); -+ &poly_stage2_mulx(); -+ &emit_body(18); -+ &poly_stage3_mulx(); -+ &emit_body(9); -+ &poly_reduce_stage(); -+ &emit_body(8); -+ &poly_add("4*8($oup)"); $code.=" -+ lea 6*8($oup), $oup\n"; -+ &emit_body(18); -+ &poly_stage1_mulx(); -+ &emit_body(8); -+ &poly_stage2_mulx(); -+ &emit_body(8); -+ &poly_stage3_mulx(); -+ &emit_body(18); -+ &poly_reduce_stage(); -+ foreach $l (@loop_body) {$code.=$l."\n";} -+ @loop_body = split /\n/, $chacha_body; $code.=" -+ dec $itr1 -+ jne 2b\n"; -+ &finalize_state_avx2(4); $code.=" -+ lea 4*8($oup), $oup -+ vmovdqa $A0, $tmp_store\n"; -+ &poly_add("-4*8($oup)"); -+ &xor_stream_avx2($A3, $B3, $C3, $D3, 0*32, $A0); $code.=" -+ vmovdqa $tmp_store, $A0\n"; -+ &poly_mul(); -+ &xor_stream_avx2($A2, $B2, $C2, $D2, 4*32, $A3); -+ &poly_add("-2*8($oup)"); -+ &xor_stream_avx2($A1, $B1, $C1, $D1, 8*32, $A3); -+ &poly_mul(); -+ &xor_stream_avx2($A0, $B0, $C0, $D0, 12*32, $A3); $code.=" -+ lea 16*32($inp), $inp -+ sub \$16*32, $inl -+ cmp \$16*32, $inl -+ jg 1b\n"; -+ &poly_add("0*8($oup)"); -+ &poly_mul(); -+ &poly_add("2*8($oup)"); -+ &poly_mul(); $code.=" -+ lea 4*8($oup), $oup -+ mov \$10, $itr1 -+ xor $itr2, $itr2 -+ cmp \$4*32, $inl -+ ja 3f -+############################################################################### -+seal_avx2_tail_128:\n"; -+ &prep_state_avx2(1); $code.=" -+1: \n"; -+ &poly_add("0($oup)"); -+ &poly_mul(); $code.=" -+ lea 2*8($oup), $oup -+2: \n"; -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"left"); -+ &poly_add("0*8($oup)"); -+ &poly_mul(); -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"right"); -+ &poly_add("2*8($oup)"); -+ &poly_mul(); $code.=" -+ lea 4*8($oup), $oup -+ dec $itr1 -+ jg 1b -+ dec $itr2 -+ jge 2b\n"; -+ &finalize_state_avx2(1); -+ &finish_stream_avx2($A0,$B0,$C0,$D0,$T0); $code.=" -+ jmp seal_avx2_short_loop -+3: -+ cmp \$8*32, $inl -+ ja 3f -+############################################################################### -+seal_avx2_tail_256:\n"; -+ &prep_state_avx2(2); $code.=" -+1: \n"; -+ &poly_add("0($oup)"); -+ &poly_mul(); $code.=" -+ lea 2*8($oup), $oup -+2: \n"; -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"left"); -+ &poly_add("0*8($oup)"); -+ &poly_mul(); -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"right"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"right"); -+ &poly_add("2*8($oup)"); -+ &poly_mul(); $code.=" -+ lea 4*8($oup), $oup -+ dec $itr1 -+ jg 1b -+ dec $itr2 -+ jge 2b\n"; -+ &finalize_state_avx2(2); -+ &xor_stream_avx2($A1,$B1,$C1,$D1,0*32,$T0); -+ &finish_stream_avx2($A0,$B0,$C0,$D0,$T0); $code.=" -+ mov \$4*32, $itr1 -+ lea 4*32($inp), $inp -+ sub \$4*32, $inl -+ jmp seal_avx2_hash -+3: -+ cmp \$12*32, $inl -+ ja seal_avx2_tail_512 -+############################################################################### -+seal_avx2_tail_384:\n"; -+ &prep_state_avx2(3); $code.=" -+1: \n"; -+ &poly_add("0($oup)"); -+ &poly_mul(); $code.=" -+ lea 2*8($oup), $oup -+2: \n"; -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"left"); -+ &poly_add("0*8($oup)"); -+ &poly_mul(); -+ &chacha_qr_avx2($A2,$B2,$C2,$D2,$T0,"left"); -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"right"); -+ &poly_add("2*8($oup)"); -+ &poly_mul(); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"right"); -+ &chacha_qr_avx2($A2,$B2,$C2,$D2,$T0,"right"); $code.=" -+ lea 4*8($oup), $oup -+ dec $itr1 -+ jg 1b -+ dec $itr2 -+ jge 2b\n"; -+ &finalize_state_avx2(3); -+ &xor_stream_avx2($A2,$B2,$C2,$D2,0*32,$T0); -+ &xor_stream_avx2($A1,$B1,$C1,$D1,4*32,$T0); -+ &finish_stream_avx2($A0,$B0,$C0,$D0,$T0); $code.=" -+ mov \$8*32, $itr1 -+ lea 8*32($inp), $inp -+ sub \$8*32, $inl -+ jmp seal_avx2_hash -+############################################################################### -+seal_avx2_tail_512:\n"; -+ &prep_state_avx2(4); $code.=" -+1: \n"; -+ &poly_add("0($oup)"); -+ &poly_mul_mulx(); $code.=" -+ lea 2*8($oup), $oup -+2: \n"; -+ &emit_body(20); -+ &poly_add("0*8($oup)"); -+ &emit_body(20); -+ &poly_stage1_mulx(); -+ &emit_body(20); -+ &poly_stage2_mulx(); -+ &emit_body(20); -+ &poly_stage3_mulx(); -+ &emit_body(20); -+ &poly_reduce_stage(); -+ &emit_body(20); -+ &poly_add("2*8($oup)"); -+ &emit_body(20); -+ &poly_stage1_mulx(); -+ &emit_body(20); -+ &poly_stage2_mulx(); -+ &emit_body(20); -+ &poly_stage3_mulx(); -+ &emit_body(20); -+ &poly_reduce_stage(); -+ foreach $l (@loop_body) {$code.=$l."\n";} -+ @loop_body = split /\n/, $chacha_body; $code.=" -+ lea 4*8($oup), $oup -+ dec $itr1 -+ jg 1b -+ dec $itr2 -+ jge 2b\n"; -+ &finalize_state_avx2(4); $code.=" -+ vmovdqa $A0, $tmp_store\n"; -+ &xor_stream_avx2($A3, $B3, $C3, $D3, 0*32, $A0); $code.=" -+ vmovdqa $tmp_store, $A0\n"; -+ &xor_stream_avx2($A2, $B2, $C2, $D2, 4*32, $A3); -+ &xor_stream_avx2($A1, $B1, $C1, $D1, 8*32, $A3); -+ &finish_stream_avx2($A0,$B0,$C0,$D0,$T0); $code.=" -+ mov \$12*32, $itr1 -+ lea 12*32($inp), $inp -+ sub \$12*32, $inl -+ jmp seal_avx2_hash -+################################################################################ -+seal_avx2_320: -+ vmovdqa $A0, $A1 -+ vmovdqa $A0, $A2 -+ vmovdqa $B0, $B1 -+ vmovdqa $B0, $B2 -+ vmovdqa $C0, $C1 -+ vmovdqa $C0, $C2 -+ vpaddd .avx2_inc(%rip), $D0, $D1 -+ vpaddd .avx2_inc(%rip), $D1, $D2 -+ vmovdqa $B0, $T1 -+ vmovdqa $C0, $T2 -+ vmovdqa $D0, $ctr0_store -+ vmovdqa $D1, $ctr1_store -+ vmovdqa $D2, $ctr2_store -+ mov \$10, $acc0 -+1: \n"; -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"left"); -+ &chacha_qr_avx2($A2,$B2,$C2,$D2,$T0,"left"); -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"right"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"right"); -+ &chacha_qr_avx2($A2,$B2,$C2,$D2,$T0,"right"); $code.=" -+ dec $acc0 -+ jne 1b -+ vpaddd .chacha20_consts(%rip), $A0, $A0 -+ vpaddd .chacha20_consts(%rip), $A1, $A1 -+ vpaddd .chacha20_consts(%rip), $A2, $A2 -+ vpaddd $T1, $B0, $B0 -+ vpaddd $T1, $B1, $B1 -+ vpaddd $T1, $B2, $B2 -+ vpaddd $T2, $C0, $C0 -+ vpaddd $T2, $C1, $C1 -+ vpaddd $T2, $C2, $C2 -+ vpaddd $ctr0_store, $D0, $D0 -+ vpaddd $ctr1_store, $D1, $D1 -+ vpaddd $ctr2_store, $D2, $D2 -+ vperm2i128 \$0x02, $A0, $B0, $T0 -+ # Clamp and store the key -+ vpand .clamp(%rip), $T0, $T0 -+ vmovdqa $T0, $r_store -+ # Stream for up to 320 bytes -+ vperm2i128 \$0x13, $A0, $B0, $A0 -+ vperm2i128 \$0x13, $C0, $D0, $B0 -+ vperm2i128 \$0x02, $A1, $B1, $C0 -+ vperm2i128 \$0x02, $C1, $D1, $D0 -+ vperm2i128 \$0x13, $A1, $B1, $A1 -+ vperm2i128 \$0x13, $C1, $D1, $B1 -+ vperm2i128 \$0x02, $A2, $B2, $C1 -+ vperm2i128 \$0x02, $C2, $D2, $D1 -+ vperm2i128 \$0x13, $A2, $B2, $A2 -+ vperm2i128 \$0x13, $C2, $D2, $B2 -+ jmp seal_avx2_short -+################################################################################ -+seal_avx2_192: -+ vmovdqa $A0, $A1 -+ vmovdqa $A0, $A2 -+ vmovdqa $B0, $B1 -+ vmovdqa $B0, $B2 -+ vmovdqa $C0, $C1 -+ vmovdqa $C0, $C2 -+ vpaddd .avx2_inc(%rip), $D0, $D1 -+ vmovdqa $D0, $T2 -+ vmovdqa $D1, $T3 -+ mov \$10, $acc0 -+1: \n"; -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"left"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"left"); -+ &chacha_qr_avx2($A0,$B0,$C0,$D0,$T0,"right"); -+ &chacha_qr_avx2($A1,$B1,$C1,$D1,$T0,"right"); $code.=" -+ dec $acc0 -+ jne 1b -+ vpaddd $A2, $A0, $A0 -+ vpaddd $A2, $A1, $A1 -+ vpaddd $B2, $B0, $B0 -+ vpaddd $B2, $B1, $B1 -+ vpaddd $C2, $C0, $C0 -+ vpaddd $C2, $C1, $C1 -+ vpaddd $T2, $D0, $D0 -+ vpaddd $T3, $D1, $D1 -+ vperm2i128 \$0x02, $A0, $B0, $T0 -+ # Clamp and store the key -+ vpand .clamp(%rip), $T0, $T0 -+ vmovdqa $T0, $r_store -+ # Stream for up to 192 bytes -+ vperm2i128 \$0x13, $A0, $B0, $A0 -+ vperm2i128 \$0x13, $C0, $D0, $B0 -+ vperm2i128 \$0x02, $A1, $B1, $C0 -+ vperm2i128 \$0x02, $C1, $D1, $D0 -+ vperm2i128 \$0x13, $A1, $B1, $A1 -+ vperm2i128 \$0x13, $C1, $D1, $B1 -+seal_avx2_short: -+ mov %r8, $itr2 -+ call poly_hash_ad_internal -+ xor $itr1, $itr1 -+seal_avx2_hash: -+ cmp \$16, $itr1 -+ jb seal_avx2_short_loop\n"; -+ &poly_add("0($oup)"); -+ &poly_mul(); $code.=" -+ sub \$16, $itr1 -+ add \$16, $oup -+ jmp seal_avx2_hash -+seal_avx2_short_loop: -+ cmp \$32, $inl -+ jb seal_avx2_short_tail -+ sub \$32, $inl -+ # Encrypt -+ vpxor ($inp), $A0, $A0 -+ vmovdqu $A0, ($oup) -+ lea 1*32($inp), $inp -+ # Load + hash\n"; -+ &poly_add("0*8($oup)"); -+ &poly_mul(); -+ &poly_add("2*8($oup)"); -+ &poly_mul(); $code.=" -+ lea 1*32($oup), $oup -+ # Shift stream -+ vmovdqa $B0, $A0 -+ vmovdqa $C0, $B0 -+ vmovdqa $D0, $C0 -+ vmovdqa $A1, $D0 -+ vmovdqa $B1, $A1 -+ vmovdqa $C1, $B1 -+ vmovdqa $D1, $C1 -+ vmovdqa $A2, $D1 -+ vmovdqa $B2, $A2 -+ jmp seal_avx2_short_loop -+seal_avx2_short_tail: -+ cmp \$16, $inl -+ jb 1f -+ sub \$16, $inl -+ vpxor ($inp), $A0x, $A3x -+ vmovdqu $A3x, ($oup) -+ lea 1*16($inp), $inp\n"; -+ &poly_add("0*8($oup)"); -+ &poly_mul(); $code.=" -+ lea 1*16($oup), $oup -+ vextracti128 \$1, $A0, $A0x -+1: -+ vzeroupper -+ jmp seal_sse_tail_16 -+"; -+} -+ -+$code =~ s/\`([^\`]*)\`/eval $1/gem; -+print $code; -+close STDOUT; -diff --git a/crypto/chacha20_poly1305/asm/chacha20_x86_64.pl b/crypto/chacha20_poly1305/asm/chacha20_x86_64.pl -new file mode 100644 -index 0000000..538af42 ---- /dev/null -+++ b/crypto/chacha20_poly1305/asm/chacha20_x86_64.pl -@@ -0,0 +1,415 @@ -+#!/usr/bin/env perl -+ -+############################################################################## -+# # -+# Copyright 2014 Intel Corporation # -+# # -+# Licensed under the Apache License, Version 2.0 (the "License"); # -+# you may not use this file except in compliance with the License. # -+# You may obtain a copy of the License at # -+# # -+# http://www.apache.org/licenses/LICENSE-2.0 # -+# # -+# Unless required by applicable law or agreed to in writing, software # -+# distributed under the License is distributed on an "AS IS" BASIS, # -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # -+# See the License for the specific language governing permissions and # -+# limitations under the License. # -+# # -+############################################################################## -+# # -+# Developers and authors: # -+# Shay Gueron (1, 2), and Vlad Krasnov (1) # -+# (1) Intel Corporation, Israel Development Center # -+# (2) University of Haifa # -+# # -+# Related work: # -+# M. Goll, S. Gueron, "Vectorization on ChaCha Stream Cipher", IEEE # -+# Proceedings of 11th International Conference on Information # -+# Technology: New Generations (ITNG 2014), 612-615 (2014). # -+# M. Goll, S. Gueron, "Vectorization on Poly1305 Message Authentication Code"# -+# to be published. # -+# A. Langley, chacha20poly1305 for the AEAD head # -+# https://git.openssl.org/gitweb/?p=openssl.git;a=commit;h=9a8646510b3d0a48e950748f7a2aaa12ed40d5e0 # -+############################################################################## -+ -+ -+$flavour = shift; -+$output = shift; -+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } -+ -+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); -+ -+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -+die "can't locate x86_64-xlate.pl"; -+ -+open OUT,"| \"$^X\" $xlate $flavour $output"; -+*STDOUT=*OUT; -+ -+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` -+ =~ /GNU assembler version ([2-9]\.[0-9]+)/) { -+ $avx = ($1>=2.19) + ($1>=2.22); -+} -+ -+if ($win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && -+ `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { -+ $avx = ($1>=2.09) + ($1>=2.10); -+} -+ -+if ($win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && -+ `ml64 2>&1` =~ /Version ([0-9]+)\./) { -+ $avx = ($1>=10) + ($1>=11); -+} -+ -+if (`$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { -+ my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 -+ $avx = ($ver>=3.0) + ($ver>=3.01); -+} -+ -+{ -+ -+my ($rol8, $rol16, $state_cdef, $tmp, -+ $v0, $v1, $v2, $v3, $v4, $v5, $v6, $v7, -+ $v8, $v9, $v10, $v11)=map("%xmm$_",(0..15)); -+ -+sub chacha_qr { -+ -+my ($a,$b,$c,$d)=@_; -+$code.=<<___; -+ paddd $b, $a # a += b -+ pxor $a, $d # d ^= a -+ pshufb $rol16, $d # d <<<= 16 -+ -+ paddd $d, $c # c += d -+ pxor $c, $b # b ^= c -+ -+ movdqa $b, $tmp -+ pslld \$12, $tmp -+ psrld \$20, $b -+ pxor $tmp, $b # b <<<= 12 -+ -+ paddd $b, $a # a += b -+ pxor $a, $d # d ^= a -+ pshufb $rol8, $d # d <<<= 8 -+ -+ paddd $d, $c # c += d -+ pxor $c, $b # b ^= c -+ -+ movdqa $b, $tmp -+ pslld \$7, $tmp -+ psrld \$25, $b -+ pxor $tmp, $b # b <<<= 7 -+___ -+ -+} -+ -+$code.=<<___; -+.text -+.align 16 -+chacha20_consts: -+.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' -+.rol8: -+.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 -+.rol16: -+.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 -+.avxInc: -+.quad 1,0 -+___ -+ -+{ -+my ($out, $in, $in_len, $key_ptr, $nr) -+ =("%rdi", "%rsi", "%rdx", "%rcx", "%r8"); -+ -+$code.=<<___; -+.globl chacha_20_core_asm -+.type chacha_20_core_asm ,\@function,2 -+.align 64 -+chacha_20_core_asm: -+ -+ # Init state -+ movdqa .rol8(%rip), $rol8 -+ movdqa .rol16(%rip), $rol16 -+ movdqu 2*16($key_ptr), $state_cdef -+ -+2: -+ cmp \$3*64, $in_len -+ jb 2f -+ -+ movdqa chacha20_consts(%rip), $v0 -+ movdqu 0*16($key_ptr), $v1 -+ movdqu 1*16($key_ptr), $v2 -+ movdqa $state_cdef, $v3 -+ movdqa $v0, $v4 -+ movdqa $v0, $v8 -+ movdqa $v1, $v5 -+ movdqa $v1, $v9 -+ movdqa $v2, $v6 -+ movdqa $v2, $v10 -+ movdqa $v3, $v7 -+ paddd .avxInc(%rip), $v7 -+ movdqa $v7, $v11 -+ paddd .avxInc(%rip), $v11 -+ -+ mov \$10, $nr -+ -+ 1: -+___ -+ &chacha_qr( $v0, $v1, $v2, $v3); -+ &chacha_qr( $v4, $v5, $v6, $v7); -+ &chacha_qr( $v8, $v9,$v10,$v11); -+$code.=<<___; -+ palignr \$4, $v1, $v1 -+ palignr \$8, $v2, $v2 -+ palignr \$12, $v3, $v3 -+ palignr \$4, $v5, $v5 -+ palignr \$8, $v6, $v6 -+ palignr \$12, $v7, $v7 -+ palignr \$4, $v9, $v9 -+ palignr \$8, $v10, $v10 -+ palignr \$12, $v11, $v11 -+___ -+ &chacha_qr( $v0, $v1, $v2, $v3); -+ &chacha_qr( $v4, $v5, $v6, $v7); -+ &chacha_qr( $v8, $v9,$v10,$v11); -+$code.=<<___; -+ palignr \$12, $v1, $v1 -+ palignr \$8, $v2, $v2 -+ palignr \$4, $v3, $v3 -+ palignr \$12, $v5, $v5 -+ palignr \$8, $v6, $v6 -+ palignr \$4, $v7, $v7 -+ palignr \$12, $v9, $v9 -+ palignr \$8, $v10, $v10 -+ palignr \$4, $v11, $v11 -+ dec $nr -+ -+ jnz 1b -+ paddd chacha20_consts(%rip), $v0 -+ paddd chacha20_consts(%rip), $v4 -+ paddd chacha20_consts(%rip), $v8 -+ -+ movdqu 16*0($key_ptr), $tmp -+ paddd $tmp, $v1 -+ paddd $tmp, $v5 -+ paddd $tmp, $v9 -+ -+ movdqu 16*1($key_ptr), $tmp -+ paddd $tmp, $v2 -+ paddd $tmp, $v6 -+ paddd $tmp, $v10 -+ -+ paddd $state_cdef, $v3 -+ paddq .avxInc(%rip), $state_cdef -+ paddd $state_cdef, $v7 -+ paddq .avxInc(%rip), $state_cdef -+ paddd $state_cdef, $v11 -+ paddq .avxInc(%rip), $state_cdef -+ -+ movdqu 16*0($in), $tmp -+ pxor $tmp, $v0 -+ movdqu 16*1($in), $tmp -+ pxor $tmp, $v1 -+ movdqu 16*2($in), $tmp -+ pxor $tmp, $v2 -+ movdqu 16*3($in), $tmp -+ pxor $tmp, $v3 -+ -+ movdqu $v0, 16*0($out) -+ movdqu $v1, 16*1($out) -+ movdqu $v2, 16*2($out) -+ movdqu $v3, 16*3($out) -+ -+ movdqu 16*4($in), $tmp -+ pxor $tmp, $v4 -+ movdqu 16*5($in), $tmp -+ pxor $tmp, $v5 -+ movdqu 16*6($in), $tmp -+ pxor $tmp, $v6 -+ movdqu 16*7($in), $tmp -+ pxor $tmp, $v7 -+ -+ movdqu $v4, 16*4($out) -+ movdqu $v5, 16*5($out) -+ movdqu $v6, 16*6($out) -+ movdqu $v7, 16*7($out) -+ -+ movdqu 16*8($in), $tmp -+ pxor $tmp, $v8 -+ movdqu 16*9($in), $tmp -+ pxor $tmp, $v9 -+ movdqu 16*10($in), $tmp -+ pxor $tmp, $v10 -+ movdqu 16*11($in), $tmp -+ pxor $tmp, $v11 -+ -+ movdqu $v8, 16*8($out) -+ movdqu $v9, 16*9($out) -+ movdqu $v10, 16*10($out) -+ movdqu $v11, 16*11($out) -+ -+ lea 16*12($in), $in -+ lea 16*12($out), $out -+ sub \$16*12, $in_len -+ -+ jmp 2b -+ -+2: -+ cmp \$2*64, $in_len -+ jb 2f -+ -+ movdqa chacha20_consts(%rip), $v0 -+ movdqa chacha20_consts(%rip), $v4 -+ movdqu 16*0($key_ptr), $v1 -+ movdqu 16*0($key_ptr), $v5 -+ movdqu 16*1($key_ptr), $v2 -+ movdqu 16*1($key_ptr), $v6 -+ movdqa $state_cdef, $v3 -+ movdqa $v3, $v7 -+ paddd .avxInc(%rip), $v7 -+ -+ mov \$10, $nr -+ 1: -+___ -+ &chacha_qr($v0,$v1,$v2,$v3); -+ &chacha_qr($v4,$v5,$v6,$v7); -+$code.=<<___; -+ palignr \$4, $v1, $v1 -+ palignr \$8, $v2, $v2 -+ palignr \$12, $v3, $v3 -+ palignr \$4, $v5, $v5 -+ palignr \$8, $v6, $v6 -+ palignr \$12, $v7, $v7 -+___ -+ &chacha_qr($v0,$v1,$v2,$v3); -+ &chacha_qr($v4,$v5,$v6,$v7); -+$code.=<<___; -+ palignr \$12, $v1, $v1 -+ palignr \$8, $v2, $v2 -+ palignr \$4, $v3, $v3 -+ palignr \$12, $v5, $v5 -+ palignr \$8, $v6, $v6 -+ palignr \$4, $v7, $v7 -+ dec $nr -+ jnz 1b -+ -+ paddd chacha20_consts(%rip), $v0 -+ paddd chacha20_consts(%rip), $v4 -+ -+ movdqu 16*0($key_ptr), $tmp -+ paddd $tmp, $v1 -+ paddd $tmp, $v5 -+ -+ movdqu 16*1($key_ptr), $tmp -+ paddd $tmp, $v2 -+ paddd $tmp, $v6 -+ -+ paddd $state_cdef, $v3 -+ paddq .avxInc(%rip), $state_cdef -+ paddd $state_cdef, $v7 -+ paddq .avxInc(%rip), $state_cdef -+ -+ movdqu 16*0($in), $tmp -+ pxor $tmp, $v0 -+ movdqu 16*1($in), $tmp -+ pxor $tmp, $v1 -+ movdqu 16*2($in), $tmp -+ pxor $tmp, $v2 -+ movdqu 16*3($in), $tmp -+ pxor $tmp, $v3 -+ -+ movdqu $v0, 16*0($out) -+ movdqu $v1, 16*1($out) -+ movdqu $v2, 16*2($out) -+ movdqu $v3, 16*3($out) -+ -+ movdqu 16*4($in), $tmp -+ pxor $tmp, $v4 -+ movdqu 16*5($in), $tmp -+ pxor $tmp, $v5 -+ movdqu 16*6($in), $tmp -+ pxor $tmp, $v6 -+ movdqu 16*7($in), $tmp -+ pxor $tmp, $v7 -+ -+ movdqu $v4, 16*4($out) -+ movdqu $v5, 16*5($out) -+ movdqu $v6, 16*6($out) -+ movdqu $v7, 16*7($out) -+ -+ lea 16*8($in), $in -+ lea 16*8($out), $out -+ sub \$16*8, $in_len -+ -+ jmp 2b -+2: -+ cmp \$64, $in_len -+ jb 2f -+ -+ movdqa chacha20_consts(%rip), $v0 -+ movdqu 16*0($key_ptr), $v1 -+ movdqu 16*1($key_ptr), $v2 -+ movdqa $state_cdef, $v3 -+ -+ mov \$10, $nr -+ -+ 1: -+___ -+ &chacha_qr($v0,$v1,$v2,$v3); -+$code.=<<___; -+ palignr \$4, $v1, $v1 -+ palignr \$8, $v2, $v2 -+ palignr \$12, $v3, $v3 -+___ -+ &chacha_qr($v0,$v1,$v2,$v3); -+$code.=<<___; -+ palignr \$12, $v1, $v1 -+ palignr \$8, $v2, $v2 -+ palignr \$4, $v3, $v3 -+ dec $nr -+ jnz 1b -+ -+ paddd chacha20_consts(%rip), $v0 -+ -+ movdqu 16*0($key_ptr), $tmp -+ paddd $tmp, $v1 -+ -+ movdqu 16*1($key_ptr), $tmp -+ paddd $tmp, $v2 -+ -+ paddd $state_cdef, $v3 -+ paddq .avxInc(%rip), $state_cdef -+ -+ movdqu 16*0($in), $tmp -+ pxor $tmp, $v0 -+ movdqu 16*1($in), $tmp -+ pxor $tmp, $v1 -+ movdqu 16*2($in), $tmp -+ pxor $tmp, $v2 -+ movdqu 16*3($in), $tmp -+ pxor $tmp, $v3 -+ -+ movdqu $v0, 16*0($out) -+ movdqu $v1, 16*1($out) -+ movdqu $v2, 16*2($out) -+ movdqu $v3, 16*3($out) -+ -+ lea 16*4($in), $in -+ lea 16*4($out), $out -+ sub \$16*4, $in_len -+ jmp 2b -+ -+2: -+ movdqu $state_cdef, 16*2($key_ptr) -+ ret -+.size chacha_20_core_asm,.-chacha_20_core_asm -+___ -+} -+} -+ -+$code =~ s/\`([^\`]*)\`/eval($1)/gem; -+ -+print $code; -+ -+close STDOUT; -diff --git a/crypto/chacha20_poly1305/asm/poly1305_x86_64.pl b/crypto/chacha20_poly1305/asm/poly1305_x86_64.pl -new file mode 100644 -index 0000000..05e4bc5 ---- /dev/null -+++ b/crypto/chacha20_poly1305/asm/poly1305_x86_64.pl -@@ -0,0 +1,280 @@ -+############################################################################## -+# # -+# Copyright 2016 CloudFlare LTD # -+# # -+# Licensed under the Apache License, Version 2.0 (the "License"); # -+# you may not use this file except in compliance with the License. # -+# You may obtain a copy of the License at # -+# # -+# http://www.apache.org/licenses/LICENSE-2.0 # -+# # -+# Unless required by applicable law or agreed to in writing, software # -+# distributed under the License is distributed on an "AS IS" BASIS, # -+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # -+# See the License for the specific language governing permissions and # -+# limitations under the License. # -+# # -+############################################################################## -+# # -+# Author: Vlad Krasnov # -+# # -+############################################################################## -+ -+$flavour = shift; -+$output = shift; -+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } -+ -+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); -+ -+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -+die "can't locate x86_64-xlate.pl"; -+ -+open OUT,"| \"$^X\" $xlate $flavour $output"; -+*STDOUT=*OUT; -+ -+if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` -+ =~ /GNU assembler version ([2-9]\.[0-9]+)/) { -+ $avx = ($1>=2.19) + ($1>=2.22); -+} -+ -+if ($win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && -+ `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { -+ $avx = ($1>=2.09) + ($1>=2.10); -+} -+ -+if ($win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && -+ `ml64 2>&1` =~ /Version ([0-9]+)\./) { -+ $avx = ($1>=10) + ($1>=11); -+} -+ -+if (`$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { -+ my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 -+ $avx = ($ver>=3.0) + ($ver>=3.01); -+} -+ -+ -+{ -+{ -+ -+my ($state, $key) -+ =("%rdi", "%rsi"); -+ -+$code.=<<___; -+ -+.LrSet: -+.align 16 -+.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC -+############################################################################### -+# void poly1305_init_x64(void *state, uint8_t key[32]) -+ -+.globl poly1305_init_x64 -+.type poly1305_init_x64, \@function, 2 -+.align 64 -+poly1305_init_x64: -+ -+ xor %rax, %rax -+ mov %rax, 8*0($state) -+ mov %rax, 8*1($state) -+ mov %rax, 8*2($state) -+ -+ movdqu 16*0($key), %xmm0 -+ movdqu 16*1($key), %xmm1 -+ pand .LrSet(%rip), %xmm0 -+ -+ movdqu %xmm0, 8*3($state) -+ movdqu %xmm1, 8*3+16($state) -+ movq \$0, 8*7($state) -+ -+ ret -+.size poly1305_init_x64,.-poly1305_init_x64 -+___ -+} -+ -+{ -+ -+my ($state, $inp) -+ =("%rdi", "%rsi"); -+ -+my ($acc0, $acc1, $acc2, $inl, $t0, $t1, $t2, $t3, $r0) -+ =("%rcx", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"); -+ -+my ($r1) -+ =("8*4($state)"); -+ -+$code.=<<___; -+############################################################################### -+# void* poly1305_update_x64(void* state, void* in, uint64_t in_len) -+.globl poly1305_update_x64 -+.type poly1305_update_x64, \@function, 2 -+.align 64 -+poly1305_update_x64: -+ -+ push %r11 -+ push %r12 -+ push %r13 -+ push %r14 -+ push %r15 -+ -+ mov %rdx, $inl -+ -+ mov 8*0($state), $acc0 -+ mov 8*1($state), $acc1 -+ mov 8*2($state), $acc2 -+ mov 8*3($state), $r0 -+ -+ cmp \$16, $inl -+ jb 2f -+ jmp 1f -+ -+.align 64 -+1: -+############################ -+ add 8*0($inp), $acc0 -+ adc 8*1($inp), $acc1 -+ lea 16($inp), $inp -+ adc \$1, $acc2 -+ -+5: -+ mov $r0, %rax -+ mulq $acc0 -+ mov %rax, $t0 -+ mov %rdx, $t1 -+ -+ mov $r0, %rax -+ mulq $acc1 -+ add %rax, $t1 -+ adc \$0, %rdx -+ -+ mov $r0, $t2 -+ imul $acc2, $t2 -+ add %rdx, $t2 -+############################ -+ mov $r1, %rax -+ mulq $acc0 -+ add %rax, $t1 -+ adc \$0, %rdx -+ mov %rdx, $acc0 -+ -+ mov $r1, %rax -+ mulq $acc1 -+ add $acc0, $t2 -+ adc \$0, %rdx -+ add %rax, $t2 -+ adc \$0, %rdx -+ -+ mov $r1, $t3 -+ imul $acc2, $t3 -+ add %rdx, $t3 -+############################ -+ -+ mov $t0, $acc0 -+ mov $t1, $acc1 -+ mov $t2, $acc2 -+ and \$3, $acc2 -+ -+ mov $t2, $t0 -+ mov $t3, $t1 -+ -+ and \$-4, $t0 -+ shrd \$2, $t3, $t2 -+ shr \$2, $t3 -+ -+ add $t0, $acc0 -+ adc $t1, $acc1 -+ adc \$0, $acc2 -+ -+ add $t2, $acc0 -+ adc $t3, $acc1 -+ adc \$0, $acc2 -+ -+ sub \$16, $inl -+ cmp \$16, $inl -+ jae 1b -+ -+2: -+ test $inl, $inl -+ jz 3f -+ -+ mov \$1, $t0 -+ xor $t1, $t1 -+ xor $t2, $t2 -+ add $inl, $inp -+ -+4: -+ shld \$8, $t0, $t1 -+ shl \$8, $t0 -+ movzxb -1($inp), $t2 -+ xor $t2, $t0 -+ dec $inp -+ dec $inl -+ jnz 4b -+ -+ add $t0, $acc0 -+ adc $t1, $acc1 -+ adc \$0, $acc2 -+ -+ mov \$16, $inl -+ jmp 5b -+ -+3: -+ -+ mov $acc0, 8*0($state) -+ mov $acc1, 8*1($state) -+ mov $acc2, 8*2($state) -+ -+ pop %r15 -+ pop %r14 -+ pop %r13 -+ pop %r12 -+ pop %r11 -+ ret -+.size poly1305_update_x64, .-poly1305_update_x64 -+___ -+} -+ -+{ -+ -+my ($mac, $state)=("%rsi", "%rdi"); -+ -+my ($acc0, $acc1, $acc2, $t0, $t1, $t2) -+ =("%rcx", "%rax", "%rdx", "%r8", "%r9", "%r10"); -+ -+$code.=<<___; -+############################################################################### -+# void poly1305_finish_x64(void* state, uint64_t mac[2]); -+.type poly1305_finish_x64,\@function, 2 -+.align 64 -+.globl poly1305_finish_x64 -+poly1305_finish_x64: -+ -+ mov 8*0($state), $acc0 -+ mov 8*1($state), $acc1 -+ mov 8*2($state), $acc2 -+ -+ mov $acc0, $t0 -+ mov $acc1, $t1 -+ mov $acc2, $t2 -+ -+ sub \$-5, $acc0 -+ sbb \$-1, $acc1 -+ sbb \$3, $acc2 -+ -+ cmovc $t0, $acc0 -+ cmovc $t1, $acc1 -+ cmovc $t2, $acc2 -+ -+ add 8*5($state), $acc0 -+ adc 8*6($state), $acc1 -+ mov $acc0, ($mac) -+ mov $acc1, 8($mac) -+ -+ ret -+.size poly1305_finish_x64, .-poly1305_finish_x64 -+___ -+} -+} -+$code =~ s/\`([^\`]*)\`/eval($1)/gem; -+print $code; -+close STDOUT; -diff --git a/crypto/chacha20_poly1305/chacha20.c b/crypto/chacha20_poly1305/chacha20.c -new file mode 100644 -index 0000000..b48d857 ---- /dev/null -+++ b/crypto/chacha20_poly1305/chacha20.c -@@ -0,0 +1,142 @@ -+/* Copyright (c) 2014, Google Inc. -+ * -+ * Permission to use, copy, modify, and/or distribute this software for any -+ * purpose with or without fee is hereby granted, provided that the above -+ * copyright notice and this permission notice appear in all copies. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION -+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -+ -+/* Adapted from the public domain, estream code by D. Bernstein. */ -+ -+#include "chacha20poly1305.h" -+ -+/* sigma contains the ChaCha constants, which happen to be an ASCII string. */ -+static const char sigma[16] = "expand 32-byte k"; -+ -+#define ROTATE(v, n) (((v) << (n)) | ((v) >> (32 - (n)))) -+#define XOR(v, w) ((v) ^ (w)) -+#define PLUS(x, y) ((x) + (y)) -+#define PLUSONE(v) (PLUS((v), 1)) -+ -+#define U32TO8_LITTLE(p, v) \ -+ { \ -+ (p)[0] = (v >> 0) & 0xff; \ -+ (p)[1] = (v >> 8) & 0xff; \ -+ (p)[2] = (v >> 16) & 0xff; \ -+ (p)[3] = (v >> 24) & 0xff; \ -+ } -+ -+#define U8TO32_LITTLE(p) \ -+ (((uint32_t)((p)[0])) | ((uint32_t)((p)[1]) << 8) | \ -+ ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24)) -+ -+/* QUARTERROUND updates a, b, c, d with a ChaCha "quarter" round. */ -+#define QUARTERROUND(a,b,c,d) \ -+ x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]),16); \ -+ x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]),12); \ -+ x[a] = PLUS(x[a],x[b]); x[d] = ROTATE(XOR(x[d],x[a]), 8); \ -+ x[c] = PLUS(x[c],x[d]); x[b] = ROTATE(XOR(x[b],x[c]), 7); -+ -+/* chacha_core performs |num_rounds| rounds of ChaCha20 on the input words in -+ * |input| and writes the 64 output bytes to |output|. */ -+static void chacha_core(uint8_t output[64], const uint32_t input[16]) { -+ uint32_t x[16]; -+ int i; -+ -+ memcpy(x, input, sizeof(uint32_t) * 16); -+ for (i = 20; i > 0; i -= 2) { -+ QUARTERROUND(0, 4, 8, 12) -+ QUARTERROUND(1, 5, 9, 13) -+ QUARTERROUND(2, 6, 10, 14) -+ QUARTERROUND(3, 7, 11, 15) -+ QUARTERROUND(0, 5, 10, 15) -+ QUARTERROUND(1, 6, 11, 12) -+ QUARTERROUND(2, 7, 8, 13) -+ QUARTERROUND(3, 4, 9, 14) -+ } -+ -+ for (i = 0; i < 16; ++i) { -+ x[i] = PLUS(x[i], input[i]); -+ } -+ for (i = 0; i < 16; ++i) { -+ U32TO8_LITTLE(output + 4 * i, x[i]); -+ } -+} -+ -+#if CHAPOLY_ASM -+void chacha_20_core_asm(uint8_t *out, const uint8_t *in, size_t in_len, -+ uint8_t nonce[48]); -+#endif -+ -+void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len, -+ uint8_t nonce[48]) { -+ -+ uint8_t buf[64]; -+ uint32_t input[16]; -+ size_t todo, i; -+ -+#ifdef CHAPOLY_ASM -+ chacha_20_core_asm(out, in, in_len, nonce); -+ todo = in_len & (63); -+ -+ if(todo) { -+ out += in_len - todo; -+ in += in_len - todo; -+ memcpy(buf, in, todo); -+ -+ chacha_20_core_asm(buf, buf, sizeof(buf), nonce); -+ -+ memcpy(out, buf, todo); -+ memset(buf, 0, sizeof(buf)); -+ } -+ return; -+#endif -+ -+ input[0] = U8TO32_LITTLE(sigma + 0); -+ input[1] = U8TO32_LITTLE(sigma + 4); -+ input[2] = U8TO32_LITTLE(sigma + 8); -+ input[3] = U8TO32_LITTLE(sigma + 12); -+ -+ input[4] = U8TO32_LITTLE(nonce + 0); -+ input[5] = U8TO32_LITTLE(nonce + 4); -+ input[6] = U8TO32_LITTLE(nonce + 8); -+ input[7] = U8TO32_LITTLE(nonce + 12); -+ -+ input[8] = U8TO32_LITTLE(nonce + 16); -+ input[9] = U8TO32_LITTLE(nonce + 20); -+ input[10] = U8TO32_LITTLE(nonce + 24); -+ input[11] = U8TO32_LITTLE(nonce + 28); -+ -+ input[12] = U8TO32_LITTLE(nonce + 32); -+ input[13] = U8TO32_LITTLE(nonce + 36); -+ input[14] = U8TO32_LITTLE(nonce + 40); -+ input[15] = U8TO32_LITTLE(nonce + 44); -+ -+ while (in_len > 0) { -+ todo = 64; -+ if (in_len < todo) { -+ todo = in_len; -+ } -+ -+ chacha_core(buf, input); -+ for (i = 0; i < todo; i++) { -+ out[i] = in[i] ^ buf[i]; -+ } -+ -+ out += todo; -+ in += todo; -+ in_len -= todo; -+ -+ ((uint64_t*)input)[6]++; -+ } -+ -+ U32TO8_LITTLE(nonce + 32, input[12]); -+ U32TO8_LITTLE(nonce + 36, input[13]); -+} -+ -diff --git a/crypto/chacha20_poly1305/chacha20poly1305.h b/crypto/chacha20_poly1305/chacha20poly1305.h -new file mode 100644 -index 0000000..3968c40 ---- /dev/null -+++ b/crypto/chacha20_poly1305/chacha20poly1305.h -@@ -0,0 +1,64 @@ -+/* Copyright (c) 2014, Google Inc. -+ * -+ * Permission to use, copy, modify, and/or distribute this software for any -+ * purpose with or without fee is hereby granted, provided that the above -+ * copyright notice and this permission notice appear in all copies. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION -+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -+ -+#ifndef OPENSSL_HEADER_POLY1305_H -+#define OPENSSL_HEADER_POLY1305_H -+ -+#include <stdint.h> -+#include <stddef.h> -+#include <string.h> -+#include "crypto.h" -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#define POLY1305_MAC_LEN (16) -+#define POLY1305_PAD_LEN (16) -+ -+typedef unsigned char poly1305_state[92]; -+ -+ -+/* CRYPTO_poly1305_init sets up |state| so that it can be used to calculate an -+ * authentication tag with the one-time key |key|. Note that |key| is a -+ * one-time key and therefore there is no `reset' method because that would -+ * enable several messages to be authenticated with the same key. */ -+void CRYPTO_poly1305_init(poly1305_state* state, const uint8_t key[32]); -+ -+/* CRYPTO_poly1305_update processes |in_len| bytes from |in|. It can be called -+ * zero or more times after poly1305_init. */ -+void CRYPTO_poly1305_update(poly1305_state* state, const uint8_t* in, -+ size_t in_len); -+ -+/* CRYPTO_poly1305_finish completes the poly1305 calculation and writes a 16 -+ * byte authentication tag to |mac|. */ -+void CRYPTO_poly1305_finish(poly1305_state* state, -+ uint8_t mac[POLY1305_MAC_LEN]); -+ -+/* CRYPTO_chacha_20 encrypts |in_len| bytes from |in| with the given key and -+ * nonce and writes the result to |out|, which may be equal to |in|. The -+ * initial block counter is specified by |counter|. */ -+void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len, -+ uint8_t nonce[48]); -+ -+#if CHAPOLY_ASM -+int chacha20_poly1305_open(uint8_t *pt, const uint8_t *ct, size_t len_in, uint8_t *ad, size_t len_ad, uint8_t *key); -+void chacha20_poly1305_seal(uint8_t *ct, const uint8_t *pt, size_t len_in, uint8_t *ad, size_t len_ad, uint8_t *key); -+#endif -+ -+#if defined(__cplusplus) -+} /* extern C */ -+#endif -+ -+#endif /* OPENSSL_HEADER_POLY1305_H */ -diff --git a/crypto/chacha20_poly1305/poly1305.c b/crypto/chacha20_poly1305/poly1305.c -new file mode 100644 -index 0000000..6bd553b ---- /dev/null -+++ b/crypto/chacha20_poly1305/poly1305.c -@@ -0,0 +1,355 @@ -+/* Copyright (c) 2014, Google Inc. -+ * -+ * Permission to use, copy, modify, and/or distribute this software for any -+ * purpose with or without fee is hereby granted, provided that the above -+ * copyright notice and this permission notice appear in all copies. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION -+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -+ -+/* This implementation of poly1305 is by Andrew Moon -+ * (https://github.com/floodyberry/poly1305-donna) and released as public -+ * domain. */ -+ -+#include "chacha20poly1305.h" -+ -+#include <string.h> -+#ifndef CHAPOLY_ASM -+ -+#if !defined(B_ENDIAN) -+/* We can assume little-endian. */ -+static uint32_t U8TO32_LE(const uint8_t *m) { -+ uint32_t r; -+ memcpy(&r, m, sizeof(r)); -+ return r; -+} -+ -+static void U32TO8_LE(uint8_t *m, uint32_t v) { memcpy(m, &v, sizeof(v)); } -+#else -+static uint32_t U8TO32_LE(const uint8_t *m) { -+ return (uint32_t)m[0] | (uint32_t)m[1] << 8 | (uint32_t)m[2] << 16 | -+ (uint32_t)m[3] << 24; -+} -+ -+static void U32TO8_LE(uint8_t *m, uint32_t v) { -+ m[0] = v; -+ m[1] = v >> 8; -+ m[2] = v >> 16; -+ m[3] = v >> 24; -+} -+#endif -+ -+static uint64_t mul32x32_64(uint32_t a, uint32_t b) { return (uint64_t)a * b; } -+ -+struct poly1305_state_st { -+ uint32_t r0, r1, r2, r3, r4; -+ uint32_t s1, s2, s3, s4; -+ uint32_t h0, h1, h2, h3, h4; -+ uint8_t buf[16]; -+ unsigned int buf_used; -+ uint8_t key[16]; -+}; -+ -+/* poly1305_blocks updates |state| given some amount of input data. This -+ * function may only be called with a |len| that is not a multiple of 16 at the -+ * end of the data. Otherwise the input must be buffered into 16 byte blocks. */ -+static void poly1305_update(struct poly1305_state_st *state, const uint8_t *in, -+ size_t len) { -+ uint32_t t0, t1, t2, t3; -+ uint64_t t[5]; -+ uint32_t b; -+ uint64_t c; -+ size_t j; -+ uint8_t mp[16]; -+ -+ if (len < 16) { -+ goto poly1305_donna_atmost15bytes; -+ } -+ -+poly1305_donna_16bytes: -+ t0 = U8TO32_LE(in); -+ t1 = U8TO32_LE(in + 4); -+ t2 = U8TO32_LE(in + 8); -+ t3 = U8TO32_LE(in + 12); -+ -+ in += 16; -+ len -= 16; -+ -+ state->h0 += t0 & 0x3ffffff; -+ state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; -+ state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; -+ state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; -+ state->h4 += (t3 >> 8) | (1 << 24); -+ -+poly1305_donna_mul: -+ t[0] = mul32x32_64(state->h0, state->r0) + mul32x32_64(state->h1, state->s4) + -+ mul32x32_64(state->h2, state->s3) + mul32x32_64(state->h3, state->s2) + -+ mul32x32_64(state->h4, state->s1); -+ t[1] = mul32x32_64(state->h0, state->r1) + mul32x32_64(state->h1, state->r0) + -+ mul32x32_64(state->h2, state->s4) + mul32x32_64(state->h3, state->s3) + -+ mul32x32_64(state->h4, state->s2); -+ t[2] = mul32x32_64(state->h0, state->r2) + mul32x32_64(state->h1, state->r1) + -+ mul32x32_64(state->h2, state->r0) + mul32x32_64(state->h3, state->s4) + -+ mul32x32_64(state->h4, state->s3); -+ t[3] = mul32x32_64(state->h0, state->r3) + mul32x32_64(state->h1, state->r2) + -+ mul32x32_64(state->h2, state->r1) + mul32x32_64(state->h3, state->r0) + -+ mul32x32_64(state->h4, state->s4); -+ t[4] = mul32x32_64(state->h0, state->r4) + mul32x32_64(state->h1, state->r3) + -+ mul32x32_64(state->h2, state->r2) + mul32x32_64(state->h3, state->r1) + -+ mul32x32_64(state->h4, state->r0); -+ -+ state->h0 = (uint32_t)t[0] & 0x3ffffff; -+ c = (t[0] >> 26); -+ t[1] += c; -+ state->h1 = (uint32_t)t[1] & 0x3ffffff; -+ b = (uint32_t)(t[1] >> 26); -+ t[2] += b; -+ state->h2 = (uint32_t)t[2] & 0x3ffffff; -+ b = (uint32_t)(t[2] >> 26); -+ t[3] += b; -+ state->h3 = (uint32_t)t[3] & 0x3ffffff; -+ b = (uint32_t)(t[3] >> 26); -+ t[4] += b; -+ state->h4 = (uint32_t)t[4] & 0x3ffffff; -+ b = (uint32_t)(t[4] >> 26); -+ state->h0 += b * 5; -+ -+ if (len >= 16) -+ goto poly1305_donna_16bytes; -+ -+/* final bytes */ -+poly1305_donna_atmost15bytes: -+ if (!len) -+ return; -+ -+ for (j = 0; j < len; j++) -+ mp[j] = in[j]; -+ mp[j++] = 1; -+ for (; j < 16; j++) -+ mp[j] = 0; -+ len = 0; -+ -+ t0 = U8TO32_LE(mp + 0); -+ t1 = U8TO32_LE(mp + 4); -+ t2 = U8TO32_LE(mp + 8); -+ t3 = U8TO32_LE(mp + 12); -+ -+ state->h0 += t0 & 0x3ffffff; -+ state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; -+ state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; -+ state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; -+ state->h4 += (t3 >> 8); -+ -+ goto poly1305_donna_mul; -+} -+ -+void CRYPTO_poly1305_init(poly1305_state *statep, const uint8_t key[32]) { -+ struct poly1305_state_st *state = (struct poly1305_state_st *)statep; -+ uint32_t t0, t1, t2, t3; -+ -+ t0 = U8TO32_LE(key + 0); -+ t1 = U8TO32_LE(key + 4); -+ t2 = U8TO32_LE(key + 8); -+ t3 = U8TO32_LE(key + 12); -+ -+ /* precompute multipliers */ -+ state->r0 = t0 & 0x3ffffff; -+ t0 >>= 26; -+ t0 |= t1 << 6; -+ state->r1 = t0 & 0x3ffff03; -+ t1 >>= 20; -+ t1 |= t2 << 12; -+ state->r2 = t1 & 0x3ffc0ff; -+ t2 >>= 14; -+ t2 |= t3 << 18; -+ state->r3 = t2 & 0x3f03fff; -+ t3 >>= 8; -+ state->r4 = t3 & 0x00fffff; -+ -+ state->s1 = state->r1 * 5; -+ state->s2 = state->r2 * 5; -+ state->s3 = state->r3 * 5; -+ state->s4 = state->r4 * 5; -+ -+ /* init state */ -+ state->h0 = 0; -+ state->h1 = 0; -+ state->h2 = 0; -+ state->h3 = 0; -+ state->h4 = 0; -+ -+ state->buf_used = 0; -+ memcpy(state->key, key + 16, sizeof(state->key)); -+} -+ -+void CRYPTO_poly1305_update(poly1305_state *statep, const uint8_t *in, -+ size_t in_len) { -+ unsigned int i; -+ struct poly1305_state_st *state = (struct poly1305_state_st *)statep; -+ -+ if (state->buf_used) { -+ unsigned int todo = 16 - state->buf_used; -+ if (todo > in_len) -+ todo = in_len; -+ for (i = 0; i < todo; i++) -+ state->buf[state->buf_used + i] = in[i]; -+ state->buf_used += todo; -+ in_len -= todo; -+ in += todo; -+ -+ if (state->buf_used == 16) { -+ poly1305_update(state, state->buf, 16); -+ state->buf_used = 0; -+ } -+ } -+ -+ if (in_len >= 16) { -+ size_t todo = in_len & ~0xf; -+ poly1305_update(state, in, todo); -+ in += todo; -+ in_len &= 0xf; -+ } -+ -+ if (in_len) { -+ for (i = 0; i < in_len; i++) -+ state->buf[i] = in[i]; -+ state->buf_used = in_len; -+ } -+} -+ -+void CRYPTO_poly1305_finish(poly1305_state *statep, uint8_t mac[16]) { -+ struct poly1305_state_st *state = (struct poly1305_state_st *)statep; -+ -+ uint64_t f0, f1, f2, f3; -+ uint32_t g0, g1, g2, g3, g4; -+ uint32_t b, nb; -+ -+ if (state->buf_used) -+ poly1305_update(state, state->buf, state->buf_used); -+ -+ b = state->h0 >> 26; -+ state->h0 = state->h0 & 0x3ffffff; -+ state->h1 += b; -+ b = state->h1 >> 26; -+ state->h1 = state->h1 & 0x3ffffff; -+ state->h2 += b; -+ b = state->h2 >> 26; -+ state->h2 = state->h2 & 0x3ffffff; -+ state->h3 += b; -+ b = state->h3 >> 26; -+ state->h3 = state->h3 & 0x3ffffff; -+ state->h4 += b; -+ b = state->h4 >> 26; -+ state->h4 = state->h4 & 0x3ffffff; -+ state->h0 += b * 5; -+ -+ g0 = state->h0 + 5; -+ b = g0 >> 26; -+ g0 &= 0x3ffffff; -+ g1 = state->h1 + b; -+ b = g1 >> 26; -+ g1 &= 0x3ffffff; -+ g2 = state->h2 + b; -+ b = g2 >> 26; -+ g2 &= 0x3ffffff; -+ g3 = state->h3 + b; -+ b = g3 >> 26; -+ g3 &= 0x3ffffff; -+ g4 = state->h4 + b - (1 << 26); -+ -+ b = (g4 >> 31) - 1; -+ nb = ~b; -+ state->h0 = (state->h0 & nb) | (g0 & b); -+ state->h1 = (state->h1 & nb) | (g1 & b); -+ state->h2 = (state->h2 & nb) | (g2 & b); -+ state->h3 = (state->h3 & nb) | (g3 & b); -+ state->h4 = (state->h4 & nb) | (g4 & b); -+ -+ f0 = ((state->h0) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&state->key[0]); -+ f1 = ((state->h1 >> 6) | (state->h2 << 20)) + -+ (uint64_t)U8TO32_LE(&state->key[4]); -+ f2 = ((state->h2 >> 12) | (state->h3 << 14)) + -+ (uint64_t)U8TO32_LE(&state->key[8]); -+ f3 = ((state->h3 >> 18) | (state->h4 << 8)) + -+ (uint64_t)U8TO32_LE(&state->key[12]); -+ -+ U32TO8_LE(&mac[0], f0); -+ f1 += (f0 >> 32); -+ U32TO8_LE(&mac[4], f1); -+ f2 += (f1 >> 32); -+ U32TO8_LE(&mac[8], f2); -+ f3 += (f2 >> 32); -+ U32TO8_LE(&mac[12], f3); -+} -+ -+#else -+ -+struct poly1305_state_st { -+ uint8_t opaque[8*8]; -+ uint8_t buf[16]; -+ unsigned int buf_used; -+}; -+ -+void poly1305_init_x64(struct poly1305_state_st* state, const uint8_t key[32]); -+void poly1305_update_x64(struct poly1305_state_st* state, const uint8_t *in, size_t in_len); -+void poly1305_finish_x64(struct poly1305_state_st* state, uint8_t mac[16]); -+ -+#define poly1305_update poly1305_update_x64 -+ -+void CRYPTO_poly1305_init(poly1305_state *statep, const uint8_t key[32]) { -+ struct poly1305_state_st *state = (struct poly1305_state_st *)statep; -+ state->buf_used = 0; -+ return poly1305_init_x64(state, key); -+} -+ -+void CRYPTO_poly1305_update(poly1305_state *statep, const uint8_t *in, -+ size_t in_len) { -+ struct poly1305_state_st *state = (struct poly1305_state_st *)statep; -+ int todo; -+ /* Attempt to fill as many bytes as possible before calling the update -+ function */ -+ if (in_len < 16 || state->buf_used) { -+ todo = 16 - state->buf_used; -+ todo = in_len < todo ? in_len : todo; -+ memcpy(state->buf + state->buf_used, in, todo); -+ state->buf_used += todo; -+ in += todo; -+ in_len -= todo; -+ -+ if (state->buf_used == 16) { -+ poly1305_update_x64(state, state->buf, 16); -+ state->buf_used = 0; -+ } -+ } -+ -+ if (in_len >= 16) { -+ poly1305_update_x64(state, in, in_len & (-16)); -+ in += in_len & (-16); -+ in_len &= (15); -+ } -+ -+ if (in_len) { -+ memcpy(state->buf, in, in_len); -+ state->buf_used = in_len; -+ } -+} -+ -+void CRYPTO_poly1305_finish(poly1305_state *statep, uint8_t mac[16]) { -+ struct poly1305_state_st *state = (struct poly1305_state_st *)statep; -+ -+ if (state->buf_used) { -+ if (state->buf_used % POLY1305_PAD_LEN) { -+ memset(state->buf + state->buf_used, 0, -+ POLY1305_PAD_LEN - (state->buf_used % POLY1305_PAD_LEN)); -+ } -+ poly1305_update_x64(state, state->buf, state->buf_used); -+ } -+ -+ poly1305_finish_x64(state, mac); -+} -+#endif -diff --git a/crypto/evp/Makefile b/crypto/evp/Makefile -index fa138d0..c87896b 100644 ---- a/crypto/evp/Makefile -+++ b/crypto/evp/Makefile -@@ -29,7 +29,8 @@ LIBSRC= encode.c digest.c evp_enc.c evp_key.c evp_acnf.c evp_cnf.c \ - c_all.c c_allc.c c_alld.c evp_lib.c bio_ok.c \ - evp_pkey.c evp_pbe.c p5_crpt.c p5_crpt2.c \ - e_old.c pmeth_lib.c pmeth_fn.c pmeth_gn.c m_sigver.c \ -- e_aes_cbc_hmac_sha1.c e_aes_cbc_hmac_sha256.c e_rc4_hmac_md5.c -+ e_aes_cbc_hmac_sha1.c e_aes_cbc_hmac_sha256.c e_rc4_hmac_md5.c \ -+ e_chacha20_poly1305.c - - LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ - e_des.o e_bf.o e_idea.o e_des3.o e_camellia.o\ -@@ -42,7 +43,8 @@ LIBOBJ= encode.o digest.o evp_enc.o evp_key.o evp_acnf.o evp_cnf.o \ - c_all.o c_allc.o c_alld.o evp_lib.o bio_ok.o \ - evp_pkey.o evp_pbe.o p5_crpt.o p5_crpt2.o \ - e_old.o pmeth_lib.o pmeth_fn.o pmeth_gn.o m_sigver.o \ -- e_aes_cbc_hmac_sha1.o e_aes_cbc_hmac_sha256.o e_rc4_hmac_md5.o -+ e_aes_cbc_hmac_sha1.o e_aes_cbc_hmac_sha256.o e_rc4_hmac_md5.o \ -+ e_chacha20_poly1305.o - - SRC= $(LIBSRC) - -@@ -793,3 +795,5 @@ pmeth_lib.o: ../../include/openssl/sha.h ../../include/openssl/stack.h - pmeth_lib.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h - pmeth_lib.o: ../../include/openssl/x509_vfy.h ../asn1/asn1_locl.h ../cryptlib.h - pmeth_lib.o: evp_locl.h pmeth_lib.c -+e_chacha20_poly1305.o: ../../include/openssl/chacha20poly1305.h -+e_chacha20_poly1305.o: e_chacha20_poly1305.c -diff --git a/crypto/evp/c_allc.c b/crypto/evp/c_allc.c -index 280e584..694f168 100644 ---- a/crypto/evp/c_allc.c -+++ b/crypto/evp/c_allc.c -@@ -238,4 +238,9 @@ void OpenSSL_add_all_ciphers(void) - EVP_add_cipher_alias(SN_camellia_256_cbc, "CAMELLIA256"); - EVP_add_cipher_alias(SN_camellia_256_cbc, "camellia256"); - #endif -+ -+#ifndef OPENSSL_NO_CHACHA_POLY -+ EVP_add_cipher(EVP_chacha20_poly1305()); -+ EVP_add_cipher(EVP_chacha20_poly1305_draft()); -+#endif - } -diff --git a/crypto/evp/e_chacha20_poly1305.c b/crypto/evp/e_chacha20_poly1305.c -new file mode 100644 -index 0000000..1e072ec ---- /dev/null -+++ b/crypto/evp/e_chacha20_poly1305.c -@@ -0,0 +1,362 @@ -+/* ==================================================================== -+ * Copyright (c) 2001-2014 The OpenSSL Project. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in -+ * the documentation and/or other materials provided with the -+ * distribution. -+ * -+ * 3. All advertising materials mentioning features or use of this -+ * software must display the following acknowledgment: -+ * "This product includes software developed by the OpenSSL Project -+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" -+ * -+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to -+ * endorse or promote products derived from this software without -+ * prior written permission. For written permission, please contact -+ * openssl-core@openssl.org. -+ * -+ * 5. Products derived from this software may not be called "OpenSSL" -+ * nor may "OpenSSL" appear in their names without prior written -+ * permission of the OpenSSL Project. -+ * -+ * 6. Redistributions of any form whatsoever must retain the following -+ * acknowledgment: -+ * "This product includes software developed by the OpenSSL Project -+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)" -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY -+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR -+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. -+ * ==================================================================== -+ * -+ */ -+ -+#include <openssl/opensslconf.h> -+#ifndef OPENSSL_NO_CHACHA_POLY -+# include <openssl/evp.h> -+# include <openssl/chacha20poly1305.h> -+ -+#define FILL_BUFFER ((size_t)128) -+ -+typedef struct { -+ uint8_t iv[12]; -+ uint8_t nonce[48]; -+ size_t aad_l; -+ size_t ct_l; -+ unsigned valid:1; -+ unsigned draft:1; -+ uint8_t poly_buffer[FILL_BUFFER]; -+ uint8_t chacha_buffer[FILL_BUFFER]; -+ uint16_t poly_buffer_used; -+ uint16_t chacha_used; -+ poly1305_state poly_state; -+ #define poly_finish(c,m) CRYPTO_poly1305_finish(&c->poly_state,m) -+} EVP_CHACHA20_POLY1305_CTX; -+ -+static int EVP_chacha20_poly1305_init_draft(EVP_CIPHER_CTX *ctx, -+ const unsigned char *key, -+ const unsigned char *iv, -+ int enc) -+{ -+ EVP_CHACHA20_POLY1305_CTX *aead_ctx = ctx->cipher_data; -+ memcpy(aead_ctx->nonce, key, 32); -+ aead_ctx->valid = 0; -+ aead_ctx->draft = 1; -+ return 1; -+} -+ -+static int EVP_chacha20_poly1305_init(EVP_CIPHER_CTX *ctx, -+ const unsigned char *key, -+ const unsigned char *iv, -+ int enc) -+{ -+ EVP_CHACHA20_POLY1305_CTX *aead_ctx = ctx->cipher_data; -+ memcpy(aead_ctx->nonce, key, 32); -+ memcpy(aead_ctx->iv, iv, 12); -+ aead_ctx->valid = 0; -+ aead_ctx->draft = 0; -+ return 1; -+} -+ -+static int EVP_chacha20_poly1305_cipher(EVP_CIPHER_CTX *ctx, -+ unsigned char *out, -+ const unsigned char *in, -+ size_t inl) -+{ -+ EVP_CHACHA20_POLY1305_CTX *aead_ctx = ctx->cipher_data; -+ uint8_t poly_mac[POLY1305_MAC_LEN]; -+ uint8_t zero[POLY1305_PAD_LEN] = {0}; -+ uint64_t cmp; -+ int i, todo; -+ -+ if (!aead_ctx->valid) -+ return 0; -+ -+ if (inl < POLY1305_MAC_LEN) -+ return -1; -+ -+ /* Fix for MAC */ -+ inl -= POLY1305_MAC_LEN; -+ -+#if (CHAPOLY_ASM) -+ if (!aead_ctx->draft) { -+ aead_ctx->valid = 0; -+ if (ctx->encrypt) { -+ chacha20_poly1305_seal(out, in, inl, -+ aead_ctx->poly_buffer, -+ aead_ctx->poly_buffer_used, -+ aead_ctx->nonce); -+ } else { -+ int cmp = chacha20_poly1305_open(out, in, inl, -+ aead_ctx->poly_buffer, -+ aead_ctx->poly_buffer_used, -+ aead_ctx->nonce); -+ if (!cmp) { -+ OPENSSL_cleanse(out, inl); -+ return -1; -+ } -+ } -+ return inl; -+ } -+#endif -+ -+ if (!ctx->encrypt) { -+ CRYPTO_poly1305_update(&aead_ctx->poly_state, in, inl); -+ } -+ -+ i = 0; -+ if (inl < 256) { -+ /* Consume the buffer we computed during poly initialization */ -+ todo = inl > (FILL_BUFFER - aead_ctx->chacha_used) ? -+ FILL_BUFFER - aead_ctx->chacha_used : -+ inl; -+ -+ for (; i < todo; i++) { -+ out[i] = in[i] ^ aead_ctx->chacha_buffer[i + 64 /*aead_ctx->chacha_used*/]; -+ } -+ -+ } else { -+ /* For long messages don't use precomputed buffer */ -+ ((uint64_t *)(aead_ctx->nonce))[4]--; -+ } -+ -+ todo = inl - i; -+ -+ if (todo) { -+ CRYPTO_chacha_20(&out[i], &in[i], todo, aead_ctx->nonce); -+ } -+ -+ if (ctx->encrypt) { -+ CRYPTO_poly1305_update(&aead_ctx->poly_state, out, inl); -+ } -+ -+ aead_ctx->ct_l += inl; -+ -+ if (!aead_ctx->draft) { -+ /* For RFC padd ciphertext with zeroes, then mac len(aad)||len(ct) */ -+ todo = aead_ctx->ct_l % POLY1305_PAD_LEN ? -+ POLY1305_PAD_LEN - (aead_ctx->ct_l % POLY1305_PAD_LEN) : -+ 0; -+ -+ if (todo) { -+ CRYPTO_poly1305_update(&aead_ctx->poly_state, zero, todo); -+ } -+ -+ CRYPTO_poly1305_update(&aead_ctx->poly_state, (uint8_t*)&aead_ctx->aad_l, 8); -+ CRYPTO_poly1305_update(&aead_ctx->poly_state, (uint8_t*)&aead_ctx->ct_l, 8); -+ -+ } else { -+ /* For the draft don't pad, mac len(ct) */ -+ CRYPTO_poly1305_update(&aead_ctx->poly_state, (uint8_t*)&aead_ctx->ct_l, 8); -+ } -+ aead_ctx->valid = 0; -+ -+ if (ctx->encrypt) { -+ poly_finish(aead_ctx, &out[inl]); -+ return inl + POLY1305_MAC_LEN; -+ -+ } else { /* Decryption */ -+ poly_finish(aead_ctx, poly_mac); -+ /* Constant time comparison */ -+ cmp = (*(uint64_t *)(poly_mac)) ^ (*(uint64_t *)(in + inl)); -+ cmp |= (*(uint64_t *)(poly_mac + 8)) ^ (*(uint64_t *)(in + inl + 8)); -+ -+ if (cmp) { -+ OPENSSL_cleanse(out, inl); -+ return -1; -+ } -+ -+ return inl; -+ } -+} -+ -+ -+static int EVP_chacha20_poly1305_cleanup(EVP_CIPHER_CTX *ctx) -+{ -+ return 1; -+} -+ -+ -+static int EVP_chacha20_poly1305_ctrl(EVP_CIPHER_CTX *ctx, -+ int type, -+ int arg, -+ void *ptr) -+{ -+ EVP_CHACHA20_POLY1305_CTX *aead_ctx = ctx->cipher_data; -+ uint8_t aad[EVP_AEAD_TLS1_AAD_LEN + 8]; -+ uint64_t thirteen = EVP_AEAD_TLS1_AAD_LEN; -+ -+ switch (type) { -+ case EVP_CTRL_AEAD_TLS1_AAD: -+ -+ /* Initialize poly keys */ -+ memset(aead_ctx->chacha_buffer, 0, FILL_BUFFER); -+ -+ if (!aead_ctx->draft) { -+ /* RFC IV = (0 || iv) ^ seq_num */ -+ memset(aead_ctx->nonce + 32, 0, 4); -+ memcpy(aead_ctx->nonce + 36, aead_ctx->iv, 12); -+ *(uint64_t *)(aead_ctx->nonce + 40) ^= *(uint64_t *)(ptr); -+ -+ } else { -+ /* draft IV = 0 || seq_num */ -+ memset(aead_ctx->nonce + 32, 0, 8); -+ memcpy(aead_ctx->nonce + 40, ptr, 8); -+ } -+ -+#if (CHAPOLY_ASM) -+ if (!aead_ctx->draft) { -+ if (arg == EVP_AEAD_TLS1_AAD_LEN) { -+ /* For RFC, use optimized seal/open */ -+ memcpy(aad, ptr, arg); -+ unsigned int len = (aad[arg-2] << 8) | aad[arg-1]; -+ if (!ctx->encrypt) { -+ len -= POLY1305_MAC_LEN; -+ aad[arg-2] = len>>8; -+ aad[arg-1] = len & 0xff; -+ } -+ memcpy(aead_ctx->poly_buffer, aad, arg); -+ } else if (arg <= FILL_BUFFER) { -+ memcpy(aead_ctx->poly_buffer, ptr, arg); -+ } else { -+ aead_ctx->valid = 0; -+ return 0; -+ } -+ aead_ctx->valid = 1; -+ aead_ctx->poly_buffer_used = arg; -+ return POLY1305_MAC_LEN; -+ } -+#endif -+ /* Poly keys = ENC(0) */ -+ CRYPTO_chacha_20(aead_ctx->chacha_buffer, -+ aead_ctx->chacha_buffer, -+ FILL_BUFFER, -+ aead_ctx->nonce); -+ -+ CRYPTO_poly1305_init(&aead_ctx->poly_state, aead_ctx->chacha_buffer); -+ -+ aead_ctx->chacha_used = 64; -+ aead_ctx->poly_buffer_used = 0; -+ aead_ctx->aad_l = arg; -+ aead_ctx->ct_l = 0; -+ -+ /* Absorb AAD */ -+ memcpy(aad, ptr, arg); -+ memset(aad + arg, 0, sizeof(aad) - arg); -+ -+ /* If decrypting fix length for tag */ -+ if (!ctx->encrypt) { -+ unsigned int len = (aad[arg-2] << 8) | aad[arg-1]; -+ len -= POLY1305_MAC_LEN; -+ aad[arg-2] = len>>8; -+ aad[arg-1] = len & 0xff; -+ } -+ -+ if (!aead_ctx->draft) { -+ /* In the RFC, AAD is padded with zeroes */ -+ CRYPTO_poly1305_update(&aead_ctx->poly_state, aad, POLY1305_PAD_LEN); -+ -+ } else { -+ /* In the draft AAD is followed by len(AAD) */ -+ memcpy(&aad[arg], &thirteen, sizeof(thirteen)); -+ CRYPTO_poly1305_update(&aead_ctx->poly_state, aad, arg + sizeof(thirteen)); -+ } -+ -+ aead_ctx->valid = 1; -+ return POLY1305_MAC_LEN; -+ -+ break; -+ -+ default: -+ return 0; -+ break; -+ } -+ -+ return 0; -+} -+ -+ -+#define CUSTOM_FLAGS (\ -+ EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER \ -+ | EVP_CIPH_ALWAYS_CALL_INIT \ -+ | EVP_CIPH_CUSTOM_COPY) -+ -+ -+static const EVP_CIPHER chacha20_poly1305_d = { -+ NID_chacha20_poly1305_draft, -+ 1, /* block size, sorta */ -+ 32, /* key len */ -+ 0, /* iv len */ -+ CUSTOM_FLAGS|EVP_CIPH_FLAG_AEAD_CIPHER, /* flags */ -+ EVP_chacha20_poly1305_init_draft, -+ EVP_chacha20_poly1305_cipher, -+ EVP_chacha20_poly1305_cleanup, -+ sizeof(EVP_CHACHA20_POLY1305_CTX), /* ctx size */ -+ NULL, -+ NULL, -+ EVP_chacha20_poly1305_ctrl, -+ NULL -+ }; -+ -+ -+static const EVP_CIPHER chacha20_poly1305 = { -+ NID_chacha20_poly1305, -+ 1, /* block size, sorta */ -+ 32, /* key len */ -+ 12, /* iv len */ -+ CUSTOM_FLAGS|EVP_CIPH_FLAG_AEAD_CIPHER, /* flags */ -+ EVP_chacha20_poly1305_init, -+ EVP_chacha20_poly1305_cipher, -+ EVP_chacha20_poly1305_cleanup, -+ sizeof(EVP_CHACHA20_POLY1305_CTX), /* ctx size */ -+ NULL, -+ NULL, -+ EVP_chacha20_poly1305_ctrl, -+ NULL -+ }; -+ -+ -+const EVP_CIPHER *EVP_chacha20_poly1305_draft(void) -+{ return &chacha20_poly1305_d; } -+ -+ -+const EVP_CIPHER *EVP_chacha20_poly1305(void) -+{ return &chacha20_poly1305; } -+#endif -diff --git a/crypto/evp/evp.h b/crypto/evp/evp.h -index 39ab793..8feaabc 100644 ---- a/crypto/evp/evp.h -+++ b/crypto/evp/evp.h -@@ -902,6 +902,11 @@ const EVP_CIPHER *EVP_seed_cfb128(void); - const EVP_CIPHER *EVP_seed_ofb(void); - # endif - -+# ifndef OPENSSL_NO_CHACHA_POLY -+const EVP_CIPHER *EVP_chacha20_poly1305(void); -+const EVP_CIPHER *EVP_chacha20_poly1305_draft(void); -+# endif -+ - void OPENSSL_add_all_algorithms_noconf(void); - void OPENSSL_add_all_algorithms_conf(void); - -diff --git a/crypto/objects/obj_dat.h b/crypto/objects/obj_dat.h -index b7e3cf2..26612e2 100644 ---- a/crypto/objects/obj_dat.h -+++ b/crypto/objects/obj_dat.h -@@ -62,9 +62,9 @@ - * [including the GNU Public Licence.] - */ - --#define NUM_NID 958 --#define NUM_SN 951 --#define NUM_LN 951 -+#define NUM_NID 960 -+#define NUM_SN 953 -+#define NUM_LN 953 - #define NUM_OBJ 890 - - static const unsigned char lvalues[6255]={ -@@ -2514,6 +2514,9 @@ static const ASN1_OBJECT nid_objs[NUM_NID]={ - NID_jurisdictionStateOrProvinceName,11,&(lvalues[6232]),0}, - {"jurisdictionC","jurisdictionCountryName", - NID_jurisdictionCountryName,11,&(lvalues[6243]),0}, -+{"CHACHA20-POLY1305","chacha20-poly1305",NID_chacha20_poly1305,0,NULL,0}, -+{"CHACHA20-POLY1305-D","chacha20-poly1305-draft", -+ NID_chacha20_poly1305_draft,0,NULL,0}, - }; - - static const unsigned int sn_objs[NUM_SN]={ -@@ -2574,6 +2577,8 @@ static const unsigned int sn_objs[NUM_SN]={ - 110, /* "CAST5-CFB" */ - 109, /* "CAST5-ECB" */ - 111, /* "CAST5-OFB" */ -+958, /* "CHACHA20-POLY1305" */ -+959, /* "CHACHA20-POLY1305-D" */ - 894, /* "CMAC" */ - 13, /* "CN" */ - 141, /* "CRLReason" */ -@@ -3728,6 +3733,8 @@ static const unsigned int ln_objs[NUM_LN]={ - 677, /* "certicom-arc" */ - 517, /* "certificate extensions" */ - 883, /* "certificateRevocationList" */ -+958, /* "chacha20-poly1305" */ -+959, /* "chacha20-poly1305-draft" */ - 54, /* "challengePassword" */ - 407, /* "characteristic-two-field" */ - 395, /* "clearance" */ -diff --git a/crypto/objects/obj_mac.h b/crypto/objects/obj_mac.h -index 779c309..35a2364 100644 ---- a/crypto/objects/obj_mac.h -+++ b/crypto/objects/obj_mac.h -@@ -4047,6 +4047,14 @@ - #define LN_aes_256_cbc_hmac_sha256 "aes-256-cbc-hmac-sha256" - #define NID_aes_256_cbc_hmac_sha256 950 - -+#define SN_chacha20_poly1305 "CHACHA20-POLY1305" -+#define LN_chacha20_poly1305 "chacha20-poly1305" -+#define NID_chacha20_poly1305 958 -+ -+#define SN_chacha20_poly1305_draft "CHACHA20-POLY1305-D" -+#define LN_chacha20_poly1305_draft "chacha20-poly1305-draft" -+#define NID_chacha20_poly1305_draft 959 -+ - #define SN_dhpublicnumber "dhpublicnumber" - #define LN_dhpublicnumber "X9.42 DH" - #define NID_dhpublicnumber 920 -diff --git a/crypto/objects/obj_mac.num b/crypto/objects/obj_mac.num -index 8e5ea83..a3da329 100644 ---- a/crypto/objects/obj_mac.num -+++ b/crypto/objects/obj_mac.num -@@ -955,3 +955,5 @@ ct_cert_scts 954 - jurisdictionLocalityName 955 - jurisdictionStateOrProvinceName 956 - jurisdictionCountryName 957 -+chacha20_poly1305 958 -+chacha20_poly1305_draft 959 -diff --git a/crypto/objects/objects.txt b/crypto/objects/objects.txt -index b57aabb..6a34a33 100644 ---- a/crypto/objects/objects.txt -+++ b/crypto/objects/objects.txt -@@ -1294,6 +1294,8 @@ kisa 1 6 : SEED-OFB : seed-ofb - : AES-128-CBC-HMAC-SHA256 : aes-128-cbc-hmac-sha256 - : AES-192-CBC-HMAC-SHA256 : aes-192-cbc-hmac-sha256 - : AES-256-CBC-HMAC-SHA256 : aes-256-cbc-hmac-sha256 -+ : CHACHA20-POLY1305 : chacha20-poly1305 -+ : CHACHA20-POLY1305-D : chacha20-poly1305-draft - - ISO-US 10046 2 1 : dhpublicnumber : X9.42 DH - -diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c -index 0385e03..65fdc59 100644 ---- a/ssl/s3_lib.c -+++ b/ssl/s3_lib.c -@@ -2945,6 +2945,110 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { - 256}, - #endif - -+#if !defined(OPENSSL_NO_CHACHA_POLY) -+/* Draft ciphers */ -+ { -+ 1, -+ TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305_D, -+ TLS1_CK_ECDHE_RSA_WITH_CHACHA20_POLY1305_D, -+ SSL_kEECDH, -+ SSL_aRSA, -+ SSL_CHACHA20POLY1305_D, -+ SSL_AEAD, -+ SSL_TLSV1_2, -+ SSL_HIGH, -+ SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, -+ 256, -+ 256, -+ }, -+ -+ { -+ 1, -+ TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_D, -+ TLS1_CK_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_D, -+ SSL_kEECDH, -+ SSL_aECDSA, -+ SSL_CHACHA20POLY1305_D, -+ SSL_AEAD, -+ SSL_TLSV1_2, -+ SSL_HIGH, -+ SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, -+ 256, -+ 256, -+ }, -+ -+ { -+ 1, -+ TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305_D, -+ TLS1_CK_DHE_RSA_WITH_CHACHA20_POLY1305_D, -+ SSL_kEDH, -+ SSL_aRSA, -+ SSL_CHACHA20POLY1305_D, -+ SSL_AEAD, -+ SSL_TLSV1_2, -+ SSL_HIGH, -+ SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, -+ 256, -+ 256, -+ }, -+ /* RFC ciphers */ -+ { -+ 1, -+ TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305, -+ TLS1_CK_ECDHE_RSA_WITH_CHACHA20_POLY1305, -+ SSL_kECDHE, -+ SSL_aRSA, -+ SSL_CHACHA20POLY1305, -+ SSL_AEAD, -+ SSL_TLSV1_2, -+ SSL_HIGH, -+ SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, -+ 256, -+ 256, -+ }, -+ { -+ 1, -+ TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, -+ TLS1_CK_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, -+ SSL_kECDHE, -+ SSL_aECDSA, -+ SSL_CHACHA20POLY1305, -+ SSL_AEAD, -+ SSL_TLSV1_2, -+ SSL_HIGH, -+ SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, -+ 256, -+ 256, -+ }, -+ { -+ 1, -+ TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305, -+ TLS1_CK_DHE_RSA_WITH_CHACHA20_POLY1305, -+ SSL_kDHE, -+ SSL_aRSA, -+ SSL_CHACHA20POLY1305, -+ SSL_AEAD, -+ SSL_TLSV1_2, -+ SSL_HIGH, -+ SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, -+ 256, -+ 256, -+ }, -+ { -+ 1, -+ TLS1_TXT_PSK_WITH_CHACHA20_POLY1305, -+ TLS1_CK_PSK_WITH_CHACHA20_POLY1305, -+ SSL_kPSK, -+ SSL_aPSK, -+ SSL_CHACHA20POLY1305, -+ SSL_AEAD, -+ SSL_TLSV1_2, -+ SSL_HIGH, -+ SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256, -+ 256, -+ 256, -+ }, -+#endif - /* end of list */ - }; - -@@ -4090,6 +4194,7 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, - int i, ii, ok; - CERT *cert; - unsigned long alg_k, alg_a, mask_k, mask_a, emask_k, emask_a; -+ int use_chacha = 0; - - /* Let's see which ciphers we can support */ - cert = s->cert; -@@ -4119,13 +4224,21 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, - fprintf(stderr, "%p:%s\n", (void *)c, c->name); - } - #endif -- -+retry: - if (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE || tls1_suiteb(s)) { - prio = srvr; - allow = clnt; -+ /* Use ChaCha20+Poly1305 iff it's client's most preferred cipher suite */ -+ if (sk_SSL_CIPHER_num(clnt) > 0) { -+ c = sk_SSL_CIPHER_value(clnt, 0); -+ if (c->algorithm_enc == SSL_CHACHA20POLY1305 || -+ c->algorithm_enc == SSL_CHACHA20POLY1305_D) -+ use_chacha = 1; -+ } - } else { - prio = clnt; - allow = srvr; -+ use_chacha = 1; - } - - tls1_set_cert_validity(s); -@@ -4137,6 +4250,11 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, - if ((c->algorithm_ssl & SSL_TLSV1_2) && !SSL_USE_TLS1_2_CIPHERS(s)) - continue; - -+ /* Skip ChaCha unless top client priority */ -+ if ((c->algorithm_enc == SSL_CHACHA20POLY1305 || -+ c->algorithm_enc == SSL_CHACHA20POLY1305_D) && !use_chacha) -+ continue; -+ - ssl_set_cert_masks(cert, c); - mask_k = cert->mask_k; - mask_a = cert->mask_a; -@@ -4216,6 +4334,14 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, STACK_OF(SSL_CIPHER) *clnt, - break; - } - } -+ -+ if (ret == NULL && !use_chacha) { -+ /* If no shared cipher was found due to some unusual preferences, try -+ * again with CHACHA enabled even if not top priority */ -+ use_chacha = 1; -+ goto retry; -+ } -+ - return (ret); - } - -diff --git a/ssl/ssl.h b/ssl/ssl.h -index 90aeb0c..f783baa 100644 ---- a/ssl/ssl.h -+++ b/ssl/ssl.h -@@ -297,6 +297,8 @@ extern "C" { - # define SSL_TXT_CAMELLIA128 "CAMELLIA128" - # define SSL_TXT_CAMELLIA256 "CAMELLIA256" - # define SSL_TXT_CAMELLIA "CAMELLIA" -+# define SSL_TXT_CHACHA20_D "CHACHA20-draft" -+# define SSL_TXT_CHACHA20 "CHACHA20" - - # define SSL_TXT_MD5 "MD5" - # define SSL_TXT_SHA1 "SHA1" -diff --git a/ssl/ssl_ciph.c b/ssl/ssl_ciph.c -index 2ad8f43..23c1c68 100644 ---- a/ssl/ssl_ciph.c -+++ b/ssl/ssl_ciph.c -@@ -164,11 +164,13 @@ - #define SSL_ENC_SEED_IDX 11 - #define SSL_ENC_AES128GCM_IDX 12 - #define SSL_ENC_AES256GCM_IDX 13 --#define SSL_ENC_NUM_IDX 14 -+#define SSL_ENC_CHACHA20POLY1305_DRAFT_IDX 14 -+#define SSL_ENC_CHACHA20POLY1305_IDX 15 -+#define SSL_ENC_NUM_IDX 16 - - static const EVP_CIPHER *ssl_cipher_methods[SSL_ENC_NUM_IDX] = { - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -- NULL, NULL -+ NULL, NULL, NULL, NULL - }; - - #define SSL_COMP_NULL_IDX 0 -@@ -315,6 +317,8 @@ static const SSL_CIPHER cipher_aliases[] = { - {0, SSL_TXT_CAMELLIA256, 0, 0, 0, SSL_CAMELLIA256, 0, 0, 0, 0, 0, 0}, - {0, SSL_TXT_CAMELLIA, 0, 0, 0, SSL_CAMELLIA128 | SSL_CAMELLIA256, 0, 0, 0, - 0, 0, 0}, -+ {0, SSL_TXT_CHACHA20_D, 0, 0, 0, SSL_CHACHA20POLY1305_D, 0, 0, 0, 0, 0, 0}, -+ {0, SSL_TXT_CHACHA20, 0, 0, 0, SSL_CHACHA20POLY1305, 0, 0, 0, 0, 0, 0}, - - /* MAC aliases */ - {0, SSL_TXT_MD5, 0, 0, 0, 0, SSL_MD5, 0, 0, 0, 0, 0}, -@@ -431,6 +435,11 @@ void ssl_load_ciphers(void) - ssl_cipher_methods[SSL_ENC_AES256GCM_IDX] = - EVP_get_cipherbyname(SN_aes_256_gcm); - -+ ssl_cipher_methods[SSL_ENC_CHACHA20POLY1305_DRAFT_IDX] = -+ EVP_chacha20_poly1305_draft(); -+ ssl_cipher_methods[SSL_ENC_CHACHA20POLY1305_IDX] = -+ EVP_chacha20_poly1305(); -+ - ssl_digest_methods[SSL_MD_MD5_IDX] = EVP_get_digestbyname(SN_md5); - ssl_mac_secret_size[SSL_MD_MD5_IDX] = - EVP_MD_size(ssl_digest_methods[SSL_MD_MD5_IDX]); -@@ -581,6 +590,12 @@ int ssl_cipher_get_evp(const SSL_SESSION *s, const EVP_CIPHER **enc, - case SSL_AES256GCM: - i = SSL_ENC_AES256GCM_IDX; - break; -+ case SSL_CHACHA20POLY1305_D: -+ i = SSL_ENC_CHACHA20POLY1305_DRAFT_IDX; -+ break; -+ case SSL_CHACHA20POLY1305: -+ i = SSL_ENC_CHACHA20POLY1305_IDX; -+ break; - default: - i = -1; - break; -@@ -805,6 +820,12 @@ static void ssl_cipher_get_disabled(unsigned long *mkey, unsigned long *auth, - (ssl_cipher_methods[SSL_ENC_GOST89_IDX] == - NULL) ? SSL_eGOST2814789CNT : 0; - *enc |= (ssl_cipher_methods[SSL_ENC_SEED_IDX] == NULL) ? SSL_SEED : 0; -+ *enc |= -+ (ssl_cipher_methods[SSL_ENC_CHACHA20POLY1305_DRAFT_IDX] == -+ NULL) ? SSL_CHACHA20POLY1305_D : 0; -+ *enc |= -+ (ssl_cipher_methods[SSL_ENC_CHACHA20POLY1305_IDX] == -+ NULL) ? SSL_CHACHA20POLY1305 : 0; - - *mac |= (ssl_digest_methods[SSL_MD_MD5_IDX] == NULL) ? SSL_MD5 : 0; - *mac |= (ssl_digest_methods[SSL_MD_SHA1_IDX] == NULL) ? SSL_SHA1 : 0; -@@ -1824,6 +1845,12 @@ char *SSL_CIPHER_description(const SSL_CIPHER *cipher, char *buf, int len) - case SSL_eGOST2814789CNT: - enc = "GOST89(256)"; - break; -+ case SSL_CHACHA20POLY1305_D: -+ enc = "ChaCha20-Poly1305-draft"; -+ break; -+ case SSL_CHACHA20POLY1305: -+ enc = "ChaCha20-Poly1305"; -+ break; - default: - enc = "unknown"; - break; -diff --git a/ssl/ssl_locl.h b/ssl/ssl_locl.h -index 6df725f..dbe68f2 100644 ---- a/ssl/ssl_locl.h -+++ b/ssl/ssl_locl.h -@@ -354,6 +354,8 @@ - # define SSL_SEED 0x00000800L - # define SSL_AES128GCM 0x00001000L - # define SSL_AES256GCM 0x00002000L -+# define SSL_CHACHA20POLY1305_D 0x00040000L -+# define SSL_CHACHA20POLY1305 0x00080000L /* Value from openssl */ - - # define SSL_AES (SSL_AES128|SSL_AES256|SSL_AES128GCM|SSL_AES256GCM) - # define SSL_CAMELLIA (SSL_CAMELLIA128|SSL_CAMELLIA256) -diff --git a/ssl/tls1.h b/ssl/tls1.h -index 7e237d0..ff2e259 100644 ---- a/ssl/tls1.h -+++ b/ssl/tls1.h -@@ -563,6 +563,19 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) - # define TLS1_CK_ECDH_RSA_WITH_AES_128_GCM_SHA256 0x0300C031 - # define TLS1_CK_ECDH_RSA_WITH_AES_256_GCM_SHA384 0x0300C032 - -+/* ChaCha20-Poly1305 ciphersuites draft-agl-tls-chacha20poly1305-01 */ -+# define TLS1_CK_ECDHE_RSA_WITH_CHACHA20_POLY1305_D 0x0300CC13 -+# define TLS1_CK_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_D 0x0300CC14 -+# define TLS1_CK_DHE_RSA_WITH_CHACHA20_POLY1305_D 0x0300CC15 -+/* ChaCha20-Poly1305 ciphersuites from RFC */ -+# define TLS1_CK_ECDHE_RSA_WITH_CHACHA20_POLY1305 0x0300CCA8 -+# define TLS1_CK_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 0x0300CCA9 -+# define TLS1_CK_DHE_RSA_WITH_CHACHA20_POLY1305 0x0300CCAA -+# define TLS1_CK_PSK_WITH_CHACHA20_POLY1305 0x0300CCAB -+# define TLS1_CK_ECDHE_PSK_WITH_CHACHA20_POLY1305 0x0300CCAC -+# define TLS1_CK_DHE_PSK_WITH_CHACHA20_POLY1305 0x0300CCAD -+# define TLS1_CK_RSA_PSK_WITH_CHACHA20_POLY1305 0x0300CCAE -+ - /* - * XXX * Backward compatibility alert: + * Older versions of OpenSSL gave - * some DHE ciphers names with "EDH" + * instead of "DHE". Going forward, we -@@ -713,6 +726,19 @@ SSL_CTX_callback_ctrl(ssl,SSL_CTRL_SET_TLSEXT_TICKET_KEY_CB,(void (*)(void))cb) - # define TLS1_TXT_ECDH_RSA_WITH_AES_128_GCM_SHA256 "ECDH-RSA-AES128-GCM-SHA256" - # define TLS1_TXT_ECDH_RSA_WITH_AES_256_GCM_SHA384 "ECDH-RSA-AES256-GCM-SHA384" - -+/* ChaCha20-Poly1305 ciphersuites draft-agl-tls-chacha20poly1305-01 */ -+# define TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305_D "ECDHE-RSA-CHACHA20-POLY1305-D" -+# define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_D "ECDHE-ECDSA-CHACHA20-POLY1305-D" -+# define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305_D "DHE-RSA-CHACHA20-POLY1305-D" -+/* Chacha20-Poly1305 ciphersuites from RFC */ -+# define TLS1_TXT_ECDHE_RSA_WITH_CHACHA20_POLY1305 "ECDHE-RSA-CHACHA20-POLY1305" -+# define TLS1_TXT_ECDHE_ECDSA_WITH_CHACHA20_POLY1305 "ECDHE-ECDSA-CHACHA20-POLY1305" -+# define TLS1_TXT_DHE_RSA_WITH_CHACHA20_POLY1305 "DHE-RSA-CHACHA20-POLY1305" -+# define TLS1_TXT_PSK_WITH_CHACHA20_POLY1305 "PSK-CHACHA20-POLY1305" -+# define TLS1_TXT_ECDHE_PSK_WITH_CHACHA20_POLY1305 "ECDHE-PSK-CHACHA20-POLY1305" -+# define TLS1_TXT_DHE_PSK_WITH_CHACHA20_POLY1305 "DHE-PSK-CHACHA20-POLY1305" -+# define TLS1_TXT_RSA_PSK_WITH_CHACHA20_POLY1305 "RSA-PSK-CHACHA20-POLY1305" -+ - # define TLS_CT_RSA_SIGN 1 - # define TLS_CT_DSS_SIGN 2 - # define TLS_CT_RSA_FIXED_DH 3 --- -2.10.1 - diff --git a/ssl3-test-failure.patch b/ssl3-test-failure.patch deleted file mode 100644 index d161c3d4a593..000000000000 --- a/ssl3-test-failure.patch +++ /dev/null @@ -1,26 +0,0 @@ -From: Kurt Roeckx <kurt@roeckx.be> -Date: Sun, 6 Sep 2015 16:04:11 +0200 -Subject: Disable SSLv3 test in test suite - -When testing SSLv3 the test program returns 0 for skip. The test for weak DH -expects a failure, but gets success. - -It should probably be changed to return something other than 0 for a skipped -test. ---- - test/testssl | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/test/testssl b/test/testssl -index 747e4ba..1e4370b 100644 ---- a/test/testssl -+++ b/test/testssl -@@ -160,7 +160,7 @@ test_cipher() { - } - - echo "Testing ciphersuites" --for protocol in TLSv1.2 SSLv3; do -+for protocol in TLSv1.2; do - echo "Testing ciphersuites for $protocol" - for cipher in `../util/shlib_wrap.sh ../apps/openssl ciphers "RSA+$protocol" | tr ':' ' '`; do - test_cipher $cipher $protocol |