diff options
author | Einhard Leichtfuß | 2018-12-27 18:55:44 +0100 |
---|---|---|
committer | Einhard Leichtfuß | 2018-12-27 19:06:40 +0100 |
commit | dd137b37a572f28510be3cd7a74ec538ef692689 (patch) | |
tree | 1b30d22a8f47fe36c257aa456a72bffa23c84789 | |
parent | 8fe66d46bef882a390ba637a2be9a2172cd3f423 (diff) | |
download | aur-dd137b37a572f28510be3cd7a74ec538ef692689.tar.gz |
Add a few fixes to the dictionary source
A large part is derived from a diff between 0.48 and Debian's 0.48.5.
Also,
- Split the sed script into two: one to be executed initially and another
one to be executed after webfilter.
- Use Debian's 0.48.5 instead of 0.48.4 (does not change much).
- Correctly set the version of the dictionary (as written by `dict -D').
- Do not patch the Makefile but execute the commands directly.
- Simplify prepare() and build().
- Add a check function.
-rw-r--r-- | .SRCINFO | 13 | ||||
-rw-r--r-- | PKGBUILD | 79 | ||||
-rwxr-xr-x | check.sed | 27 | ||||
-rwxr-xr-x[-rw-r--r--] | fixes.sed | 184 | ||||
-rwxr-xr-x | post_webfilter.sed | 10 |
5 files changed, 264 insertions, 49 deletions
@@ -1,11 +1,10 @@ pkgbase = dict-gcide pkgdesc = GNU version of the Collaborative International Dictionary of English for dictd et al. pkgver = 0.52 - pkgrel = 1 + pkgrel = 2 url = http://gcide.gnu.org.ua/ install = dict-gcide.install arch = any - license = GPL license = GPL3 makedepends = dictd makedepends = sed @@ -13,12 +12,16 @@ pkgbase = dict-gcide provides = dictd-gcide conflicts = dictd-gcide source = fixes.sed - source = http://ftp.debian.org/debian/pool/main/d/dict-gcide/dict-gcide_0.48.4.tar.xz + source = post_webfilter.sed + source = check.sed + source = http://ftp.debian.org/debian/pool/main/d/dict-gcide/dict-gcide_0.48.5.tar.xz source = ftp://ftp.gnu.org/gnu/gcide/gcide-0.52.tar.xz source = ftp://ftp.gnu.org/gnu/gcide/gcide-0.52.tar.xz.sig validpgpkeys = 325F650C4C2B6AD58807327A3602B07F55D0C732 - sha512sums = 3adc1c75e9f96883966236131518b4b8f4c6c588759112417ec46c35d421da87ddd1bac156fa0bf55c64f939cbe1efd235d0854b4dfe8cfa1278917d6fef07f5 - sha512sums = 82939f8c9e2bb30368b673784abaf85724a04fb9ebab382b6908034e0978d4d47039b7667573b88fe074bb29ea232b9e11129ac30ba6e88d5b9271df3b2ad302 + sha512sums = 6fc2caf86d96ac9a14f2676ca4455afe987f37933c03ac672b27cf2de7a4fc893457c97c7fa17dc280debfb4e1a3ab4adc5f7c68ee445605dc3c22665d32a445 + sha512sums = e7c6766b51ef92c4d8669394b6a71f8c4d7249ac63a1eb940f0a3017cb5ff6841f0f4bce7a60ffbcf180801d92b068db6ca46afd1dceac06a9c37c59dcddbfb3 + sha512sums = 1304bdce3776355fcd7827cebbf670ae34377bda78b04bf271ba10a25d92e33f6ec69644e168c30a6ec47808ebacc461de47d9494a5a2a5894d58c21be045973 + sha512sums = a10d46947c7d8b7050c44a12cfb38ed374e971063805e69897652cd8993dc5cf0ecc4d9a12680a8ced30d110d20bc84def75bd587166aa2cfa78529b9397aafe sha512sums = 260e4e7bb30ac857255b9a1f696b18b77bbae06574bb0f0cc35b38be7399bf497d3c561ab0d4a4b7e8fc3127c6bf72efde8e1d4a28fa428f82178d0972f704b3 sha512sums = SKIP @@ -1,61 +1,78 @@ -# Maintainer: Einhard Leichtfuß <archer@respiranto.de> +# Maintainer: Einhard Leichtfuß <alguien@respiranto.de> # Contributor: Tai Chi 
Minh Ralph Eastwood <tcmreastwood@gmail.com> pkgname=dict-gcide -_debver=0.48.4 +_major_debver=0.48 +_debver=${_major_debver}.5 pkgver=0.52 -pkgrel=1 +pkgrel=2 pkgdesc="GNU version of the Collaborative International Dictionary of English for dictd et al." arch=('any') url="http://gcide.gnu.org.ua/" -license=('GPL' 'GPL3') +license=('GPL3') optdepends=('dictd: dict client and server') makedepends=('dictd' 'sed') provides=('dictd-gcide') conflicts=('dictd-gcide') install=${pkgname}.install source=('fixes.sed' + 'post_webfilter.sed' + 'check.sed' "http://ftp.debian.org/debian/pool/main/d/${pkgname}/${pkgname}_${_debver}.tar.xz" "ftp://ftp.gnu.org/gnu/gcide/gcide-${pkgver}.tar.xz"{,.sig}) -sha512sums=('3adc1c75e9f96883966236131518b4b8f4c6c588759112417ec46c35d421da87ddd1bac156fa0bf55c64f939cbe1efd235d0854b4dfe8cfa1278917d6fef07f5' - '82939f8c9e2bb30368b673784abaf85724a04fb9ebab382b6908034e0978d4d47039b7667573b88fe074bb29ea232b9e11129ac30ba6e88d5b9271df3b2ad302' +sha512sums=('6fc2caf86d96ac9a14f2676ca4455afe987f37933c03ac672b27cf2de7a4fc893457c97c7fa17dc280debfb4e1a3ab4adc5f7c68ee445605dc3c22665d32a445' + 'e7c6766b51ef92c4d8669394b6a71f8c4d7249ac63a1eb940f0a3017cb5ff6841f0f4bce7a60ffbcf180801d92b068db6ca46afd1dceac06a9c37c59dcddbfb3' + '1304bdce3776355fcd7827cebbf670ae34377bda78b04bf271ba10a25d92e33f6ec69644e168c30a6ec47808ebacc461de47d9494a5a2a5894d58c21be045973' + 'a10d46947c7d8b7050c44a12cfb38ed374e971063805e69897652cd8993dc5cf0ecc4d9a12680a8ced30d110d20bc84def75bd587166aa2cfa78529b9397aafe' '260e4e7bb30ac857255b9a1f696b18b77bbae06574bb0f0cc35b38be7399bf497d3c561ab0d4a4b7e8fc3127c6bf72efde8e1d4a28fa428f82178d0972f704b3' 'SKIP') validpgpkeys=('325F650C4C2B6AD58807327A3602B07F55D0C732') -prepare() { - ln -s "${srcdir}/fixes.sed" "${pkgname}-${_debver}/" - sed -i 's`\(\./webfilter\) | \(\./webfmt -c\)`\1 | sed -Ef fixes.sed | \2`' "${pkgname}-${_debver}/Makefile.in" - - rm -rf "${pkgname}-${_debver}/cide" - ln -s "${srcdir}/gcide-${pkgver}" "${pkgname}-${_debver}/cide" - 
cd "${pkgname}-${_debver}/cide" - for file in CIDE.? - do - mv "$file" "${file,,}" - done - mv tagset.{txt,web} - mv webfont.{txt,asc} - mv README{,.dic} +prepare() +{ + sed -Ei \ + "s/\"(The Collaborative International Dictionary of English) v.${_major_debver}\"/\"\\1 v.${pkgver}\"/" \ + "${pkgname}-${_debver}/scan.l" } -build() { - cd "${pkgname}-${_debver}" - export CPPFLAGS="-D_FORTIFY_SOURCE=0" - ./configure --with-local-libmaa --prefix=/usr +build() +{ + cd ${pkgname}-${_debver} + + CPPFLAGS="-D_FORTIFY_SOURCE=0" ./configure --with-local-libmaa make -j1 - mkdir "../final_data" - make dictdir="../final_data" install - # Fix wrong order. - LANG=C sort --dictionary-order --ignore-case \ - -o ../final_data/gcide.index{,} + + # Do the conversion explicitly, instead of `make db', to account for all + # the differences to the original build process. + # LANG=C is required so that the index file is properly sorted. + ../fixes.sed ../gcide-${pkgver}/CIDE.? \ + | sed -f debian/sedfile \ + | ./webfilter \ + | ../post_webfilter.sed \ + | tee pre_webfmt.data \ + | LANG=C ./webfmt -c + + dictzip -v gcide.dict +} + +check() +{ + errors="$(./check.sed < ${pkgname}-${_debver}/pre_webfmt.data)" + + if test -n "$errors" + then + echo "Errors found:" + echo "$errors" + return 1 + fi } package() { install -m 0755 -d "${pkgdir}/usr/share/dictd" install -m 0644 -t "${pkgdir}/usr/share/dictd/" \ - final_data/gcide.{dict.dz,index} + ${pkgname}-${_debver}/gcide.{dict.dz,index} + install -m 0755 -d "${pkgdir}/usr/share/doc/dict-gcide" install -m 0644 -t "${pkgdir}/usr/share/doc/dict-gcide/" \ - "${pkgname}-${_debver}/cide/"{README.dic,INFO} + gcide-${pkgver}/{README,INFO} } diff --git a/check.sed b/check.sed new file mode 100755 index 000000000000..002fbbc5c84c --- /dev/null +++ b/check.sed @@ -0,0 +1,27 @@ +#!/usr/bin/env -S sed -nEf + +# Small sed script to find (possible) problems. +# Particularly to be used on update to a new gcide version. +# +# To be run before webfmt. 
+# For each test, below the action formerly in post_webfmt.sed. +# - disregarding whether it is a good one or not. +# +# If this script prints anything, webfmt probably fails. +# + + +\`<col>([^<]*),? <cd>([^<]*)</col>` p +#s`<col>([^<]*),? <cd>([^<]*)</col>`<col>\1</col>, <cd>\2`g + +\`<qau>([^<]*)(<break>)` p +#s`<qau>([^<]*)(<break>)`<qau>\1</qau>\2` + +\`^([^<]*)</qau>` p +#s`^([^<]*)</qau>`\1` + +\`^([^<]*)</au>` p +#s`^([^<]*)</au>`\1` + +\`</>` p +#s`</>``g diff --git a/fixes.sed b/fixes.sed index b135ecaaffb3..ff8cf356dc40 100644..100755 --- a/fixes.sed +++ b/fixes.sed @@ -1,20 +1,178 @@ -s`(<altname>|<contr>)<cref>([^<]*)</cref>`\1\2`g -s`(<stype>|<prod>)<ecol>([^<]*)</ecol>`\1\2`g +#!/usr/bin/env -S sed -Ef -s`<col>([^<]*),? <cd>([^<]*)</col>`<col>\1</col>, <cd>\2`g +# A large part of the changes is derived from a diff between 0.48 and +# Debian's 0.48.5, excluding changes included in the new gcide release and +# those that do not change the final output. +# +# Sources: +# http://archive.debian.org/debian-archive/debian/pool/main/d/dict-gcide/dict-gcide_0.48.orig.tar.gz +# http://deb.debian.org/debian/pool/main/d/dict-gcide/dict-gcide_0.48.5.tar.xz +# TODO: +# * '[<source></source>]' +# * '</item><item>' (dict -d gcide legislation) + + +## GENERAL + +# Remove lines pretending to be in a particular font. +\`^<p>\s*(<note>\s*)?<hand/\s*(<[^>]*type>\s*)?This\s*line\s*is\s*printed\s*in`, \`^\s*$` d + +# Remove book and publ tags in a qau element. +# <publ> seems to be removed by webfmt, so apparently not necessary to +# remove here. s`(<qau>[^<]*)(<book>|<publ>)([^<]*)(</book>|</publ>)`\1\3`g -s`<qau>([^<]*)(<break>)`<qau>\1</qau>\2` -s`^([^<]*)</qau>`\1` -s`<qau>([^<]*) (\([^)]{20}[^<]*)</qau>`<qau>\1</qau> \2` -s`<au>([^<]*)<break>`<au>\1</au><break>` -s`^([^<]*)</au>`\1` +## CIDE.A + +# Typo. +s`^(<p><q><qex>A priori</qex>, that is,) form (these necessities)`\1 from \2` + +# Add some semicolon. 
+\`^<mhw>\{ <hw>Ar"que\*bus</hw>, <hw>Ar"que\*buse</hw> \}</mhw>` { + s`(<def>A sort of hand gun or firearm) (a contrivance)`\1\; \2` +} + + +## CIDE.B + +# Add a closing paranthis. +s`\(Thirteenth Greatest of Centuries, 1913\.`&)` + + +## CIDE.C + +# Remove empty element. +s`<stype></stype>`` + + +## CIDE.D + +# Descartes did not live one and a half millennia. +\`^<hw>Descartes</hw>` { + s`(born) 159, (died)`\1 1596, \2` +} + +# Typo; doubled quote. +\`^<p><syn><b>Syn\.</b> -- To vary\; disagree\; dissent\; dispute\;` { + s`(<xex>)aiffer (with</xex>)`\1differ \2` + s`(<rdquo/){2}`\1` +} + +# Remove empty element. +\`^<p><ent>diploid</ent>`, \`<ent>` { + s`^(B: Oh, how I wish.*</q>)\s*<rj><qau></qau></rj>(</p>)`\1\2` +} + +# In 0.48, the 'between' was missing; I prefer Debian's way of solving it. +\`^<p><ent>Doublet</ent>`, \`<ent>` { + \`^<p><sn>4\.</sn>` { + s`(with a) (color between them)`\1 layer of \2` + } +} + + +## CIDE.E + +# Restrict qau element to the author themself. +s`^(<qau>Mark Feeney)(<br/)`\1</qau>\2` +\`^Copyright 1999 Globe Newspaper Company\.` { + s`</qau>`` +} + + +## CIDE.F + +s`measurments`measurements` + +# Fix misattribution. +s`(Dostoevsky's) (War and Peace)`\1 Crime and Punishment\; or Tolstoy's \2` + +s`compIy`comply` + + +## CIDE.I + +# Remove qau tags. +\`^<rj><qau>Dr\. Rod Beavon<br/`, +2 { + s`^(<rj>)<qau>(Dr\. Rod Beavon<br/)`\1\2</rj>` + s`^\((17 Dean's Yard London SW1P 3PB)\;(<br/)`<rj>\1</rj>\2` + s`(e-mail: rod\.beavon@westminster\.org\.uk)</qau>(</rj><br/)`<rj>\1\2` +} + + +## M + +# Remove extraneous ', in'. +s`^(<qau>Andrew Hood), in`\1` + + +## P + +# Restrict col element's content. +\`^<p><cs><col><b>Park of artillery</b></col>` { + s`(<col><b>industrial park</b>) `\1</col>` + s`</col>(</cs><br/)$`\1` +} + + +## R + +# Typo. +\`^<hw>Re\*cu"sant</hw>` { + s`\bchurc\b`church` +} + +# Remove text centering around a referenced image. +# Debian removed the preceding paragraph as well. I do not agree. 
+\`<a href="\\cide\\more\\lilac-breasted-roller\.jpg">`, \`zambezi\.co\.uk` d + + +## S + +# Fix badly formatted closing tag (<i>(.*)</> -> <i>\1</i>). +s`<([^><]*)>([^<]*)</>`<\1>\2</\1>` + +# If one wanted to fix more than necessary (Debian does): +#\`^<hw>Ses\*quip"li\*cate</hw>` { +# s`^`<p>` +# s`<(/?)i>`<\1xex>`g +# +# s`(<xex>)(a|b)(</xex>)<prime/`\1\2\\'\''b7\3`g +# s`<prime/`\\'\''b7` +#} + +s`\<(something)l\>`\1` + +s`rappng`rapping` + + +## T + +# Restrict qau to the author themself. +\`^<rj><qau>Andrew Forbes/CPA`, +1 { + s`^<rj><qau>Andrew Forbes/CPA`&</qau>` + s`^(\(from.*)</qau>(</rj><br/)$`\1\2` +} + + +## U + +# Restrict au element to the author themself. +\`^<au>Kari Jensen \(University of Wisconsin`, +1 { + s`^<au>Kari Jensen`&</au>` + s`^(\[available at.*)</au>(<br/)$`\1\2` +} + + +## V + +# Avoid double empty line in dict's output. +\`^<p><cs><col><b>Principle of virtual velocities</b>` { + s`-(- <col><b>Virtual image</b></col>)`\1` +} -s`,? <[^>]*></[^>]*> ?``g -s`</>``g -s`(<qau>Andrew Hood), in`\1` -s`\(Thirteenth Greatest of Centuries, 1913.`&)` -/<a href="\\cide\\more\\lilac-breasted-roller.jpg">/,/zambezi.co.uk/d +## W +s`\<(Where\*?)form\>`\1from` diff --git a/post_webfilter.sed b/post_webfilter.sed new file mode 100755 index 000000000000..7c3cdf8b7dd0 --- /dev/null +++ b/post_webfilter.sed @@ -0,0 +1,10 @@ +#!/usr/bin/env -S sed -Ef + +s`(<altname>|<contr>)<cref>([^<]*)</cref>`\1\2`g +s`(<stype>|<prod>)<ecol>([^<]*)</ecol>`\1\2`g + +# Restrict qau tag to the author themself. +s`<qau>([^<]*) (\([^)]{20}[^<]*)</qau>`<qau>\1</qau> \2`gp + +# Replace for example, '(<mcol><col>.+</col>), <col></col>(</mcol>)' -> '\1' +s`,?\s+<([^>]*)></\1>\s*``g |