summarylogtreecommitdiffstats
diff options
context:
space:
mode:
authorEinhard Leichtfuß2018-12-27 18:55:44 +0100
committerEinhard Leichtfuß2018-12-27 19:06:40 +0100
commitdd137b37a572f28510be3cd7a74ec538ef692689 (patch)
tree1b30d22a8f47fe36c257aa456a72bffa23c84789
parent8fe66d46bef882a390ba637a2be9a2172cd3f423 (diff)
downloadaur-dd137b37a572f28510be3cd7a74ec538ef692689.tar.gz
Add a few fixes to the dictionary source
A large part is derived from a diff between 0.48 and Debian's 0.48.5. Also: - Split the sed script into one to be executed initially and another one to be executed after webfilter. - Use Debian's 0.48.5 instead of 0.48.4 (does not change much). - Correctly set the version of the dictionary (as written by `dict -D'). - Do not patch the Makefile but execute the commands directly. - Simplify prepare() and build(). - Add a check() function.
-rw-r--r--.SRCINFO13
-rw-r--r--PKGBUILD79
-rwxr-xr-xcheck.sed27
-rwxr-xr-x[-rw-r--r--]fixes.sed184
-rwxr-xr-xpost_webfilter.sed10
5 files changed, 264 insertions, 49 deletions
diff --git a/.SRCINFO b/.SRCINFO
index 8e103539a766..7832338496b8 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,11 +1,10 @@
pkgbase = dict-gcide
pkgdesc = GNU version of the Collaborative International Dictionary of English for dictd et al.
pkgver = 0.52
- pkgrel = 1
+ pkgrel = 2
url = http://gcide.gnu.org.ua/
install = dict-gcide.install
arch = any
- license = GPL
license = GPL3
makedepends = dictd
makedepends = sed
@@ -13,12 +12,16 @@ pkgbase = dict-gcide
provides = dictd-gcide
conflicts = dictd-gcide
source = fixes.sed
- source = http://ftp.debian.org/debian/pool/main/d/dict-gcide/dict-gcide_0.48.4.tar.xz
+ source = post_webfilter.sed
+ source = check.sed
+ source = http://ftp.debian.org/debian/pool/main/d/dict-gcide/dict-gcide_0.48.5.tar.xz
source = ftp://ftp.gnu.org/gnu/gcide/gcide-0.52.tar.xz
source = ftp://ftp.gnu.org/gnu/gcide/gcide-0.52.tar.xz.sig
validpgpkeys = 325F650C4C2B6AD58807327A3602B07F55D0C732
- sha512sums = 3adc1c75e9f96883966236131518b4b8f4c6c588759112417ec46c35d421da87ddd1bac156fa0bf55c64f939cbe1efd235d0854b4dfe8cfa1278917d6fef07f5
- sha512sums = 82939f8c9e2bb30368b673784abaf85724a04fb9ebab382b6908034e0978d4d47039b7667573b88fe074bb29ea232b9e11129ac30ba6e88d5b9271df3b2ad302
+ sha512sums = 6fc2caf86d96ac9a14f2676ca4455afe987f37933c03ac672b27cf2de7a4fc893457c97c7fa17dc280debfb4e1a3ab4adc5f7c68ee445605dc3c22665d32a445
+ sha512sums = e7c6766b51ef92c4d8669394b6a71f8c4d7249ac63a1eb940f0a3017cb5ff6841f0f4bce7a60ffbcf180801d92b068db6ca46afd1dceac06a9c37c59dcddbfb3
+ sha512sums = 1304bdce3776355fcd7827cebbf670ae34377bda78b04bf271ba10a25d92e33f6ec69644e168c30a6ec47808ebacc461de47d9494a5a2a5894d58c21be045973
+ sha512sums = a10d46947c7d8b7050c44a12cfb38ed374e971063805e69897652cd8993dc5cf0ecc4d9a12680a8ced30d110d20bc84def75bd587166aa2cfa78529b9397aafe
sha512sums = 260e4e7bb30ac857255b9a1f696b18b77bbae06574bb0f0cc35b38be7399bf497d3c561ab0d4a4b7e8fc3127c6bf72efde8e1d4a28fa428f82178d0972f704b3
sha512sums = SKIP
diff --git a/PKGBUILD b/PKGBUILD
index 023cfad20dbc..c2ef10024746 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -1,61 +1,78 @@
-# Maintainer: Einhard Leichtfuß <archer@respiranto.de>
+# Maintainer: Einhard Leichtfuß <alguien@respiranto.de>
# Contributor: Tai Chi Minh Ralph Eastwood <tcmreastwood@gmail.com>
pkgname=dict-gcide
-_debver=0.48.4
+_major_debver=0.48
+_debver=${_major_debver}.5
pkgver=0.52
-pkgrel=1
+pkgrel=2
pkgdesc="GNU version of the Collaborative International Dictionary of English for dictd et al."
arch=('any')
url="http://gcide.gnu.org.ua/"
-license=('GPL' 'GPL3')
+license=('GPL3')
optdepends=('dictd: dict client and server')
makedepends=('dictd' 'sed')
provides=('dictd-gcide')
conflicts=('dictd-gcide')
install=${pkgname}.install
source=('fixes.sed'
+ 'post_webfilter.sed'
+ 'check.sed'
"http://ftp.debian.org/debian/pool/main/d/${pkgname}/${pkgname}_${_debver}.tar.xz"
"ftp://ftp.gnu.org/gnu/gcide/gcide-${pkgver}.tar.xz"{,.sig})
-sha512sums=('3adc1c75e9f96883966236131518b4b8f4c6c588759112417ec46c35d421da87ddd1bac156fa0bf55c64f939cbe1efd235d0854b4dfe8cfa1278917d6fef07f5'
- '82939f8c9e2bb30368b673784abaf85724a04fb9ebab382b6908034e0978d4d47039b7667573b88fe074bb29ea232b9e11129ac30ba6e88d5b9271df3b2ad302'
+sha512sums=('6fc2caf86d96ac9a14f2676ca4455afe987f37933c03ac672b27cf2de7a4fc893457c97c7fa17dc280debfb4e1a3ab4adc5f7c68ee445605dc3c22665d32a445'
+ 'e7c6766b51ef92c4d8669394b6a71f8c4d7249ac63a1eb940f0a3017cb5ff6841f0f4bce7a60ffbcf180801d92b068db6ca46afd1dceac06a9c37c59dcddbfb3'
+ '1304bdce3776355fcd7827cebbf670ae34377bda78b04bf271ba10a25d92e33f6ec69644e168c30a6ec47808ebacc461de47d9494a5a2a5894d58c21be045973'
+ 'a10d46947c7d8b7050c44a12cfb38ed374e971063805e69897652cd8993dc5cf0ecc4d9a12680a8ced30d110d20bc84def75bd587166aa2cfa78529b9397aafe'
'260e4e7bb30ac857255b9a1f696b18b77bbae06574bb0f0cc35b38be7399bf497d3c561ab0d4a4b7e8fc3127c6bf72efde8e1d4a28fa428f82178d0972f704b3'
'SKIP')
validpgpkeys=('325F650C4C2B6AD58807327A3602B07F55D0C732')
-prepare() {
- ln -s "${srcdir}/fixes.sed" "${pkgname}-${_debver}/"
- sed -i 's`\(\./webfilter\) | \(\./webfmt -c\)`\1 | sed -Ef fixes.sed | \2`' "${pkgname}-${_debver}/Makefile.in"
-
- rm -rf "${pkgname}-${_debver}/cide"
- ln -s "${srcdir}/gcide-${pkgver}" "${pkgname}-${_debver}/cide"
- cd "${pkgname}-${_debver}/cide"
- for file in CIDE.?
- do
- mv "$file" "${file,,}"
- done
- mv tagset.{txt,web}
- mv webfont.{txt,asc}
- mv README{,.dic}
+prepare()
+{
+ sed -Ei \
+ "s/\"(The Collaborative International Dictionary of English) v.${_major_debver}\"/\"\\1 v.${pkgver}\"/" \
+ "${pkgname}-${_debver}/scan.l"
}
-build() {
- cd "${pkgname}-${_debver}"
- export CPPFLAGS="-D_FORTIFY_SOURCE=0"
- ./configure --with-local-libmaa --prefix=/usr
+build()
+{
+ cd ${pkgname}-${_debver}
+
+ CPPFLAGS="-D_FORTIFY_SOURCE=0" ./configure --with-local-libmaa
make -j1
- mkdir "../final_data"
- make dictdir="../final_data" install
- # Fix wrong order.
- LANG=C sort --dictionary-order --ignore-case \
- -o ../final_data/gcide.index{,}
+
+ # Do the conversion explicitly, instead of `make db', to account for all
+ # the differences from the original build process.
+ # LANG=C is required so that the index file is properly sorted.
+ ../fixes.sed ../gcide-${pkgver}/CIDE.? \
+ | sed -f debian/sedfile \
+ | ./webfilter \
+ | ../post_webfilter.sed \
+ | tee pre_webfmt.data \
+ | LANG=C ./webfmt -c
+
+ dictzip -v gcide.dict
+}
+
+check()
+{
+ errors="$(./check.sed < ${pkgname}-${_debver}/pre_webfmt.data)"
+
+ if test -n "$errors"
+ then
+ echo "Errors found:"
+ echo "$errors"
+ return 1
+ fi
}
package()
{
install -m 0755 -d "${pkgdir}/usr/share/dictd"
install -m 0644 -t "${pkgdir}/usr/share/dictd/" \
- final_data/gcide.{dict.dz,index}
+ ${pkgname}-${_debver}/gcide.{dict.dz,index}
+
install -m 0755 -d "${pkgdir}/usr/share/doc/dict-gcide"
install -m 0644 -t "${pkgdir}/usr/share/doc/dict-gcide/" \
- "${pkgname}-${_debver}/cide/"{README.dic,INFO}
+ gcide-${pkgver}/{README,INFO}
}
diff --git a/check.sed b/check.sed
new file mode 100755
index 000000000000..002fbbc5c84c
--- /dev/null
+++ b/check.sed
@@ -0,0 +1,27 @@
+#!/usr/bin/env -S sed -nEf
+
+# Small sed script to find (possible) problems.
+# Particularly to be used on update to a new gcide version.
+#
+# To be run before webfmt.
+# Below each test, the action formerly taken in post_webfmt.sed is kept as a
+# comment - disregarding whether it is a good one or not.
+#
+# If this script prints anything, webfmt probably fails.
+#
+
+
+\`<col>([^<]*),? <cd>([^<]*)</col>` p
+#s`<col>([^<]*),? <cd>([^<]*)</col>`<col>\1</col>, <cd>\2`g
+
+\`<qau>([^<]*)(<break>)` p
+#s`<qau>([^<]*)(<break>)`<qau>\1</qau>\2`
+
+\`^([^<]*)</qau>` p
+#s`^([^<]*)</qau>`\1`
+
+\`^([^<]*)</au>` p
+#s`^([^<]*)</au>`\1`
+
+\`</>` p
+#s`</>``g
diff --git a/fixes.sed b/fixes.sed
index b135ecaaffb3..ff8cf356dc40 100644..100755
--- a/fixes.sed
+++ b/fixes.sed
@@ -1,20 +1,178 @@
-s`(<altname>|<contr>)<cref>([^<]*)</cref>`\1\2`g
-s`(<stype>|<prod>)<ecol>([^<]*)</ecol>`\1\2`g
+#!/usr/bin/env -S sed -Ef
-s`<col>([^<]*),? <cd>([^<]*)</col>`<col>\1</col>, <cd>\2`g
+# A large part of the changes is derived from a diff between 0.48 and
+# Debian's 0.48.5, excluding changes included in the new gcide release and
+# those that do not change the final output.
+#
+# Sources:
+# http://archive.debian.org/debian-archive/debian/pool/main/d/dict-gcide/dict-gcide_0.48.orig.tar.gz
+# http://deb.debian.org/debian/pool/main/d/dict-gcide/dict-gcide_0.48.5.tar.xz
+# TODO:
+# * '[<source></source>]'
+# * '</item><item>' (dict -d gcide legislation)
+
+
+## GENERAL
+
+# Remove lines pretending to be in a particular font.
+\`^<p>\s*(<note>\s*)?<hand/\s*(<[^>]*type>\s*)?This\s*line\s*is\s*printed\s*in`, \`^\s*$` d
+
+# Remove book and publ tags in a qau element.
+# <publ> seems to be removed by webfmt, so apparently not necessary to
+# remove here.
s`(<qau>[^<]*)(<book>|<publ>)([^<]*)(</book>|</publ>)`\1\3`g
-s`<qau>([^<]*)(<break>)`<qau>\1</qau>\2`
-s`^([^<]*)</qau>`\1`
-s`<qau>([^<]*) (\([^)]{20}[^<]*)</qau>`<qau>\1</qau> \2`
-s`<au>([^<]*)<break>`<au>\1</au><break>`
-s`^([^<]*)</au>`\1`
+## CIDE.A
+
+# Typo.
+s`^(<p><q><qex>A priori</qex>, that is,) form (these necessities)`\1 from \2`
+
+# Add a missing semicolon.
+\`^<mhw>\{ <hw>Ar"que\*bus</hw>, <hw>Ar"que\*buse</hw> \}</mhw>` {
+ s`(<def>A sort of hand gun or firearm) (a contrivance)`\1\; \2`
+}
+
+
+## CIDE.B
+
+# Add a closing parenthesis.
+s`\(Thirteenth Greatest of Centuries, 1913\.`&)`
+
+
+## CIDE.C
+
+# Remove empty element.
+s`<stype></stype>``
+
+
+## CIDE.D
+
+# Descartes did not live one and a half millennia.
+\`^<hw>Descartes</hw>` {
+ s`(born) 159, (died)`\1 1596, \2`
+}
+
+# Typo; doubled quote.
+\`^<p><syn><b>Syn\.</b> -- To vary\; disagree\; dissent\; dispute\;` {
+ s`(<xex>)aiffer (with</xex>)`\1differ \2`
+ s`(<rdquo/){2}`\1`
+}
+
+# Remove empty element.
+\`^<p><ent>diploid</ent>`, \`<ent>` {
+ s`^(B: Oh, how I wish.*</q>)\s*<rj><qau></qau></rj>(</p>)`\1\2`
+}
+
+# In 0.48, the 'between' was missing; I prefer Debian's way of solving it.
+\`^<p><ent>Doublet</ent>`, \`<ent>` {
+ \`^<p><sn>4\.</sn>` {
+ s`(with a) (color between them)`\1 layer of \2`
+ }
+}
+
+
+## CIDE.E
+
+# Restrict qau element to the author themself.
+s`^(<qau>Mark Feeney)(<br/)`\1</qau>\2`
+\`^Copyright 1999 Globe Newspaper Company\.` {
+ s`</qau>``
+}
+
+
+## CIDE.F
+
+s`measurments`measurements`
+
+# Fix misattribution.
+s`(Dostoevsky's) (War and Peace)`\1 Crime and Punishment\; or Tolstoy's \2`
+
+s`compIy`comply`
+
+
+## CIDE.I
+
+# Remove qau tags.
+\`^<rj><qau>Dr\. Rod Beavon<br/`, +2 {
+ s`^(<rj>)<qau>(Dr\. Rod Beavon<br/)`\1\2</rj>`
+ s`^\((17 Dean's Yard London SW1P 3PB)\;(<br/)`<rj>\1</rj>\2`
+ s`(e-mail: rod\.beavon@westminster\.org\.uk)</qau>(</rj><br/)`<rj>\1\2`
+}
+
+
+## M
+
+# Remove extraneous ', in'.
+s`^(<qau>Andrew Hood), in`\1`
+
+
+## P
+
+# Restrict col element's content.
+\`^<p><cs><col><b>Park of artillery</b></col>` {
+ s`(<col><b>industrial park</b>) `\1</col>`
+ s`</col>(</cs><br/)$`\1`
+}
+
+
+## R
+
+# Typo.
+\`^<hw>Re\*cu"sant</hw>` {
+ s`\bchurc\b`church`
+}
+
+# Remove text centering around a referenced image.
+# Debian removed the preceding paragraph as well. I do not agree.
+\`<a href="\\cide\\more\\lilac-breasted-roller\.jpg">`, \`zambezi\.co\.uk` d
+
+
+## S
+
+# Fix badly formatted closing tag (<i>(.*)</> -> <i>\1</i>).
+s`<([^><]*)>([^<]*)</>`<\1>\2</\1>`
+
+# If one wanted to fix more than necessary (Debian does):
+#\`^<hw>Ses\*quip"li\*cate</hw>` {
+# s`^`<p>`
+# s`<(/?)i>`<\1xex>`g
+#
+# s`(<xex>)(a|b)(</xex>)<prime/`\1\2\\'\''b7\3`g
+# s`<prime/`\\'\''b7`
+#}
+
+s`\<(something)l\>`\1`
+
+s`rappng`rapping`
+
+
+## T
+
+# Restrict qau to the author themself.
+\`^<rj><qau>Andrew Forbes/CPA`, +1 {
+ s`^<rj><qau>Andrew Forbes/CPA`&</qau>`
+ s`^(\(from.*)</qau>(</rj><br/)$`\1\2`
+}
+
+
+## U
+
+# Restrict au element to the author themself.
+\`^<au>Kari Jensen \(University of Wisconsin`, +1 {
+ s`^<au>Kari Jensen`&</au>`
+ s`^(\[available at.*)</au>(<br/)$`\1\2`
+}
+
+
+## V
+
+# Avoid double empty line in dict's output.
+\`^<p><cs><col><b>Principle of virtual velocities</b>` {
+ s`-(- <col><b>Virtual image</b></col>)`\1`
+}
-s`,? <[^>]*></[^>]*> ?``g
-s`</>``g
-s`(<qau>Andrew Hood), in`\1`
-s`\(Thirteenth Greatest of Centuries, 1913.`&)`
-/<a href="\\cide\\more\\lilac-breasted-roller.jpg">/,/zambezi.co.uk/d
+## W
+s`\<(Where\*?)form\>`\1from`
diff --git a/post_webfilter.sed b/post_webfilter.sed
new file mode 100755
index 000000000000..7c3cdf8b7dd0
--- /dev/null
+++ b/post_webfilter.sed
@@ -0,0 +1,10 @@
+#!/usr/bin/env -S sed -Ef
+
+s`(<altname>|<contr>)<cref>([^<]*)</cref>`\1\2`g
+s`(<stype>|<prod>)<ecol>([^<]*)</ecol>`\1\2`g
+
+# Restrict qau tag to the author themself.
+s`<qau>([^<]*) (\([^)]{20}[^<]*)</qau>`<qau>\1</qau> \2`gp
+
+# Remove empty elements (and a preceding comma), for example: '(<mcol><col>.+</col>), <col></col>(</mcol>)' -> '\1\2'
+s`,?\s+<([^>]*)></\1>\s*``g