diff options
author | Connor McFarlane | 2021-04-17 13:57:57 +0100 |
---|---|---|
committer | Connor McFarlane | 2021-04-17 13:57:57 +0100 |
commit | c80509e815a3e6043827432bba563cf8b8ff4f99 (patch) | |
tree | 11a4d95b68433d013965dbdcf91a1fcd2b6a12a2 | |
download | aur-c80509e815a3e6043827432bba563cf8b8ff4f99.tar.gz |
Initial commit
-rw-r--r-- | .SRCINFO | 11 | ||||
-rw-r--r-- | PKGBUILD | 14 | ||||
-rw-r--r-- | tr-unicode.pl | 186 |
3 files changed, 211 insertions, 0 deletions
=begin comment

Packaging metadata that was added together with this script in the initial
commit, kept verbatim from the commit diff.

diff --git a/.SRCINFO b/.SRCINFO
new file mode 100644
index 000000000000..4ff57a2adb31
--- /dev/null
+++ b/.SRCINFO
@@ -0,0 +1,11 @@
+pkgbase = tr-unicode
+	pkgdesc = Unix tr command reimplemented in perl with unicode support
+	pkgver = 1
+	pkgrel = 1
+	arch = any
+	depends = perl
+	source = tr-unicode.pl
+	md5sums = ebad335d1225fa3a203bc3d21e45aaea
+
+pkgname = tr-unicode
+
diff --git a/PKGBUILD b/PKGBUILD
new file mode 100644
index 000000000000..4d50f5925e9b
--- /dev/null
+++ b/PKGBUILD
@@ -0,0 +1,14 @@
+# Maintainer: Connor McFarlane <cm at semtex dot net>
+
+pkgname=tr-unicode
+pkgver=1
+pkgrel=1
+pkgdesc='Unix tr command reimplemented in perl with unicode support'
+arch=('any')
+depends=('perl')
+source=('tr-unicode.pl')
+md5sums=('ebad335d1225fa3a203bc3d21e45aaea')
+
+package() {
+  install -Dm0755 tr-unicode.pl "$pkgdir/usr/local/bin/${pkgname}"
+}
diff --git a/tr-unicode.pl b/tr-unicode.pl
new file mode 100644
index 000000000000..7c6836c82b3b
--- /dev/null
+++ b/tr-unicode.pl
@@ -0,0 +1,186 @@

=end comment

=cut

# NOTE: the md5sums entry and the blob hash quoted above describe the script
# exactly as it was committed.  The script below is a revised version of that
# file, so the checksum must be regenerated before it is packaged.

#!/usr/bin/perl
# -*- coding: utf-8, vim: expandtab:ts=4 -*-

# tr-unicode: a tr(1)-like filter built on Perl's tr/// operator so that
# multi-byte (UTF-8) characters are translated as characters, not as bytes.
#
# Usage: tr-unicode [OPTION] SET1 [SET2]
# Reads STDIN line by line, writes the translated text to STDOUT.

# Source: http://www.perl.com/pub/2012/04/perlunicook-standard-preamble.html
use utf8;                       # so literals and identifiers can be in UTF-8
use v5.14;                      # for locale sorting and unicode_strings
use strict;                     # quote strings, declare variables
use warnings;                   # on by default
use warnings qw(FATAL utf8);    # fatalize encoding glitches
use open qw(:std :utf8);        # undeclared streams in UTF-8
use charnames qw(:full :short); # unneeded in v5.16

# Decode UTF-8 commandline args
use Encode qw(decode_utf8);

# Help text.  Printed on STDOUT for --help and on STDERR for usage errors.
my $HELP_TEXT = <<'END_HELPTEXT';
Usage: tr [OPTION]... SET1 [SET2]
(OPTION must (if there are any) present in the first argument, only one at a
 time (TODO: bundling)!)

This is a tr-like utility in Perl with minimal Unicode support.
(Aid for tr's 'feature': echo "°" | tr "Ű" "ű" -> ±)

Translate, squeeze, and/or delete characters from standard input,
writing to standard output.

  -c, -C, --complement    use the complement of SET1
  -d, --delete            delete characters in SET1, do not translate
  -s, --squeeze-repeats   replace each input sequence of a repeated character
                            that is listed in SET1 with a single occurrence
                            of that character
  -t, --truncate-set1     (NOT IMPLEMENTED) first truncate SET1 to length of
                            SET2
      --help     display this help and exit (This text)
      --version  output version information and exit (NOT IMPLEMENTED)

SETs are specified as strings of characters. Most represent themselves.
Interpreted sequences are:

  \NNN            character with octal value NNN (1 to 3 octal digits)
  \\              backslash
  \a              audible BEL
  \b              backspace
  \f              form feed
  \n              new line
  \r              return
  \t              horizontal tab
  \v              vertical tab
  CHAR1-CHAR2     all characters from CHAR1 to CHAR2 in ascending order
                  (as in ASCII, TODO: MAKE LOCALE DEPENDENT)

  NOT IMPLEMENTED, TODO:
  [CHAR*]         in SET2, copies of CHAR until length of SET1
  [CHAR*REPEAT]   REPEAT copies of CHAR, REPEAT octal if starting with 0

  Directly not supported:
  [:alnum:]       all letters and digits
  [:alpha:]       all letters
  [:blank:]       all horizontal whitespace
  [:cntrl:]       all control characters
  [:digit:]       all digits
  [:graph:]       all printable characters, not including space
  [:lower:]       all lower case letters
  [:print:]       all printable characters, including space
  [:punct:]       all punctuation characters
  [:space:]       all horizontal or vertical whitespace
  [:upper:]       all upper case letters
  [:xdigit:]      all hexadecimal digits
  [=CHAR=]        all characters which are equivalent to CHAR

Bug reports and patch requests are welcome on the project site!

Excerpt from man tr:

Translation occurs if -d is not given and both SET1 and SET2 appear.
-t may be used only when translating. SET2 is extended to length of
SET1 by repeating its last character as necessary. Excess characters
of SET2 are ignored. Only [:lower:] and [:upper:] are guaranteed to
expand in ascending order; used in SET2 while translating, they may
only be used in pairs to specify case conversion. -s uses SET1 if not
translating nor deleting; else squeezing uses SET2 and occurs after
translation or deletion.

GNU coreutils home page: <http://www.gnu.org/software/coreutils/>
General help using GNU software: <http://www.gnu.org/gethelp/>
For complete documentation, run: info coreutils 'tr invocation'
END_HELPTEXT

# parse_args(@args) -> ($opt, $orig, $repl), or the empty list on a usage error.
#
#   $opt   tr/// flag: 'c', 'd', 's' or '' (plain translation)
#   $orig  SET1 as typed by the user
#   $repl  SET2 as typed by the user ('' when not given)
#
# Only a single option, in the first argument, is understood.  An option that
# appears with the wrong number of operands falls through to the later
# branches, so "-c X" is a plain two-set translation of the characters "-c".
sub parse_args {
    my @args  = @_;
    my $count = scalar @args;

    return if $count < 2 || $count > 3;

    my $switch = $args[0];    # Command-line switch, or first pattern

    my $is_complement = $switch eq '-c'
                     || $switch eq '-C'
                     || $switch eq '--complement';
    my $is_delete     = $switch eq '-d' || $switch eq '--delete';
    my $is_squeeze    = $switch eq '-s' || $switch eq '--squeeze-repeats';

    # -c, -C, --complement: use the complement of SET1
    return ('c', $args[1], $args[2]) if $is_complement && $count == 3;

    # -d, --delete: delete characters in SET1, do not translate
    return ('d', $args[1], q{}) if $is_delete && $count == 2;

    # -s, --squeeze-repeats: SET2 is optional
    return ('s', $args[1], $count == 3 ? $args[2] : q{}) if $is_squeeze;

    # Two patterns, no option: plain translation
    return (q{}, $args[0], $args[1]) if $count == 2;

    return;
}

# quote($set) -> $set rewritten so it is safe inside the eval'd tr/SET1/SET2/.
#
# The set is tokenised left to right, so every backslash is consumed together
# with the character(s) it introduces:
#   \NNN (1 to 3 octal digits)  -> \x{HEX}; the braces keep a following digit
#                                  from being absorbed into the escape
#   \\ \a \b \f \n \r \t        -> passed through, tr/// understands them
#   \v                          -> \x{B}; Perl's tr/// has no \v escape
#   -                           -> passed through as the range operator
#   anything else               -> quotemeta, which also protects the "/"
#                                  delimiter of the generated tr///
#
# Called without an argument it works on $_, as the original helper did.
sub quote {
    my ($text) = @_ ? @_ : ($_);

    # The extended patterns ([:alpha:], [CHAR*], ...) should be handled here
    # ...

    $text =~ s{
          \\ ([0-7]{1,3})       # octal escape
        | \\ ([abfnrtv\\])      # named escape or escaped backslash
        | (-)                   # range operator
        | (.)                   # any other single character
    }{
          defined $1 ? sprintf('\\x{%X}', oct $1)
        : defined $2 ? ( $2 eq 'v' ? '\\x{B}' : "\\$2" )
        : defined $3 ? '-'
        :              quotemeta $4
    }gsex;

    return $text;
}

# build_translator($orig, $repl, $opt) -> code ref mapping a string to its
# translation.
#
# tr/// builds its table at compile time and cannot interpolate variables, so
# the operator is generated as source text and compiled with a string eval
# (technique from http://www.perlmonks.org/?node_id=446029).  Both sets pass
# through quote() first and $opt is checked against the known flags, so no
# user-supplied text can terminate the tr/// and run as code.
#
# Dies if $opt is not '', 'c', 'd' or 's', or if Perl rejects the generated
# tr/// (for example a descending range such as "z-a").
sub build_translator {
    my ($orig, $repl, $opt) = @_;

    die "tr-unicode: unsupported tr flag '$opt'\n"
        if $opt !~ m{\A [cds]? \z}x;

    my $source = sprintf(
        'sub { my ($text) = @_; $text =~ tr/%s/%s/%s; return $text; }',
        quote($orig), quote($repl), $opt,
    );

    # Check
    #print("$source\n");

    my $translator = eval $source;
    die "tr-unicode: cannot build translation: $@" if !$translator;

    return $translator;
}

# main(@raw_args) -> process exit status.
#
# Decodes the arguments from UTF-8, builds the translator and filters STDIN
# to STDOUT.  Returns 0 on success and 1 on a usage error.
sub main {
    # Decode a copy of each argument: with a CHECK value decode_utf8 may
    # modify its source argument in place.
    my @args = map { my $raw = $_; decode_utf8($raw, Encode::FB_CROAK()) } @_;

    # An explicit request for help is not an error.
    if (@args == 1 && $args[0] eq '--help') {
        print $HELP_TEXT;
        return 0;
    }

    my ($opt, $orig, $repl) = parse_args(@args);
    if (!defined $opt) {
        print {*STDERR} $HELP_TEXT;
        return 1;
    }

    my $tr = build_translator($orig, $repl, $opt);

    # Process each line from STDIN and only STDIN!  Read with while so the
    # input is streamed instead of being loaded into memory first.
    while (defined(my $line = <STDIN>)) {
        print $tr->($line);
    }

    return 0;
}

# Run only when executed as a script, so the helpers above can be loaded
# (require/do) and exercised without touching STDIN.
exit main(@ARGV) unless caller;