blob: b84c4abb5532fe520966c35f2297af2174542bb7 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
|
#!/bin/sh
# Abort early unless every external tool used by this script is installed.
# "missing" is reset explicitly: a variable of the same name inherited from
# the caller's environment would otherwise cause a bogus failure.
missing=""
for cmd in pdfgrep magick pdftocairo; do
  command -v "$cmd" >/dev/null 2>&1 || missing="$missing $cmd"
done
if [ -n "$missing" ]; then
  # Diagnostics belong on stderr so they never mix with regular output.
  echo "Missing packages:$missing" >&2
  exit 1
fi
# Defaults; both can be overridden by the options parsed below.
show_result_numbers=true # set to false by --no-result-numbers
scale_to=256             # pixel size, replaced by the value of --scale-to <px>

# Print the usage line to stderr and terminate the script with status 1.
usage_exit() {
  echo "Usage: ./pdfgrepSIXEL [--no-result-numbers] [--scale-to <px>] 'String'" >&2
  exit 1
}

# Succeed only if $1 consists solely of digits and is greater than zero.
is_positive_integer() {
  case "$1" in
    "" | *[!0-9]*) return 1 ;;
  esac
  # Errors (e.g. an absurdly long digit string) count as "not valid".
  [ "$1" -gt 0 ] 2>/dev/null
}

# Parse arguments: leading options are consumed; the first argument that is
# not a known option ends parsing and stays in $1 as the search string.
while [ "$#" -gt 0 ]; do
  case "$1" in
    --no-result-numbers)
      show_result_numbers=false
      shift
      ;;
    --scale-to)
      # Reject a missing or non-numeric value here. Otherwise the shift
      # below would run past the end of the argument list and an unusable
      # size would be stored in scale_to.
      if ! is_positive_integer "${2:-}"; then
        echo "pdfgrepSIXEL: --scale-to needs a positive integer pixel size" >&2
        usage_exit
      fi
      scale_to="$2"
      shift 2
      ;;
    *)
      break
      ;;
  esac
done
# A non-empty search string is mandatory.
[ -n "${1:-}" ] || usage_exit
# Print the number of non-empty lines read from stdin (prints 0 for none).
count_nonempty_lines() {
  awk '$0 != "" { n++ } END { print n + 0 }'
}

# Read "pdf:page:text" lines from stdin and print only the first line seen
# for every pdf/page pair, keeping the input order.
first_hit_per_page() {
  awk -F: '!seen[$1 FS $2]++'
}

# Search all pdf files of the current directory. Every output line has the form
#   pdfname.pdf:pagenumber:matchedtext
# "--" ends option parsing, so a search string starting with "-" is still
# taken as the pattern.
resultslist=$(pdfgrep -n -H -o -- "$1" *.pdf)
# printf is used instead of echo throughout: the matched text is arbitrary
# data and must not be subject to echo's backslash handling.
resultscount=$(printf '%s\n' "$resultslist" | cut -d ':' -f 1 | uniq -c | awk '{print $1}' | tr '\n' ' ') # results per pdf
# One entry per pdf/page. Keying on pdf and page (not on the whole line)
# keeps a page from being listed twice when a pattern matches different
# texts on it.
deduplicatedlist=$(printf '%s\n' "$resultslist" | first_hit_per_page)
pageslist=$(printf '%s\n' "$deduplicatedlist" | cut -d ':' -f 2) # page numbers
resultpages=$(printf '%s\n' "$deduplicatedlist" | cut -d ':' -f 1 | uniq -c | awk '{print $1}' | tr '\n' ' ') # result pages per pdf
# determine individual pdf files:
uniquepdfnames=$(printf '%s\n' "$deduplicatedlist" | cut -d ':' -f 1 | sort -u | tr '\n' ' ')
# Unquoted expansions further down rely on default word splitting.
unset IFS
uniquepdfcount=$(echo "$uniquepdfnames" | wc -w)
# Results are counted per line; counting words would be wrong as soon as
# the matched text contains whitespace.
echo "Found $(printf '%s\n' "$resultslist" | count_nonempty_lines) results inside $uniquepdfcount pdf files"
echo "(Results per pdf: $resultscount)"
# Rebuild $pdfsites: the result pages grouped per pdf, pages separated by a
# space and groups separated by "; " (e.g. "3 5; 2").
# Reads:  uniquepdfcount - number of pdf files with results
#         resultpages    - whitespace-separated count of result pages per pdf
#         pageslist      - newline-separated page numbers of all pdfs in order
# Writes: pdfsites (always started from empty, so neither an inherited
#         environment value nor an earlier call can leak into the result)
group_pages_per_pdf() {
  pdfsites=""
  pdf_index=1 # number of file
  field=1     # position in pagelist
  while [ "$pdf_index" -le "${uniquepdfcount:-0}" ]; do
    # The N-th entry of resultpages is the number of result pages of pdf N.
    resultsthispdf=$(echo "$resultpages" | awk -v N="$pdf_index" '{print $N}')
    # pageslist is left unquoted on purpose: word splitting turns the
    # newline-separated numbers into one space-separated line for cut.
    # shellcheck disable=SC2086
    currentpages=$(echo $pageslist | cut -d " " -f "$field-$((field + resultsthispdf - 1))")
    field=$((field + resultsthispdf))
    pdfsites="$pdfsites$currentpages"
    # Every group except the last is terminated by "; ".
    [ "$pdf_index" -ne "$uniquepdfcount" ] && pdfsites="$pdfsites; "
    pdf_index=$((pdf_index + 1))
  done
}

group_pages_per_pdf
echo "$uniquepdfnames"
pagecount=$(echo "$pageslist" | wc -w)
echo "Pages: $pagecount ($pdfsites)"
echo "[Pages per pdf: $resultpages]"
POINTSIZE=16 # font size shared by the pdf name labels and the page info bars

# Print how many lines of $resultslist belong to page $2 of pdf $1.
# The lines are compared against the literal prefix "<pdf>:<page>:"; the
# pdf name is never interpreted as a regular expression, so characters such
# as "." or "[" in file names cannot distort the count.
count_results_on_page() {
  printf '%s\n' "$resultslist" \
    | result_prefix="$1:$2:" awk 'index($0, ENVIRON["result_prefix"]) == 1 { n++ } END { print n + 0 }'
}

# Write a red sixel label showing the pdf name $1 to stdout.
draw_pdf_label() {
  magick -background red -fill black -pointsize "$POINTSIZE" label:"$1" sixel:-
}

i=1 # index of the current field of $pdfsites (one field per result page)
c=1 # index of the current pdf within $uniquepdfnames
current_pdf="$(echo $uniquepdfnames | cut -d " " -f"$c")"
# Without any result there is no pdf name and therefore nothing to label.
if [ -n "$current_pdf" ]; then
  draw_pdf_label "$current_pdf"
fi
while [ "$i" -le "${pagecount:-0}" ]; do
  # The last page of every pdf (except the final one) carries a trailing ";".
  page_field="$(echo "$pdfsites" | cut -d " " -f"$i")"
  current_page="$(echo "$page_field" | tr -d ";")"
  # Draw orange rectangle per page including corresponding text search result number unless --no-result-numbers is passed
  if [ "$show_result_numbers" = true ]; then
    TEXT_ORANGE=$(count_results_on_page "$current_pdf" "$current_page")
  else
    TEXT_ORANGE=" "
  fi
  TEXT_GREEN="$current_page"
  # Measure both texts ("<width>x<height>") to size the coloured boxes.
  TEXT_GREEN_DIMENSIONS=$(magick -pointsize "$POINTSIZE" label:"$TEXT_GREEN" -format "%wx%h" info:)
  TEXT_ORANGE_DIMENSIONS=$(magick -pointsize "$POINTSIZE" label:"$TEXT_ORANGE" -format "%wx%h" info:)
  TEXT_GREEN_WIDTH=${TEXT_GREEN_DIMENSIONS%%x*}
  TEXT_ORANGE_WIDTH=${TEXT_ORANGE_DIMENSIONS%%x*}
  TEXT_HEIGHT=${TEXT_GREEN_DIMENSIONS#*x}
  # Render the page as PNG and append a gray bar below it: a green box with
  # the page number followed by an orange box with the result count.
  # NOTE(review): stderr of magick is discarded here, presumably to keep
  # warnings out of the sixel output - confirm this is intended.
  pdftocairo -png -scale-to "$scale_to" -f "$current_page" -l "$current_page" "$current_pdf" -singlefile - \
    | magick - -size "${scale_to}x$((POINTSIZE + 16))" \
      \( xc:gray \
      -fill green -draw "rectangle 0,0 $((TEXT_GREEN_WIDTH + 16)),$((TEXT_HEIGHT + 8))" \
      -fill orange -draw "rectangle $((TEXT_GREEN_WIDTH + 16)),0 $((TEXT_GREEN_WIDTH + 16 + TEXT_ORANGE_WIDTH + 16)),$((TEXT_HEIGHT + 8))" \
      -fill white -pointsize "$POINTSIZE" -draw "text 8,$TEXT_HEIGHT '$TEXT_GREEN'" \
      -fill black -pointsize "$POINTSIZE" -draw "text $((TEXT_GREEN_WIDTH + 16 + 8)),$TEXT_HEIGHT '$TEXT_ORANGE'" \
      \) \
      -append sixel:- 2>/dev/null
  # A stripped ";" means this was the last page of the current pdf:
  # advance to the next pdf and print its name label.
  if [ "$page_field" != "$current_page" ]; then
    c=$((c + 1))
    current_pdf="$(echo $uniquepdfnames | cut -d " " -f"$c")"
    draw_pdf_label "$current_pdf"
  fi
  i=$((i + 1))
done
|