1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
diff --git a/cajparser.py b/cajparser.py
index e931437..032a45c 100644
--- a/cajparser.py
+++ b/cajparser.py
@@ -159,9 +159,9 @@ class CAJParser(object):
pdf_length = pdf_end - pdf_start
caj.seek(pdf_start)
pdf_data = b"%PDF-1.3\r\n" + caj.read(pdf_length) + b"\r\n"
- with open("pdf.tmp", 'wb') as f:
+ with open("/tmp/caj2pdf.tmp", 'wb') as f:
f.write(pdf_data)
- pdf = open("pdf.tmp", "rb")
+ pdf = open("/tmp/caj2pdf.tmp", "rb")
# deal with disordered PDF data
endobj_addr = fnd_all(pdf, b"endobj")
@@ -229,9 +229,9 @@ class CAJParser(object):
catalog_obj_no, root_pages_obj_no), "utf-8")
pdf_data += catalog
pdf.close()
- with open("pdf.tmp", 'wb') as f:
+ with open("/tmp/caj2pdf.tmp", 'wb') as f:
f.write(pdf_data)
- pdf = open("pdf.tmp", "rb")
+ pdf = open("/tmp/caj2pdf.tmp", "rb")
# Add Pages obj and EOF mark
# if root pages object exist, pass
@@ -243,9 +243,9 @@ class CAJParser(object):
root_pages_obj_no, kids_str, self.page_num)
pdf_data += bytes(pages_str, "utf-8")
pdf.close()
- with open("pdf.tmp", 'wb') as f:
+ with open("/tmp/caj2pdf.tmp", 'wb') as f:
f.write(pdf_data)
- pdf = open("pdf.tmp", "rb")
+ pdf = open("/tmp/caj2pdf.tmp", "rb")
# deal with multiple missing pages objects
if multi_pages_obj_missed:
kids_dict = {i: [] for i in top_pages_obj_no}
@@ -284,25 +284,25 @@ class CAJParser(object):
pdf_data += bytes(pages_str, "utf-8")
pdf_data += bytes("\n%%EOF\r", "utf-8")
pdf.close()
- with open("pdf.tmp", 'wb') as f:
+ with open("/tmp/caj2pdf.tmp", 'wb') as f:
f.write(pdf_data)
# Use mutool to repair xref
try:
- check_output(["mutool", "clean", "pdf.tmp", "pdf_toc.pdf"], stderr=STDOUT)
+ check_output(["mutool", "clean", "/tmp/caj2pdf.tmp", "/tmp/caj2pdf_toc.pdf"], stderr=STDOUT)
except CalledProcessError as e:
print(e.output.decode("utf-8"))
raise SystemExit("Command mutool returned non-zero exit status " + str(e.returncode))
# Add Outlines
try:
- add_outlines(self.get_toc(), "pdf_toc.pdf", dest)
+ add_outlines(self.get_toc(), "/tmp/caj2pdf_toc.pdf", dest)
except errors.PdfReadError as e:
print("errors.PdfReadError:", str(e))
- copy("pdf_toc.pdf", dest)
+ copy("/tmp/caj2pdf_toc.pdf", dest)
pass
- os.remove("pdf.tmp")
- os.remove("pdf_toc.pdf")
+ os.remove("/tmp/caj2pdf.tmp")
+ os.remove("/tmp/caj2pdf_toc.pdf")
def _convert_hn(self, dest):
caj = open(self.filename, "rb")
@@ -419,13 +419,13 @@ class CAJParser(object):
# neither works, try brute-force
import imagesize
from PIL import Image as pilimage
- with open(".tmp.jpg", "wb") as f:
+ with open("/tmp/caj2pdf-tmp.jpg", "wb") as f:
f.write(image_data)
- (width, height) = imagesize.get(".tmp.jpg")
- pim = pilimage.open(".tmp.jpg")
+ (width, height) = imagesize.get("/tmp/caj2pdf-tmp.jpg")
+ pim = pilimage.open("/tmp/caj2pdf-tmp.jpg")
if (pim.mode == 'L'):
component = 1
- os.remove(".tmp.jpg")
+ os.remove("/tmp/caj2pdf-tmp.jpg")
if (image_type_enum == 1):
# non-inverted JPEG Images
height = -height
@@ -449,11 +449,11 @@ class CAJParser(object):
if (len(image_list) == 0):
raise SystemExit("File is pure-text HN; cannot convert to pdf")
pdf_data = convert_ImageList(image_list)
- with open('pdf_toc.pdf', 'wb') as f:
+ with open('/tmp/caj2pdf_toc.pdf', 'wb') as f:
f.write(pdf_data)
# Add Outlines
- add_outlines(self.get_toc(), "pdf_toc.pdf", dest)
- os.remove("pdf_toc.pdf")
+ add_outlines(self.get_toc(), "/tmp/caj2pdf_toc.pdf", dest)
+ os.remove("/tmp/caj2pdf_toc.pdf")
def _text_extract_hn(self):
if (self._TOC_NUMBER_OFFSET > 0):
|