summarylogtreecommitdiffstats
path: root/pdfTranslate.py
blob: 8ff8c37919f4191a36e3ce04b24aaf0985bbd954 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import os
import subprocess
import sys
import tempfile
import textwrap

from PyPDF2 import PdfReader
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*, concatenated.

    The file is opened in binary mode and handed to ``PdfReader``; each
    page's ``extract_text()`` result is appended in page order with no
    separator inserted between pages.
    """
    with open(pdf_path, 'rb') as pdf_stream:
        reader = PdfReader(pdf_stream)
        # Extract while the stream is still open; the reader pulls page
        # data from it on demand.
        page_texts = [page.extract_text() for page in reader.pages]
    return "".join(page_texts)

def create_pdf_with_text(output_pdf_path, text):
    """Write *text* to a letter-size PDF at *output_pdf_path*.

    The text is split on newlines and drawn in 12 pt Helvetica starting at
    x=50, y=750. Each drawn row moves the cursor down by 32 points (12 line
    height + 20 padding); when the cursor drops below y=50 a new page is
    started.

    Lines longer than 90 characters are wrapped onto additional rows rather
    than being cut off, so no text is lost. Empty lines are kept as blank
    rows.

    Args:
        output_pdf_path: Destination path of the PDF file to create.
        text: The text to render; ``'\\n'`` separates lines.
    """
    font_name = "Helvetica"
    font_size = 12
    left_x = 50
    top_y = 750
    bottom_y = 50
    row_step = 12 + 20  # line height plus padding between rows
    max_chars_per_row = 90

    c = canvas.Canvas(output_pdf_path, pagesize=letter)
    # Set the font explicitly so every page, including the first, uses the
    # same one.
    c.setFont(font_name, font_size)
    y_position = top_y

    for line in text.split('\n'):
        # textwrap.wrap() returns [] for an empty or whitespace-only line;
        # fall back to a single empty row so blank lines keep their spacing.
        for row in textwrap.wrap(line, max_chars_per_row) or [""]:
            if y_position < bottom_y:
                c.showPage()
                # Re-apply the font after the page break, as the original
                # code did.
                c.setFont(font_name, font_size)
                y_position = top_y

            c.drawString(left_x, y_position, row)
            y_position -= row_step

    c.save()

def _translate_text(text, target_language):
    """Translate *text* into *target_language* using the ``trans`` command.

    The text is written to a file inside a private temporary directory,
    ``trans -b :<target_language> -i <input> -o <output>`` is run, and the
    contents of the output file are returned. The temporary directory and
    both files are removed when the function returns or raises.

    Args:
        text: The text to translate.
        target_language: Language code passed to ``trans`` (e.g. ``"en"``).

    Returns:
        The translated text read back from the output file.

    Raises:
        OSError: If ``trans`` cannot be started or a temporary file cannot
            be written or read.
        subprocess.CalledProcessError: If ``trans`` exits with a non-zero
            status.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        source_path = os.path.join(work_dir, "source.txt")
        translated_path = os.path.join(work_dir, "translated.txt")

        with open(source_path, 'w', encoding="utf-8") as source_file:
            source_file.write(text)

        # Pass an argument list (no shell) so the user-supplied language
        # code cannot inject shell syntax; check=True surfaces failures.
        subprocess.run(
            [
                "trans", "-b", f":{target_language}",
                "-i", source_path,
                "-o", translated_path,
            ],
            check=True,
        )

        with open(translated_path, 'r', encoding="utf-8") as translated_file:
            return translated_file.read()


def main(argv=None):
    """Command-line entry point: translate a PDF's text into a new PDF.

    Args:
        argv: Argument vector in ``sys.argv`` layout (program name first,
            then input PDF, output PDF and target language). Defaults to
            ``sys.argv``.

    Exits with status 1 after printing usage when fewer than three
    arguments are supplied, or with an error message when translation
    fails.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 4:
        print("Usage: pdfTranslate <input_pdf_file> <output_pdf_file> <target_language>")
        print("Example: python pdfTranslate.py input.pdf output.pdf en")
        print("Target Language Codes example:")
        print("  en - English | fr - French | es - Spanish")
        sys.exit(1)

    input_pdf_path, output_pdf_path, target_language = argv[1:4]

    # Pull the raw text out of the source PDF.
    text = extract_text_from_pdf(input_pdf_path)

    # Translate it with the external `trans` tool.
    try:
        translated_text = _translate_text(text, target_language)
    except (OSError, subprocess.CalledProcessError) as err:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(f"Translation failed: {err}")

    # Create PDF with translated text
    create_pdf_with_text(output_pdf_path, translated_text)

    print("------------------")
    print("Your PDF is ready")


if __name__ == "__main__":
    main()