diff options
author | Your Name | 2024-04-18 01:04:23 +0330 |
---|---|---|
committer | Your Name | 2024-04-18 01:04:23 +0330 |
commit | 225566d5b9a5fe8edd4a1cd4adfdc18dfbc595d9 (patch) | |
tree | b059592303e75d49e5a13d7ce41175eb48126760 | |
parent | 706d4ab4b58de8d4119051966771acbfca2d1c67 (diff) | |
download | aur-225566d5b9a5fe8edd4a1cd4adfdc18dfbc595d9.tar.gz |
web2pdf v2
-rw-r--r-- | web2pdf-1.0-1-any.pkg.tar.zst | bin | 0 -> 17891 bytes | |||
-rw-r--r-- | web2pdf.py | 64 |
2 files changed, 64 insertions, 0 deletions
"""Scrape the text of one HTML element (selected by id) from a URL and save it as a PDF."""
import argparse

import requests
from bs4 import BeautifulSoup
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter


def save_content_as_pdf(url, id, pdf_name):
    """Fetch *url*, extract the element with the given HTML id, and write its
    text to ``<pdf_name>.pdf`` with greedy word wrapping.

    Parameters
    ----------
    url : str
        Page to download.
    id : str
        HTML ``id`` attribute of the element whose text is exported.
        (The name shadows the ``id`` builtin; kept for caller compatibility.)
    pdf_name : str
        Output file name, without the ``.pdf`` extension.

    Prints a status message instead of raising on HTTP failure or a
    missing element, matching the original best-effort behavior.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # BUG FIX: the original called requests.get(url, headers) — the second
    # positional argument of requests.get is `params` (the query string),
    # so the custom User-Agent header was never actually sent. It must be
    # passed as the `headers=` keyword argument.
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        novel_content = soup.find(id=id)
        if novel_content:
            pdf_filename = f"{pdf_name}.pdf"
            c = canvas.Canvas(pdf_filename, pagesize=letter)
            c.setFont("Helvetica", 12)

            # Page margins, in points.
            top_padding = 30
            left_padding = 30
            right_padding = 30
            bottom_padding = 30
            max_width = 500 - left_padding - right_padding
            line_height = 20
            page_top = 800 - top_padding

            def _new_page():
                # BUG FIX: showPage() resets the canvas graphics state, so
                # the font must be re-applied on every new page; the original
                # fell back to the default font after the first page break.
                c.showPage()
                c.setFont("Helvetica", 12)
                return page_top

            content_text = novel_content.get_text(separator='\n')

            y_position = page_top
            for line in content_text.split('\n'):
                if y_position < bottom_padding:
                    y_position = _new_page()

                # Greedy word wrap: accumulate words until the next one would
                # exceed the printable width, then flush the accumulated line.
                # (The original measured a spurious leading space before the
                # first word; building the candidate stripped avoids that.)
                wrapped_line = ''
                for word in line.split():
                    candidate = f"{wrapped_line} {word}".strip()
                    if c.stringWidth(candidate) < max_width:
                        wrapped_line = candidate
                    else:
                        c.drawString(left_padding, y_position, wrapped_line)
                        y_position -= line_height
                        # BUG FIX: a long paragraph could wrap past the bottom
                        # margin with no page break; check after each flush.
                        if y_position < bottom_padding:
                            y_position = _new_page()
                        wrapped_line = word
                if wrapped_line:
                    c.drawString(left_padding, y_position, wrapped_line)
                    y_position -= line_height

            c.save()
            print(f"PDF file saved: {pdf_filename}")
        else:
            print(f"No content with id '{id}' found on {url}")
    else:
        print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Save webpage content as PDF')
    parser.add_argument('url', type=str, help='URL of the webpage to scrape')
    parser.add_argument('id', type=str, help='ID of the content to extract')
    parser.add_argument('pdf_name', type=str, help='Name of the PDF file to save')
    args = parser.parse_args()

    save_content_as_pdf(args.url, args.id, args.pdf_name)