diff options
author | Your Name | 2024-04-18 01:04:23 +0330 |
---|---|---|
committer | Your Name | 2024-04-18 01:04:23 +0330 |
commit | 225566d5b9a5fe8edd4a1cd4adfdc18dfbc595d9 (patch) | |
tree | b059592303e75d49e5a13d7ce41175eb48126760 | |
parent | 706d4ab4b58de8d4119051966771acbfca2d1c67 (diff) | |
download | aur-225566d5b9a5fe8edd4a1cd4adfdc18dfbc595d9.tar.gz |
web2pdf v2
-rw-r--r-- | web2pdf-1.0-1-any.pkg.tar.zst | bin | 0 -> 17891 bytes | |||
-rw-r--r-- | web2pdf.py | 64 |
2 files changed, 64 insertions, 0 deletions
"""Scrape the text of one HTML element (selected by id) from a URL and save it as a PDF."""
import argparse

import requests
from bs4 import BeautifulSoup
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter


def save_content_as_pdf(url, id, pdf_name):
    """Fetch *url*, extract the element with the given HTML id, and write its
    text to ``<pdf_name>.pdf`` with greedy word wrapping.

    Parameters
    ----------
    url : str
        Page to download.
    id : str
        HTML ``id`` attribute of the element whose text is exported.
        (The name shadows the ``id`` builtin; kept for caller compatibility.)
    pdf_name : str
        Output file name, without the ``.pdf`` extension.

    Prints a status message instead of raising on HTTP failure or a
    missing element, matching the original best-effort behavior.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # BUG FIX: the original called requests.get(url, headers) — the second
    # positional argument of requests.get is `params` (the query string),
    # so the custom User-Agent header was never actually sent. It must be
    # passed as the `headers=` keyword argument.
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        novel_content = soup.find(id=id)
        if novel_content:
            pdf_filename = f"{pdf_name}.pdf"
            c = canvas.Canvas(pdf_filename, pagesize=letter)
            c.setFont("Helvetica", 12)

            # Page margins, in points.
            top_padding = 30
            left_padding = 30
            right_padding = 30
            bottom_padding = 30
            max_width = 500 - left_padding - right_padding
            line_height = 20
            page_top = 800 - top_padding

            def _new_page():
                # BUG FIX: showPage() resets the canvas graphics state, so
                # the font must be re-applied on every new page; the original
                # fell back to the default font after the first page break.
                c.showPage()
                c.setFont("Helvetica", 12)
                return page_top

            content_text = novel_content.get_text(separator='\n')

            y_position = page_top
            for line in content_text.split('\n'):
                if y_position < bottom_padding:
                    y_position = _new_page()

                # Greedy word wrap: accumulate words until the next one would
                # exceed the printable width, then flush the accumulated line.
                # (The original measured a spurious leading space before the
                # first word; building the candidate stripped avoids that.)
                wrapped_line = ''
                for word in line.split():
                    candidate = f"{wrapped_line} {word}".strip()
                    if c.stringWidth(candidate) < max_width:
                        wrapped_line = candidate
                    else:
                        c.drawString(left_padding, y_position, wrapped_line)
                        y_position -= line_height
                        # BUG FIX: a long paragraph could wrap past the bottom
                        # margin with no page break; check after each flush.
                        if y_position < bottom_padding:
                            y_position = _new_page()
                        wrapped_line = word
                if wrapped_line:
                    c.drawString(left_padding, y_position, wrapped_line)
                    y_position -= line_height

            c.save()
            print(f"PDF file saved: {pdf_filename}")
        else:
            print(f"No content with id '{id}' found on {url}")
    else:
        print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Save webpage content as PDF')
    parser.add_argument('url', type=str, help='URL of the webpage to scrape')
    parser.add_argument('id', type=str, help='ID of the content to extract')
    parser.add_argument('pdf_name', type=str, help='Name of the PDF file to save')
    args = parser.parse_args()

    save_content_as_pdf(args.url, args.id, args.pdf_name)