Nettoyage du HTML

2025-02-13 17:01:17 +01:00
parent 584a5e82b1
commit 220b102830
1 changed file with 5 additions and 0 deletions
--- a/mbox2pdf.py
+++ b/mbox2pdf.py
@@ -22,6 +22,11 @@ def extract_and_convert_email(message, output_file_base, i):
            try:
                payload = part.get_payload(decode=True).decode(charset, errors='replace')
                soup = BeautifulSoup(payload, 'html.parser')
                # Clean up the HTML: strip every attribute that is not whitelisted.
                # The trailing comma is required: ("style") is just the string
                # "style", which would turn the membership test below into a
                # substring match and let attributes such as "s" or "le" through.
                attr_whitelist = ("style",)
                for tag in soup.find_all(True):
                    # Snapshot the names to drop first, so tag.attrs is not
                    # mutated while it is being iterated.
                    for attr in [attr for attr in tag.attrs if attr not in attr_whitelist]:
                        del tag[attr]
                html_content = str(soup)
                # Extraction de la date et formatage ISO