From 220b1028309e8e42a9b4fa0ffa9213a7904cdb9a Mon Sep 17 00:00:00 2001 From: Antoine Van Elstraete Date: Thu, 13 Feb 2025 17:01:17 +0100 Subject: [PATCH] Nettoyage du HTML --- mbox2pdf.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mbox2pdf.py b/mbox2pdf.py index e8d7384..cc1a964 100755 --- a/mbox2pdf.py +++ b/mbox2pdf.py @@ -22,6 +22,11 @@ def extract_and_convert_email(message, output_file_base, i): try: payload = part.get_payload(decode=True).decode(charset, errors='replace') soup = BeautifulSoup(payload, 'html.parser') + # HTML cleanup: drop every attribute whose name is not whitelisted (the trailing comma makes the whitelist a tuple, so the test is an exact-name match, not a substring match) + attr_whitelist = ("style",) + for tag in soup.find_all(True): + for attr in [attr for attr in tag.attrs if attr not in attr_whitelist]: + del tag[attr] html_content = str(soup) # Extraction de la date et formatage ISO