diff --git a/mbox2pdf.py b/mbox2pdf.py new file mode 100755 index 0000000..e8d7384 --- /dev/null +++ b/mbox2pdf.py @@ -0,0 +1,78 @@ +#!/bin/env python + +import mailbox +import codecs +from bs4 import BeautifulSoup +from weasyprint import HTML +from datetime import datetime +import sys +import os +import logging +from slugify import slugify + +logging.getLogger('fontTools').setLevel(logging.ERROR) +logging.getLogger('weasyprint').setLevel(logging.ERROR) +logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR) + +def extract_and_convert_email(message, output_file_base, i): + logging.debug(f"Extraction du mail {i}") + for part in message.walk(): + if part.get_content_maintype() == 'text' and part.get_content_subtype() == 'html': + charset = part.get_content_charset('windows-1252') + try: + payload = part.get_payload(decode=True).decode(charset, errors='replace') + soup = BeautifulSoup(payload, 'html.parser') + html_content = str(soup) + + # Extraction de la date et formatage ISO + date_header = message.get('Date') + from_header = message.get('From') + to_header = message.get('To') + if date_header: + try: + date_object = datetime.strptime(date_header, "%a, %d %b %Y %H:%M:%S %z") + date_iso = date_object.isoformat().replace(":", "-") # Remplace les ":" par "-" pour les noms de fichiers + date_simple = date_object.strftime("%d/%m/%Y, %H:%M:%S") + output_file = os.path.join(output_file_base, f"{date_iso}") # Chemin complet du fichier + except ValueError: + logging.warning(f"Format de date invalide: {date_header}") + output_file = os.path.join(output_file_base, f"email_{i}") # Nom par défaut si date invalide + date_simple = "date inconnue" + html_header = "