#!/bin/env python import mailbox import codecs from bs4 import BeautifulSoup from weasyprint import HTML from datetime import datetime import sys import os import logging from slugify import slugify logging.getLogger('fontTools').setLevel(logging.ERROR) logging.getLogger('weasyprint').setLevel(logging.ERROR) logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR) def extract_and_convert_email(message, output_file_base, i): logging.debug(f"Extraction du mail {i}") for part in message.walk(): if part.get_content_maintype() == 'text' and part.get_content_subtype() == 'html': charset = part.get_content_charset('windows-1252') try: payload = part.get_payload(decode=True).decode(charset, errors='replace') soup = BeautifulSoup(payload, 'html.parser') # Nettoyage de l'HTML attr_whitelist = ("style") for tag in soup.findAll(True): for attr in [attr for attr in tag.attrs if attr not in attr_whitelist]: del tag[attr] html_content = str(soup) # Extraction de la date et formatage ISO date_header = message.get('Date') from_header = message.get('From') to_header = message.get('To') if date_header: try: date_object = datetime.strptime(date_header, "%a, %d %b %Y %H:%M:%S %z") date_iso = date_object.isoformat().replace(":", "-") # Remplace les ":" par "-" pour les noms de fichiers date_simple = date_object.strftime("%d/%m/%Y, %H:%M:%S") output_file = os.path.join(output_file_base, f"{date_iso}") # Chemin complet du fichier except ValueError: logging.warning(f"Format de date invalide: {date_header}") output_file = os.path.join(output_file_base, f"email_{i}") # Nom par défaut si date invalide date_simple = "date inconnue" html_header = "