Premier jet
This commit is contained in:
		
							
								
								
									
										78
									
								
								mbox2pdf.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										78
									
								
								mbox2pdf.py
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,78 @@
 | 
				
			|||||||
 | 
					#!/bin/env python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import codecs
					import logging
					import mailbox
					import os
					import sys
					from datetime import datetime
					from email.errors import HeaderParseError
					from email.header import decode_header, make_header
					from email.utils import parsedate_to_datetime
					from html import escape

					from bs4 import BeautifulSoup
					from slugify import slugify
					from weasyprint import HTML
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Root logger: only errors are shown, prefixed with their level name.
					logging.basicConfig(level=logging.ERROR, format='%(levelname)s:%(message)s')
					# Keep the PDF rendering libraries quiet unless they report an error.
					logging.getLogger('weasyprint').setLevel(logging.ERROR)
					logging.getLogger('fontTools').setLevel(logging.ERROR)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _header_text(value):
					    """Return a mail header as HTML-safe text, or a placeholder when absent."""
					    if value is None:
					        return "<i>inconnu</i>"
					    try:
					        # Decode RFC 2047 encoded-words ("=?utf-8?q?...?=") into readable text.
					        text = str(make_header(decode_header(value)))
					    except (HeaderParseError, LookupError, UnicodeError, ValueError):
					        # Malformed encoding: show the raw header rather than fail.
					        text = str(value)
					    # Escape so that "Name <user@host>" is not swallowed as an HTML tag.
					    return escape(text)


					def _parse_date(date_header):
					    """Parse a mail ``Date`` header; return ``None`` when missing or invalid."""
					    if not date_header:
					        return None
					    try:
					        return parsedate_to_datetime(str(date_header))
					    except (TypeError, ValueError):
					        return None


					def extract_and_convert_email(message, output_file_base, i):
					    """Render every ``text/html`` part of *message* to a PDF file.

					    A short header (sender, recipient, date) is prepended to the HTML body
					    before conversion. The PDF is named after the ISO-formatted ``Date``
					    header, or ``email_<i>`` when that header is missing or unparseable.

					    Args:
					        message: Mail object; ``walk()`` and ``get()`` are called on it.
					        output_file_base: Directory in which the PDF files are written.
					        i: Index of the mail, used for logging and the fallback file name.

					    Returns:
					        None. Decoding and conversion failures are logged, never raised, so
					        that one bad mail does not abort the processing of the others.
					    """
					    logging.debug(f"Extraction du mail {i}")
					    for part in message.walk():
					        if part.get_content_maintype() != 'text' or part.get_content_subtype() != 'html':
					            continue

					        # Mails without a declared charset are assumed to be windows-1252.
					        charset = part.get_content_charset('windows-1252')
					        try:
					            payload = part.get_payload(decode=True).decode(charset, errors='replace')
					            html_content = str(BeautifulSoup(payload, 'html.parser'))
					        except Exception as e:
					            # Best-effort boundary: unknown charsets, empty payloads and
					            # parser failures all end up here.
					            logging.error(f"Erreur de décodage: {e}")
					            logging.debug(f"Contenu brut (non décodé): {part.get_payload(decode=False)}")
					            continue

					        # Date extraction: ISO form for the file name, short form for display.
					        date_header = message.get('Date')
					        date_object = _parse_date(date_header)
					        if date_object is None:
					            logging.warning(f"Format de date invalide: {date_header}")
					            # Default name when the date is missing or invalid.
					            output_file = os.path.join(output_file_base, f"email_{i}")
					            date_simple = "<i>date inconnue</i>"
					        else:
					            # ":" is replaced by "-" so the name is valid on every file system.
					            date_iso = date_object.isoformat().replace(":", "-")
					            date_simple = date_object.strftime("%d/%m/%Y, %H:%M:%S")
					            output_file = os.path.join(output_file_base, f"{date_iso}")

					        html_header = "".join((
					            "<p><ul>",
					            f"<li>De : {_header_text(message.get('From'))}</li>",
					            f"<li>À : {_header_text(message.get('To'))}</li>",
					            f"<li>Date : {date_simple}</li>",
					            "</ul><br /></p>",
					        ))

					        # NOTE(review): several HTML parts in one mail, or several mails with
					        # the same timestamp, still write to the same file name.
					        try:
					            HTML(string=html_header + html_content).write_pdf(output_file + ".pdf")
					        except Exception as e:
					            # Logged as an error: a lost PDF must be visible at the default level.
					            logging.error(f"Erreur lors de la conversion PDF : {e}")
					        else:
					            logging.info(f"Email converti en PDF : {output_file}.pdf")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def process_mbox(mbox_file):
					    """Convert every mail of an mbox file to PDF.

					    The PDFs are written to a directory created under the current working
					    directory and named after the slugified base name of *mbox_file*.

					    Args:
					        mbox_file: Path of the mbox file to read.

					    Returns:
					        None. A missing mbox file is logged as an error and nothing is created.
					    """
					    try:
					        # create=False: a mistyped path must not silently create an empty mbox.
					        mbox = mailbox.mbox(mbox_file, create=False)
					    except mailbox.NoSuchMailboxError:
					        logging.error(f"Fichier mbox introuvable : {mbox_file}")
					        return

					    try:
					        logging.info(f"Traitement de {mbox_file}")

					        # Create the output directory.
					        output_dir_name = slugify(os.path.splitext(os.path.basename(mbox_file))[0])
					        output_dir = os.path.join(os.getcwd(), output_dir_name)
					        os.makedirs(output_dir, exist_ok=True)
					        logging.debug(f"Extraction dans le dossier {output_dir}")

					        for i, message in enumerate(mbox):
					            extract_and_convert_email(message, output_dir, i)
					    finally:
					        # Release the underlying file handle even if a mail fails.
					        mbox.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
					    # Usage: mbox2pdf.py <mbox file>
					    if len(sys.argv) < 2:
					        # sys.exit with a string prints it to stderr and exits with status 1,
					        # so calling scripts can detect the usage error.
					        sys.exit("Veuillez fournir le fichier mbox en argument.")
					    process_mbox(sys.argv[1])
 | 
				
			||||||
		Reference in New Issue
	
	Block a user