Nettoyage du HTML
This commit is contained in:
		@@ -22,6 +22,11 @@ def extract_and_convert_email(message, output_file_base, i):
 | 
				
			|||||||
            try:
 | 
					            try:
 | 
				
			||||||
                payload = part.get_payload(decode=True).decode(charset, errors='replace')
 | 
					                payload = part.get_payload(decode=True).decode(charset, errors='replace')
 | 
				
			||||||
                soup = BeautifulSoup(payload, 'html.parser')
 | 
					                soup = BeautifulSoup(payload, 'html.parser')
 | 
				
			||||||
 | 
					                # Nettoyage de l'HTML
 | 
				
			||||||
 | 
					                attr_whitelist = ("style")
 | 
				
			||||||
 | 
					                for tag in soup.findAll(True):
 | 
				
			||||||
 | 
					                    for attr in [attr for attr in tag.attrs if attr not in attr_whitelist]:
 | 
				
			||||||
 | 
					                        del tag[attr]
 | 
				
			||||||
                html_content = str(soup)
 | 
					                html_content = str(soup)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                # Extraction de la date et formatage ISO
 | 
					                # Extraction de la date et formatage ISO
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user