Nettoyage du HTML
This commit is contained in:
parent
584a5e82b1
commit
220b102830
@ -22,6 +22,11 @@ def extract_and_convert_email(message, output_file_base, i):
|
||||
try:
|
||||
payload = part.get_payload(decode=True).decode(charset, errors='replace')
|
||||
soup = BeautifulSoup(payload, 'html.parser')
|
||||
# Nettoyage de l'HTML
|
||||
attr_whitelist = ("style")
|
||||
for tag in soup.findAll(True):
|
||||
for attr in [attr for attr in tag.attrs if attr not in attr_whitelist]:
|
||||
del tag[attr]
|
||||
html_content = str(soup)
|
||||
|
||||
# Extraction de la date et formatage ISO
|
||||
|
Loading…
x
Reference in New Issue
Block a user