mirror of
https://github.com/Ascyii/scripts.git
synced 2026-01-01 04:44:24 -05:00
82 lines
2.5 KiB
Python
82 lines
2.5 KiB
Python
import os
|
|
import email
|
|
from email import policy
|
|
from email.parser import BytesParser
|
|
import re
|
|
from datetime import datetime
|
|
|
|
# Mailbox file the saved messages are read from
input_file = "/home/jonas/mail/saved-messages"

# Folder that receives the exported plain-text files;
# create it if it does not exist yet
output_dir = "/home/jonas/mail/plain_emails"
os.makedirs(output_dir, exist_ok=True)
|
|
|
|
def sanitize_filename(name):
    """Return *name* with characters that are problematic in filenames removed.

    The characters dropped are the backslash, slash, asterisk, question
    mark, colon, double quote, angle brackets and the pipe symbol. All
    other characters are kept unchanged.
    """
    forbidden_chars = re.compile(r'[\\/*?:"<>|]')
    return forbidden_chars.sub("", name)
|
|
|
|
def parse_email_datetime(date_str):
    """Format an email Date header value as ``YYYY-MM-DD_HH-MM``.

    The timestamp is formatted exactly as written in the header; it is
    not converted to another time zone.

    Args:
        date_str: The value of a ``Date`` header (RFC 2822 style), or any
            placeholder text when the header is missing.

    Returns:
        The formatted timestamp, or the string ``"unknown-date"`` when the
        value cannot be parsed as a date.
    """
    # Import the function explicitly: a bare ``import email`` does not
    # guarantee that the ``email.utils`` submodule is loaded, and a missing
    # attribute must not be mistaken for an unparsable date.
    from email.utils import parsedate_to_datetime

    try:
        parsed_date = parsedate_to_datetime(date_str)
    except (TypeError, ValueError, IndexError, OverflowError):
        # Malformed or missing dates: current Python raises ValueError,
        # some older releases raised TypeError/IndexError instead, and
        # absurdly large numeric fields can overflow datetime().
        return "unknown-date"
    return parsed_date.strftime('%Y-%m-%d_%H-%M')
|
|
|
|
def _extract_plain_body(msg):
    """Return the decoded text body of *msg*.

    For a multipart message the first ``text/plain`` part found by
    ``msg.walk()`` is used; if there is none, a fixed placeholder string is
    returned. A non-multipart message is decoded as-is.
    """
    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() == "text/plain":
                break
        else:
            return "(No plain text part found)"
    else:
        part = msg

    payload = part.get_payload(decode=True)
    if payload is None:
        # Defensive: get_payload(decode=True) is documented to return None
        # for a multipart container, which has no bytes of its own.
        return ""
    charset = part.get_content_charset() or 'utf-8'
    try:
        return payload.decode(charset, errors='replace')
    except LookupError:
        # Unknown or misspelled charset label: fall back to UTF-8 instead
        # of aborting the whole export.
        return payload.decode('utf-8', errors='replace')


def _unique_path(directory, filename):
    """Return a path inside *directory* for *filename* that is not in use.

    When the plain name already exists, ``_1``, ``_2``, ... is inserted
    before the extension until a free name is found.
    """
    stem, ext = os.path.splitext(filename)
    candidate = os.path.join(directory, filename)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{stem}_{counter}{ext}")
        counter += 1
    return candidate


# Read the whole mailbox into memory
with open(input_file, "rb") as f:
    raw_data = f.read()

# Split raw emails (assuming mbox format with "From " separator lines)
emails = raw_data.split(b'\nFrom ')

# If the first email doesn't start with "From ", fix it. An empty (or
# whitespace-only) mailbox is left alone so it is not rewritten as a bare
# "From " line at the end of the run.
if emails[0].strip() and not emails[0].startswith(b'From '):
    emails[0] = b'From ' + emails[0]

# Save the first email to keep (presumably the mail client's placeholder
# message -- TODO confirm). The split above consumed the newline that
# preceded the second message, so put it back to keep the bytes identical
# to the original start of the mailbox.
first_email = emails[0] + b'\n' if len(emails) > 1 else emails[0]

# One parser instance is enough for all messages
parser = BytesParser(policy=policy.default)

# Process the rest, ignoring the first
for raw_email in emails[1:]:
    if not raw_email.strip():
        continue

    # The split removed the "From " separator; add it back before parsing
    raw_email = b'From ' + raw_email
    msg = parser.parsebytes(raw_email)

    subject = msg['subject'] or "No Subject"
    sender = msg['from'] or "Unknown Sender"
    receiver = msg['to'] or "Unknown Receiver"
    date = msg['date'] or "Unknown Date"

    subject_clean = sanitize_filename(subject.strip())
    date_clean = parse_email_datetime(date)

    # Get the plain text part
    body = _extract_plain_body(msg)

    # Create the filename: Date_Time_Subject.txt. Messages sharing the same
    # minute and subject get a numeric suffix instead of overwriting each
    # other -- the mailbox is truncated below, so an overwrite would lose
    # the earlier message for good.
    filename = f"{date_clean}_{subject_clean}.txt"
    output_path = _unique_path(output_dir, filename)

    # Write to file
    with open(output_path, "w", encoding="utf-8") as out_f:
        out_f.write(f"Date: {date}\n")
        out_f.write(f"From: {sender}\n")
        out_f.write(f"To: {receiver}\n\n")
        out_f.write(body)

# After processing, overwrite the mailbox with only the first email.
# This line is reached only if every export above succeeded; any exception
# stops the script before the mailbox is touched.
with open(input_file, "wb") as f:
    f.write(first_email)
|