-
Notifications
You must be signed in to change notification settings - Fork 0
/
EmlParser.py
107 lines (94 loc) · 2.8 KB
/
EmlParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import glob
import datetime
import json
import eml_parser
import email
import os
import base64
import filetype
# Glob pattern selecting the .eml files to parse, relative to the working
# directory. Built with os.path.join so the separator matches the host OS
# (on Windows this is still ".\Storage\*.eml").
EmlDir = os.path.join(".", "Storage", "*.eml")
# Directory where extracted attachments are written, relative to the working
# directory (".\Attachment" on Windows).
AttachDir = os.path.join(".", "Attachment")
class AttachMent():
    """One attachment taken from a decoded e-mail.

    Populate it with parse() from an attachment dictionary, then call save()
    to write the decoded payload to disk and detect its file type.
    """

    def __init__(self):
        self.path = None            # full path of the saved file, set by save()
        self.filename = None        # info['filename']
        self.size = None            # info['size']
        self.sha256 = None          # info['hash']['sha256']; also the on-disk file name
        self.md5 = None             # info['hash']['md5']
        self.extension = None       # info['extension'], when the key is present
        self.raw = None             # info['raw']; base64-decoded when saved
        self.content_header = None  # info['content_header']
        self.type = None            # extension reported by filetype.guess()
        self.mime_type = None       # MIME type reported by filetype.guess()

    def parse(self, info):
        """Copy the attachment fields out of `info`.

        `info` must provide 'filename', 'size', 'hash' (with 'sha256' and
        'md5'), 'raw' and 'content_header'; 'extension' is optional.
        Raises KeyError when a required key is missing.
        """
        digests = info['hash']
        self.filename = info['filename']
        self.size = info['size']
        self.sha256 = digests['sha256']
        # Previously this copied the sha256 value into md5.
        self.md5 = digests['md5']
        if 'extension' in info:
            self.extension = info['extension']
        self.raw = info['raw']
        self.content_header = info['content_header']

    def save(self, path):
        """Write the decoded payload to `path`/<sha256> and detect its type.

        The file is named after the SHA-256 digest, so an attachment that is
        already on disk is not written again. Type detection runs in both
        cases so `type` and `mime_type` are filled for duplicates too.
        """
        if path:
            # open() does not create missing directories.
            os.makedirs(path, exist_ok=True)
        self.path = os.path.join(path, self.sha256)

        if not os.path.exists(self.path):
            with open(self.path, 'wb') as f_out:
                f_out.write(base64.b64decode(self.raw))

        # Inspect the file only after it has been closed, so the content
        # is fully flushed to disk.
        kind = filetype.guess(self.path)
        if kind is not None:
            self.type = kind.extension
            self.mime_type = kind.mime
class EmlInfo():
    """Metadata and attachments of a single .eml file.

    parse() reads the file, records its hashes and selected headers, and
    builds an AttachMent for every attachment; saveAttachment() writes those
    attachments to disk.
    """

    def __init__(self):
        self.path = None          # path of the .eml file given to parse()
        self.sha256 = None        # 'sha256' entry of get_file_hash()
        self.md5 = None           # 'md5' entry of get_file_hash()
        self.subject = None       # eml['header']['subject']
        self.date = None          # eml['header']['date']
        self.from_ = None         # eml['header']['from']
        self.to = None            # eml['header']['to']
        self.return_path = None   # raw 'return-path' header, None if absent
        self.reply_to = None      # raw 'reply-to' header, None if absent
        self.message_id = None    # raw 'message-id' header, None if absent
        self.attachmentList = []  # AttachMent objects appended by parse()

    def parse(self, path):
        """Read the .eml file at `path` and fill this object's fields.

        The optional raw headers ('return-path', 'message-id', 'reply-to')
        are set to None when the message does not carry them. Attachments
        are appended to `attachmentList`.
        """
        self.path = path
        with open(path, 'rb') as f_eml:
            raw_eml = f_eml.read()

        # Named `digests` so the builtin hash() is not shadowed.
        digests = eml_parser.eml_parser.get_file_hash(raw_eml)
        self.sha256 = digests['sha256']
        self.md5 = digests['md5']

        # include_attachment_data=True is needed so each attachment carries
        # the 'raw' payload that AttachMent.save() decodes.
        eml = eml_parser.eml_parser.decode_email_b(raw_eml, include_attachment_data=True)

        header = eml['header']
        self.subject = header['subject']
        self.date = header['date']
        self.to = header['to']
        self.from_ = header['from']

        # Not every message has these headers. 'return-path' used to be
        # indexed unconditionally and raised KeyError when it was missing.
        raw_headers = header['header']
        self.return_path = raw_headers.get('return-path')
        self.message_id = raw_headers.get('message-id')
        self.reply_to = raw_headers.get('reply-to')

        for attachInfo in eml.get('attachment', []):
            attachment = AttachMent()
            attachment.parse(attachInfo)
            self.attachmentList.append(attachment)

    def saveAttachment(self, path):
        """Call save(path) on every attachment collected by parse()."""
        for attachment in self.attachmentList:
            attachment.save(path)
class EmlParser():
    """Parses .eml files matched by a glob pattern and keeps the results."""

    def __init__(self):
        self.path = None   # never assigned by this class
        self.emlList = []  # EmlInfo objects appended by parseEml()

    def parseEml(self, emlDir, attachDir, limit=1):
        """Parse files matching `emlDir` and save their attachments.

        emlDir    -- glob pattern selecting the .eml files.
        attachDir -- directory passed to EmlInfo.saveAttachment().
        limit     -- maximum number of matched files to process. The default
                     of 1 keeps the earlier behaviour, where the loop stopped
                     after the first file; pass None to process every match.

        Each processed file is appended to `emlList` as an EmlInfo.
        Raises ValueError when `limit` is negative.
        """
        if limit is not None and limit < 0:
            raise ValueError("limit must be None or a non-negative integer")

        emlPathList = glob.glob(emlDir)
        # Slicing with None as the stop keeps the whole list.
        for emlPath in emlPathList[:limit]:
            eml = EmlInfo()
            eml.parse(emlPath)
            eml.saveAttachment(attachDir)
            self.emlList.append(eml)
if __name__ == '__main__':
    # Script entry point: parse the files matched by EmlDir and store their
    # attachments under AttachDir.
    emlParser = EmlParser()
    emlParser.parseEml(emlDir=EmlDir, attachDir=AttachDir)