v1.2.2
This commit is contained in:
261
backend/utils/mail/msg_parser.go
Normal file
261
backend/utils/mail/msg_parser.go
Normal file
@@ -0,0 +1,261 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/richardlehane/mscfb"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// MAPI Property Tags
|
||||
const (
|
||||
prSubject = "0037"
|
||||
prBody = "1000"
|
||||
prBodyHTML = "1013"
|
||||
prSenderName = "0C1A"
|
||||
prSenderEmail = "0C1F"
|
||||
prDisplayTo = "0E04" // Display list of To recipients
|
||||
prDisplayCc = "0E03"
|
||||
prDisplayBcc = "0E02"
|
||||
prMessageHeaders = "007D"
|
||||
prClientSubmitTime = "0039" // Date
|
||||
prAttachLongFilename = "3707"
|
||||
prAttachFilename = "3704"
|
||||
prAttachData = "3701"
|
||||
prAttachMimeTag = "370E"
|
||||
)
|
||||
|
||||
// MAPI Property Types
|
||||
const (
|
||||
ptUnicode = "001F"
|
||||
ptString8 = "001E"
|
||||
ptBinary = "0102"
|
||||
)
|
||||
|
||||
type msgParser struct {
|
||||
reader *mscfb.Reader
|
||||
props map[string][]byte
|
||||
}
|
||||
|
||||
func parseMsgFile(filePath string) (*EmailData, error) {
|
||||
f, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
doc, err := mscfb.New(f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
email := &EmailData{
|
||||
To: []string{},
|
||||
Cc: []string{},
|
||||
Bcc: []string{},
|
||||
}
|
||||
|
||||
// We need to iterate through the entries to find properties and attachments
|
||||
// Since mscfb is a sequential reader, we might need to be careful.
|
||||
// However, usually properties are in streams.
|
||||
|
||||
// Strategy:
|
||||
// 1. Read all streams into a map keyed by their path/name for easier access?
|
||||
// MSG files can be large (attachments), so maybe not all.
|
||||
// 2. Identify properties from their stream names directly.
|
||||
|
||||
// Simplified approach: scan for stream names matching our patterns.
|
||||
|
||||
// Better approach:
|
||||
// The Root Entry has "properties".
|
||||
// We need to detect if we are in an attachment storage.
|
||||
|
||||
// Since mscfb iterates flat (Post-Order?), we can track context?
|
||||
// mscfb File struct provides Name and path.
|
||||
|
||||
attachmentsMap := make(map[string]*EmailAttachment)
|
||||
|
||||
for entry, err := doc.Next(); err == nil; entry, err = doc.Next() {
|
||||
name := entry.Name
|
||||
|
||||
// Check if it's a property stream
|
||||
if strings.HasPrefix(name, "__substg1.0_") {
|
||||
path := entry.Path // Path is array of directory names
|
||||
|
||||
// Root properties
|
||||
if len(path) == 0 { // In root
|
||||
val, err := io.ReadAll(doc)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
processRootProperty(name, val, email)
|
||||
} else if strings.HasPrefix(path[len(path)-1], "__attach_version1.0_") {
|
||||
// Attachment property
|
||||
attachStorageName := path[len(path)-1]
|
||||
if _, exists := attachmentsMap[attachStorageName]; !exists {
|
||||
attachmentsMap[attachStorageName] = &EmailAttachment{}
|
||||
}
|
||||
|
||||
val, err := io.ReadAll(doc)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
processAttachProperty(name, val, attachmentsMap[attachStorageName])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Finalize attachments
|
||||
for _, att := range attachmentsMap {
|
||||
if strings.Contains(strings.ToLower(att.ContentType), "multipart/signed") {
|
||||
dataStr := string(att.Data)
|
||||
// Check if it already looks like a plain text EML (contains typical headers)
|
||||
if strings.Contains(dataStr, "Content-Type:") || strings.Contains(dataStr, "MIME-Version:") || strings.Contains(dataStr, "From:") {
|
||||
if !strings.HasSuffix(strings.ToLower(att.Filename), ".eml") {
|
||||
att.Filename += ".eml"
|
||||
}
|
||||
} else {
|
||||
// Try to decode as Base64
|
||||
// Clean up the base64 string: remove newlines and spaces
|
||||
base64Str := strings.Map(func(r rune) rune {
|
||||
if r == '\r' || r == '\n' || r == ' ' || r == '\t' {
|
||||
return -1
|
||||
}
|
||||
return r
|
||||
}, dataStr)
|
||||
|
||||
// Try standard decoding
|
||||
decoded, err := base64.StdEncoding.DecodeString(base64Str)
|
||||
if err != nil {
|
||||
// Try raw decoding (no padding)
|
||||
decoded, err = base64.RawStdEncoding.DecodeString(base64Str)
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
att.Data = decoded
|
||||
if !strings.HasSuffix(strings.ToLower(att.Filename), ".eml") {
|
||||
att.Filename += ".eml"
|
||||
}
|
||||
} else {
|
||||
fmt.Println("Failed to decode multipart/signed attachment:", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if att.Filename == "" {
|
||||
att.Filename = "attachment"
|
||||
}
|
||||
// Only add if we have data
|
||||
if len(att.Data) > 0 {
|
||||
email.Attachments = append(email.Attachments, *att)
|
||||
}
|
||||
}
|
||||
|
||||
return email, nil
|
||||
}
|
||||
|
||||
func processRootProperty(name string, data []byte, email *EmailData) {
|
||||
tag := name[12:16]
|
||||
typ := name[16:20]
|
||||
|
||||
strVal := ""
|
||||
if typ == ptUnicode {
|
||||
strVal = decodeUTF16(data)
|
||||
} else if typ == ptString8 {
|
||||
strVal = string(data)
|
||||
}
|
||||
|
||||
switch tag {
|
||||
case prSubject:
|
||||
email.Subject = strVal
|
||||
case prBody:
|
||||
if email.Body == "" { // Prefer body if not set
|
||||
email.Body = strVal
|
||||
}
|
||||
case prBodyHTML:
|
||||
email.Body = strVal // Prefer HTML
|
||||
case prSenderName:
|
||||
if email.From == "" {
|
||||
email.From = strVal
|
||||
} else {
|
||||
email.From = fmt.Sprintf("%s <%s>", strVal, email.From)
|
||||
}
|
||||
case prSenderEmail:
|
||||
if email.From == "" {
|
||||
email.From = strVal
|
||||
} else if !strings.Contains(email.From, "<") {
|
||||
email.From = fmt.Sprintf("%s <%s>", email.From, strVal)
|
||||
}
|
||||
case prDisplayTo:
|
||||
// Split by ; or similar if needed, but display string is usually one line
|
||||
email.To = splitAndTrim(strVal)
|
||||
case prDisplayCc:
|
||||
email.Cc = splitAndTrim(strVal)
|
||||
case prDisplayBcc:
|
||||
email.Bcc = splitAndTrim(strVal)
|
||||
case prClientSubmitTime:
|
||||
// Date logic to be added if struct supports it
|
||||
}
|
||||
|
||||
/*
|
||||
if tag == prClientSubmitTime && typ == "0040" {
|
||||
if len(data) >= 8 {
|
||||
ft := binary.LittleEndian.Uint64(data)
|
||||
t := time.Date(1601, 1, 1, 0, 0, 0, 0, time.UTC).Add(time.Duration(ft) * 100 * time.Nanosecond)
|
||||
email.Date = t.Format(time.RFC1123Z)
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
func processAttachProperty(name string, data []byte, att *EmailAttachment) {
|
||||
tag := name[12:16]
|
||||
typ := name[16:20]
|
||||
|
||||
strVal := ""
|
||||
if typ == ptUnicode {
|
||||
strVal = decodeUTF16(data)
|
||||
} else if typ == ptString8 {
|
||||
strVal = string(data)
|
||||
}
|
||||
|
||||
switch tag {
|
||||
case prAttachLongFilename:
|
||||
att.Filename = strVal
|
||||
case prAttachFilename:
|
||||
if att.Filename == "" {
|
||||
att.Filename = strVal
|
||||
}
|
||||
case prAttachMimeTag:
|
||||
att.ContentType = strVal
|
||||
case prAttachData:
|
||||
att.Data = data
|
||||
}
|
||||
}
|
||||
|
||||
func decodeUTF16(b []byte) string {
|
||||
decoder := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder()
|
||||
decoded, _, _ := transform.Bytes(decoder, b)
|
||||
// Remove null terminators if present
|
||||
return strings.TrimRight(string(decoded), "\x00")
|
||||
}
|
||||
|
||||
func splitAndTrim(s string) []string {
|
||||
if s == "" {
|
||||
return nil
|
||||
}
|
||||
parts := strings.Split(s, ";")
|
||||
var res []string
|
||||
for _, p := range parts {
|
||||
t := strings.TrimSpace(p)
|
||||
if t != "" {
|
||||
res = append(res, t)
|
||||
}
|
||||
}
|
||||
return res
|
||||
}
|
||||
Reference in New Issue
Block a user