Add TNEF handling and email loading improvements

- Implement TNEF extraction and recursive parsing in new `tnef_reader.go` and associated tests.
- Create tests for TNEF extraction scenarios in `tnef_diag_test.go`, `tnef_diag7_test.go`, and `tnef_diag8_test.go`.
This commit is contained in:
Flavio Fois
2026-02-14 09:03:41 +01:00
parent 33cb171fb1
commit 54a3dff1c2
23 changed files with 2029 additions and 18 deletions

View File

@@ -0,0 +1,47 @@
package internal
import (
	"bytes"
	"errors"
	"io"
	"os"
)
// EmailFormat represents the detected format of an email file.
type EmailFormat string

// Formats reported by DetectEmailFormat.
const (
	// FormatEML is reported for any non-empty file that does not start with
	// the OLE2/CFB signature.
	FormatEML EmailFormat = "eml"
	// FormatMSG is reported for files that start with the OLE2/CFB signature.
	FormatMSG EmailFormat = "msg"
	// FormatUnknown is reported when the file is empty or cannot be read.
	FormatUnknown EmailFormat = "unknown"
)

// msgMagic is the OLE2/CFB compound file header signature used by .msg files.
var msgMagic = []byte{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}

// DetectEmailFormat identifies the email file format by inspecting the file's
// binary magic bytes, regardless of the file extension.
//
// Supported formats:
//   - "msg": Microsoft Outlook MSG (OLE2/CFB compound file)
//   - "eml": Standard MIME email (RFC 5322)
//   - "unknown": Could not determine format
//
// An empty file yields FormatUnknown with a nil error. A non-empty file that
// is shorter than the signature, or that does not start with it, yields
// FormatEML. If the file cannot be opened or read, FormatUnknown is returned
// together with the underlying error.
func DetectEmailFormat(filePath string) (EmailFormat, error) {
	f, err := os.Open(filePath)
	if err != nil {
		return FormatUnknown, err
	}
	defer f.Close()

	// io.ReadFull keeps reading until the buffer is full or the stream ends,
	// so a short read cannot cause a valid MSG signature to be missed.
	header := make([]byte, len(msgMagic))
	n, err := io.ReadFull(f, header)
	switch {
	case errors.Is(err, io.EOF):
		// Nothing could be read: the file is empty.
		return FormatUnknown, nil
	case err != nil && !errors.Is(err, io.ErrUnexpectedEOF):
		// A genuine I/O failure; report it instead of masking it as "unknown".
		return FormatUnknown, err
	}

	// MSG files start with the OLE2 Compound File Binary magic bytes. When the
	// file is shorter than the signature, header[:n] differs in length and the
	// comparison is false.
	if bytes.Equal(header[:n], msgMagic) {
		return FormatMSG, nil
	}

	// EML files are plain-text MIME messages; assume EML for anything else.
	return FormatEML, nil
}