Add TNEF handling and email loading improvements
- Implement TNEF extraction and recursive parsing in new `tnef_reader.go` and associated tests. - Create tests for TNEF extraction scenarios in `tnef_diag_test.go`, `tnef_diag7_test.go`, and `tnef_diag8_test.go`.
This commit is contained in:
@@ -146,6 +146,9 @@ func ReadEmlFile(filePath string) (*EmailData, error) {
|
||||
})
|
||||
}
|
||||
|
||||
// Expand any TNEF (winmail.dat) attachments into their contained files.
|
||||
attachments = expandTNEFAttachments(attachments)
|
||||
|
||||
isPec := hasDatiCert && hasSmime
|
||||
|
||||
// Format From
|
||||
@@ -267,6 +270,9 @@ func ReadPecInnerEml(filePath string) (*EmailData, error) {
|
||||
})
|
||||
}
|
||||
|
||||
// Expand any TNEF (winmail.dat) attachments into their contained files.
|
||||
attachments = expandTNEFAttachments(attachments)
|
||||
|
||||
isPec := hasDatiCert && hasSmime
|
||||
|
||||
// Format From
|
||||
|
||||
47
backend/utils/mail/format_detector.go
Normal file
47
backend/utils/mail/format_detector.go
Normal file
@@ -0,0 +1,47 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
)
|
||||
|
||||
// EmailFormat names the on-disk format detected for an email file.
type EmailFormat string

// Values reported by DetectEmailFormat.
const (
	FormatEML     EmailFormat = "eml"
	FormatMSG     EmailFormat = "msg"
	FormatUnknown EmailFormat = "unknown"
)

// msgMagic is the 8-byte OLE2/CFB compound-file signature that opens a .msg file.
var msgMagic = []byte{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}

// DetectEmailFormat classifies the file at filePath by looking at its first
// bytes; the file extension plays no part in the decision.
//
// Results:
//   - FormatMSG when the file starts with the OLE2/CFB signature.
//   - FormatEML for any other file from which at least one byte could be read.
//   - FormatUnknown with a nil error when nothing could be read (for example
//     an empty file, or a read that fails after a successful open).
//   - FormatUnknown with the open error when the file cannot be opened.
func DetectEmailFormat(filePath string) (EmailFormat, error) {
	file, openErr := os.Open(filePath)
	if openErr != nil {
		return FormatUnknown, openErr
	}
	defer file.Close()

	var header [8]byte
	got, readErr := file.Read(header[:])

	switch {
	case readErr != nil, got < 1:
		// Nothing usable was read; report "unknown" without an error.
		return FormatUnknown, nil
	case bytes.HasPrefix(header[:got], msgMagic):
		// Only a full 8-byte read can carry the complete signature.
		return FormatMSG, nil
	default:
		// Anything else is assumed to be a plain-text MIME message.
		return FormatEML, nil
	}
}
|
||||
58
backend/utils/mail/tnef_diag2_test.go
Normal file
58
backend/utils/mail/tnef_diag2_test.go
Normal file
@@ -0,0 +1,58 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/teamwork/tnef"
|
||||
)
|
||||
|
||||
func TestTNEFAttributes(t *testing.T) {
|
||||
testFile := `H:\Dev\Gits\EMLy\EML_TNEF.eml`
|
||||
if _, err := os.Stat(testFile); os.IsNotExist(err) {
|
||||
t.Skip("test EML file not present")
|
||||
}
|
||||
|
||||
f, _ := os.Open(testFile)
|
||||
defer f.Close()
|
||||
|
||||
outerEmail, _ := Parse(f)
|
||||
var innerData []byte
|
||||
for _, att := range outerEmail.Attachments {
|
||||
if strings.Contains(strings.ToLower(att.Filename), "postacert.eml") {
|
||||
innerData, _ = io.ReadAll(att.Data)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
innerEmail, _ := Parse(bytes.NewReader(innerData))
|
||||
for _, att := range innerEmail.Attachments {
|
||||
data, _ := io.ReadAll(att.Data)
|
||||
if strings.ToLower(att.Filename) != "winmail.dat" {
|
||||
continue
|
||||
}
|
||||
|
||||
decoded, _ := tnef.Decode(data)
|
||||
fmt.Printf("MAPI Attributes (%d):\n", len(decoded.Attributes))
|
||||
for _, attr := range decoded.Attributes {
|
||||
dataPreview := fmt.Sprintf("%d bytes", len(attr.Data))
|
||||
if len(attr.Data) < 200 {
|
||||
dataPreview = fmt.Sprintf("%q", attr.Data)
|
||||
}
|
||||
fmt.Printf(" Name=0x%04X Data=%s\n", attr.Name, dataPreview)
|
||||
}
|
||||
|
||||
// Check Body/BodyHTML from TNEF data struct fields
|
||||
fmt.Printf("\nBody len: %d\n", len(decoded.Body))
|
||||
fmt.Printf("BodyHTML len: %d\n", len(decoded.BodyHTML))
|
||||
|
||||
// Check attachment details
|
||||
for i, ta := range decoded.Attachments {
|
||||
fmt.Printf("Attachment[%d]: title=%q dataLen=%d\n", i, ta.Title, len(ta.Data))
|
||||
}
|
||||
}
|
||||
}
|
||||
67
backend/utils/mail/tnef_diag3_test.go
Normal file
67
backend/utils/mail/tnef_diag3_test.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/teamwork/tnef"
|
||||
)
|
||||
|
||||
func TestTNEFAllSizes(t *testing.T) {
|
||||
testFile := `H:\Dev\Gits\EMLy\EML_TNEF.eml`
|
||||
if _, err := os.Stat(testFile); os.IsNotExist(err) {
|
||||
t.Skip("test EML file not present")
|
||||
}
|
||||
|
||||
f, _ := os.Open(testFile)
|
||||
defer f.Close()
|
||||
|
||||
outerEmail, _ := Parse(f)
|
||||
var innerData []byte
|
||||
for _, att := range outerEmail.Attachments {
|
||||
if strings.Contains(strings.ToLower(att.Filename), "postacert.eml") {
|
||||
innerData, _ = io.ReadAll(att.Data)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
innerEmail, _ := Parse(bytes.NewReader(innerData))
|
||||
for _, att := range innerEmail.Attachments {
|
||||
data, _ := io.ReadAll(att.Data)
|
||||
if strings.ToLower(att.Filename) != "winmail.dat" {
|
||||
continue
|
||||
}
|
||||
|
||||
decoded, _ := tnef.Decode(data)
|
||||
|
||||
totalAttrSize := 0
|
||||
for _, attr := range decoded.Attributes {
|
||||
totalAttrSize += len(attr.Data)
|
||||
fmt.Printf(" Attr 0x%04X: %d bytes\n", attr.Name, len(attr.Data))
|
||||
}
|
||||
|
||||
totalAttSize := 0
|
||||
for _, ta := range decoded.Attachments {
|
||||
totalAttSize += len(ta.Data)
|
||||
}
|
||||
|
||||
fmt.Printf("\nTotal TNEF data: %d bytes\n", len(data))
|
||||
fmt.Printf("Total attribute data: %d bytes\n", totalAttrSize)
|
||||
fmt.Printf("Total attachment data: %d bytes\n", totalAttSize)
|
||||
fmt.Printf("Accounted: %d bytes\n", totalAttrSize+totalAttSize)
|
||||
fmt.Printf("Missing: %d bytes\n", len(data)-totalAttrSize-totalAttSize)
|
||||
|
||||
// Try raw decode to check for nested message/attachment objects
|
||||
fmt.Printf("\nBody: %d, BodyHTML: %d\n", len(decoded.Body), len(decoded.BodyHTML))
|
||||
|
||||
// Check attachment[0] content
|
||||
if len(decoded.Attachments) > 0 {
|
||||
a0 := decoded.Attachments[0]
|
||||
fmt.Printf("\nAttachment[0] Title=%q Data (hex): %x\n", a0.Title, a0.Data)
|
||||
}
|
||||
}
|
||||
}
|
||||
78
backend/utils/mail/tnef_diag4_test.go
Normal file
78
backend/utils/mail/tnef_diag4_test.go
Normal file
@@ -0,0 +1,78 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTNEFRawScan(t *testing.T) {
|
||||
testFile := `H:\Dev\Gits\EMLy\EML_TNEF.eml`
|
||||
if _, err := os.Stat(testFile); os.IsNotExist(err) {
|
||||
t.Skip("test EML file not present")
|
||||
}
|
||||
|
||||
f, _ := os.Open(testFile)
|
||||
defer f.Close()
|
||||
|
||||
outerEmail, _ := Parse(f)
|
||||
var innerData []byte
|
||||
for _, att := range outerEmail.Attachments {
|
||||
if strings.Contains(strings.ToLower(att.Filename), "postacert.eml") {
|
||||
innerData, _ = io.ReadAll(att.Data)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
innerEmail, _ := Parse(bytes.NewReader(innerData))
|
||||
for _, att := range innerEmail.Attachments {
|
||||
data, _ := io.ReadAll(att.Data)
|
||||
if strings.ToLower(att.Filename) != "winmail.dat" {
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf("TNEF raw size: %d bytes\n", len(data))
|
||||
|
||||
// Verify signature
|
||||
if len(data) < 6 {
|
||||
t.Fatal("too short")
|
||||
}
|
||||
sig := binary.LittleEndian.Uint32(data[0:4])
|
||||
key := binary.LittleEndian.Uint16(data[4:6])
|
||||
fmt.Printf("Signature: 0x%08X Key: 0x%04X\n", sig, key)
|
||||
|
||||
offset := 6
|
||||
attrNum := 0
|
||||
for offset < len(data) {
|
||||
if offset+9 > len(data) {
|
||||
fmt.Printf(" Truncated at offset %d\n", offset)
|
||||
break
|
||||
}
|
||||
|
||||
level := data[offset]
|
||||
attrID := binary.LittleEndian.Uint32(data[offset+1 : offset+5])
|
||||
attrLen := binary.LittleEndian.Uint32(data[offset+5 : offset+9])
|
||||
|
||||
levelStr := "MSG"
|
||||
if level == 0x02 {
|
||||
levelStr = "ATT"
|
||||
}
|
||||
|
||||
fmt.Printf(" [%03d] offset=%-8d level=%s id=0x%08X len=%d\n",
|
||||
attrNum, offset, levelStr, attrID, attrLen)
|
||||
|
||||
// Move past: level(1) + id(4) + len(4) + data(attrLen) + checksum(2)
|
||||
offset += 1 + 4 + 4 + int(attrLen) + 2
|
||||
|
||||
attrNum++
|
||||
if attrNum > 200 {
|
||||
fmt.Println(" ... stopping at 200 attributes")
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
241
backend/utils/mail/tnef_diag5_test.go
Normal file
241
backend/utils/mail/tnef_diag5_test.go
Normal file
@@ -0,0 +1,241 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTNEFMapiProps(t *testing.T) {
|
||||
testFile := `H:\Dev\Gits\EMLy\EML_TNEF.eml`
|
||||
if _, err := os.Stat(testFile); os.IsNotExist(err) {
|
||||
t.Skip("test EML file not present")
|
||||
}
|
||||
|
||||
f, _ := os.Open(testFile)
|
||||
defer f.Close()
|
||||
|
||||
outerEmail, _ := Parse(f)
|
||||
var innerData []byte
|
||||
for _, att := range outerEmail.Attachments {
|
||||
if strings.Contains(strings.ToLower(att.Filename), "postacert.eml") {
|
||||
innerData, _ = io.ReadAll(att.Data)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
innerEmail, _ := Parse(bytes.NewReader(innerData))
|
||||
for _, att := range innerEmail.Attachments {
|
||||
rawData, _ := io.ReadAll(att.Data)
|
||||
if strings.ToLower(att.Filename) != "winmail.dat" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Navigate to the first attachment's attAttachment (0x9005) block
|
||||
// From the raw scan: [011] offset=12082 + header(9bytes) = 12091 for data
|
||||
// Actually let's re-scan to find it properly
|
||||
offset := 6
|
||||
for offset < len(rawData) {
|
||||
if offset+9 > len(rawData) {
|
||||
break
|
||||
}
|
||||
level := rawData[offset]
|
||||
attrID := binary.LittleEndian.Uint32(rawData[offset+1 : offset+5])
|
||||
attrLen := int(binary.LittleEndian.Uint32(rawData[offset+5 : offset+9]))
|
||||
dataStart := offset + 9
|
||||
|
||||
// attAttachment = 0x00069005, we want the FIRST one (for attachment group 1)
|
||||
if level == 0x02 && attrID == 0x00069005 && attrLen > 1000 {
|
||||
fmt.Printf("Found attAttachment at offset %d, len=%d\n", offset, attrLen)
|
||||
parseMapiProps(rawData[dataStart:dataStart+attrLen], t)
|
||||
break
|
||||
}
|
||||
|
||||
offset += 9 + attrLen + 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parseMapiProps prints a one-line summary of each property in a serialized
// MAPI property list to standard output.
//
// data starts with a little-endian uint32 property count followed by the
// properties. Each property is a 4-byte tag (low 16 bits: type, high 16
// bits: ID); IDs >= 0x8000 carry an extra GUID/kind/name section that is
// skipped. Variable-length values are padded to a 4-byte boundary.
// Parsing stops quietly when the data runs out or an unsupported property
// type is met. t is used only to fail the test when data is shorter than the
// 4-byte count.
func parseMapiProps(data []byte, t *testing.T) {
	if len(data) < 4 {
		t.Fatal("too short for MAPI props")
	}

	le := binary.LittleEndian
	total := le.Uint32(data[:4])
	fmt.Printf("MAPI property count: %d\n", total)

	pos := 4

	// nextLen consumes the uint32 at pos; ok is false when fewer than four
	// bytes remain (pos is then left untouched).
	nextLen := func() (int, bool) {
		if pos+4 > len(data) {
			return 0, false
		}
		v := int(le.Uint32(data[pos : pos+4]))
		pos += 4
		return v, true
	}
	// padded returns n rounded up to the next multiple of four.
	padded := func(n int) int {
		if rem := n % 4; rem != 0 {
			return n + 4 - rem
		}
		return n
	}

	for idx := 0; idx < int(total) && pos+4 <= len(data); idx++ {
		tag := le.Uint32(data[pos : pos+4])
		typ := tag & 0xFFFF
		id := (tag >> 16) & 0xFFFF
		pos += 4

		// Named property: 16-byte GUID + 4-byte kind, then either a 4-byte
		// numeric ID (kind 0) or a length-prefixed, padded name.
		if id >= 0x8000 {
			if pos+20 > len(data) {
				return
			}
			kind := le.Uint32(data[pos+16 : pos+20])
			pos += 20
			if kind != 0 {
				nameLen, ok := nextLen()
				if !ok {
					return
				}
				pos += padded(nameLen)
			} else {
				pos += 4
			}
		}

		switch typ {
		case 0x0002, 0x0003, 0x000B, 0x0040: // PT_SHORT, PT_LONG, PT_BOOLEAN, PT_SYSTIME
			width := 4 // PT_SHORT is stored padded to 4 bytes
			if typ == 0x0040 {
				width = 8
			}
			if typ == 0x0003 && pos+4 <= len(data) {
				v := le.Uint32(data[pos : pos+4])
				fmt.Printf(" [%03d] PropID=0x%04X Type=0x%04X LONG val=%d (0x%X)\n", idx, id, typ, v, v)
			} else {
				fmt.Printf(" [%03d] PropID=0x%04X Type=0x%04X size=%d\n", idx, id, typ, width)
			}
			pos += width

		case 0x001E: // PT_STRING8
			n, ok := nextLen()
			if !ok {
				return
			}
			for j := 0; j < n; j++ {
				slen, ok := nextLen()
				if !ok {
					return
				}
				// Only short, fully present strings are echoed.
				text := ""
				if pos+slen <= len(data) && slen < 200 {
					text = string(data[pos : pos+slen])
				}
				fmt.Printf(" [%03d] PropID=0x%04X Type=0x%04X STRING8 len=%d val=%q\n", idx, id, typ, slen, text)
				pos += padded(slen)
			}

		case 0x001F, 0x0102: // PT_UNICODE, PT_BINARY
			label := "UNICODE"
			if typ == 0x0102 {
				label = "BINARY"
			}
			n, ok := nextLen()
			if !ok {
				return
			}
			for j := 0; j < n; j++ {
				vlen, ok := nextLen()
				if !ok {
					return
				}
				fmt.Printf(" [%03d] PropID=0x%04X Type=0x%04X %s len=%d\n", idx, id, typ, label, vlen)
				pos += padded(vlen)
			}

		case 0x000D: // PT_OBJECT
			n, ok := nextLen()
			if !ok {
				return
			}
			for j := 0; j < n; j++ {
				olen, ok := nextLen()
				if !ok {
					return
				}
				fmt.Printf(" [%03d] PropID=0x%04X Type=0x%04X OBJECT len=%d\n", idx, id, typ, olen)
				// Peek at the first 16 bytes of the object (a GUID).
				if pos+16 <= len(data) {
					fmt.Printf(" GUID: %x\n", data[pos:pos+16])
				}
				pos += padded(olen)
			}

		case 0x1003: // PT_MV_LONG
			n, ok := nextLen()
			if !ok {
				return
			}
			fmt.Printf(" [%03d] PropID=0x%04X Type=0x%04X MV_LONG count=%d\n", idx, id, typ, n)
			pos += n * 4

		case 0x1102: // PT_MV_BINARY
			n, ok := nextLen()
			if !ok {
				return
			}
			sum := 0
			for j := 0; j < n; j++ {
				blen, ok := nextLen()
				if !ok {
					return
				}
				sum += blen
				pos += padded(blen)
			}
			fmt.Printf(" [%03d] PropID=0x%04X Type=0x%04X MV_BINARY count=%d totalSize=%d\n", idx, id, typ, n, sum)

		default:
			// The value size is unknown, so the walk cannot continue.
			fmt.Printf(" [%03d] PropID=0x%04X Type=0x%04X (unknown type)\n", idx, id, typ)
			return
		}
	}
}
|
||||
209
backend/utils/mail/tnef_diag6_test.go
Normal file
209
backend/utils/mail/tnef_diag6_test.go
Normal file
@@ -0,0 +1,209 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/teamwork/tnef"
|
||||
)
|
||||
|
||||
func TestTNEFNestedMessage(t *testing.T) {
|
||||
testFile := `H:\Dev\Gits\EMLy\EML_TNEF.eml`
|
||||
if _, err := os.Stat(testFile); os.IsNotExist(err) {
|
||||
t.Skip("test EML file not present")
|
||||
}
|
||||
|
||||
f, _ := os.Open(testFile)
|
||||
defer f.Close()
|
||||
|
||||
outerEmail, _ := Parse(f)
|
||||
var innerData []byte
|
||||
for _, att := range outerEmail.Attachments {
|
||||
if strings.Contains(strings.ToLower(att.Filename), "postacert.eml") {
|
||||
innerData, _ = io.ReadAll(att.Data)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
innerEmail, _ := Parse(bytes.NewReader(innerData))
|
||||
for _, att := range innerEmail.Attachments {
|
||||
rawData, _ := io.ReadAll(att.Data)
|
||||
if strings.ToLower(att.Filename) != "winmail.dat" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Navigate to attAttachment (0x9005) for first attachment
|
||||
offset := 6
|
||||
for offset < len(rawData) {
|
||||
if offset+9 > len(rawData) {
|
||||
break
|
||||
}
|
||||
level := rawData[offset]
|
||||
attrID := binary.LittleEndian.Uint32(rawData[offset+1 : offset+5])
|
||||
attrLen := int(binary.LittleEndian.Uint32(rawData[offset+5 : offset+9]))
|
||||
dataStart := offset + 9
|
||||
|
||||
if level == 0x02 && attrID == 0x00069005 && attrLen > 1000 {
|
||||
mapiData := rawData[dataStart : dataStart+attrLen]
|
||||
|
||||
// Parse MAPI props to find PR_ATTACH_DATA_OBJ (0x3701)
|
||||
embeddedData := extractPRAttachDataObj(mapiData)
|
||||
if embeddedData == nil {
|
||||
t.Fatal("could not find PR_ATTACH_DATA_OBJ")
|
||||
}
|
||||
|
||||
fmt.Printf("PR_ATTACH_DATA_OBJ total: %d bytes\n", len(embeddedData))
|
||||
fmt.Printf("First 32 bytes after GUID: %x\n", embeddedData[16:min2(48, len(embeddedData))])
|
||||
|
||||
// Check if after the 16-byte GUID there's a TNEF signature
|
||||
afterGuid := embeddedData[16:]
|
||||
if len(afterGuid) >= 4 {
|
||||
sig := binary.LittleEndian.Uint32(afterGuid[0:4])
|
||||
fmt.Printf("Signature after GUID: 0x%08X (TNEF=0x223E9F78)\n", sig)
|
||||
|
||||
if sig == 0x223E9F78 {
|
||||
fmt.Println("It's a nested TNEF stream!")
|
||||
decoded, err := tnef.Decode(afterGuid)
|
||||
if err != nil {
|
||||
fmt.Printf("Nested TNEF decode error: %v\n", err)
|
||||
} else {
|
||||
fmt.Printf("Nested Body: %d bytes\n", len(decoded.Body))
|
||||
fmt.Printf("Nested BodyHTML: %d bytes\n", len(decoded.BodyHTML))
|
||||
fmt.Printf("Nested Attachments: %d\n", len(decoded.Attachments))
|
||||
for i, na := range decoded.Attachments {
|
||||
fmt.Printf(" [%d] %q (%d bytes)\n", i, na.Title, len(na.Data))
|
||||
}
|
||||
fmt.Printf("Nested Attributes: %d\n", len(decoded.Attributes))
|
||||
}
|
||||
} else {
|
||||
// Try as raw MAPI attributes (no TNEF wrapper)
|
||||
fmt.Printf("Not a TNEF stream. First byte: 0x%02X\n", afterGuid[0])
|
||||
// Check if it's a count of MAPI properties
|
||||
if len(afterGuid) >= 4 {
|
||||
propCount := binary.LittleEndian.Uint32(afterGuid[0:4])
|
||||
fmt.Printf("First uint32 (possible prop count): %d\n", propCount)
|
||||
}
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
offset += 9 + attrLen + 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// extractPRAttachDataObj walks a serialized MAPI property list and returns
// the raw value of the first PT_OBJECT (0x000D) property whose ID is 0x3701
// (PR_ATTACH_DATA_OBJ).
//
// mapiData starts with a little-endian uint32 property count followed by the
// properties; each property is a 4-byte tag (low 16 bits: type, high 16 bits:
// ID), an optional named-property section for IDs >= 0x8000, and the value.
// Variable-length values are padded to a 4-byte boundary.
//
// The returned slice aliases mapiData. nil is returned when the property is
// not present, when an unsupported property type is met before it, or when
// the data is truncated (including an object whose declared length runs past
// the end of mapiData, which previously caused a slice-bounds panic).
func extractPRAttachDataObj(mapiData []byte) []byte {
	if len(mapiData) < 4 {
		return nil
	}
	le := binary.LittleEndian
	count := int(le.Uint32(mapiData[0:4]))
	offset := 4

	// readLen consumes the uint32 at offset; ok is false when fewer than
	// four bytes remain.
	readLen := func() (int, bool) {
		if offset+4 > len(mapiData) {
			return 0, false
		}
		v := int(le.Uint32(mapiData[offset : offset+4]))
		offset += 4
		return v, true
	}
	// padded returns n rounded up to the next multiple of four.
	padded := func(n int) int {
		if rem := n % 4; rem != 0 {
			return n + 4 - rem
		}
		return n
	}

	for i := 0; i < count && offset+4 <= len(mapiData); i++ {
		propTag := le.Uint32(mapiData[offset : offset+4])
		propType := propTag & 0xFFFF
		propID := (propTag >> 16) & 0xFFFF
		offset += 4

		// Named property: 16-byte GUID + 4-byte kind, then either a 4-byte
		// numeric ID (kind 0) or a length-prefixed, padded name.
		if propID >= 0x8000 {
			if offset+20 > len(mapiData) {
				return nil
			}
			kind := le.Uint32(mapiData[offset+16 : offset+20])
			offset += 20
			if kind == 0 {
				offset += 4
			} else {
				nameLen, ok := readLen()
				if !ok {
					return nil
				}
				offset += padded(nameLen)
			}
		}

		switch propType {
		case 0x0002, 0x0003, 0x000B: // PT_SHORT (padded), PT_LONG, PT_BOOLEAN
			offset += 4
		case 0x0040: // PT_SYSTIME
			offset += 8
		case 0x001E, 0x001F, 0x0102, 0x1102: // PT_STRING8, PT_UNICODE, PT_BINARY, PT_MV_BINARY
			cnt, ok := readLen()
			if !ok {
				return nil
			}
			for j := 0; j < cnt; j++ {
				vlen, ok := readLen()
				if !ok {
					return nil
				}
				offset += padded(vlen)
			}
		case 0x000D: // PT_OBJECT
			cnt, ok := readLen()
			if !ok {
				return nil
			}
			for j := 0; j < cnt; j++ {
				olen, ok := readLen()
				if !ok {
					return nil
				}
				if propID == 0x3701 {
					// Refuse a declared length that exceeds the remaining
					// bytes rather than slicing out of range.
					if olen < 0 || olen > len(mapiData)-offset {
						return nil
					}
					return mapiData[offset : offset+olen]
				}
				offset += padded(olen)
			}
		case 0x1003: // PT_MV_LONG
			cnt, ok := readLen()
			if !ok {
				return nil
			}
			offset += cnt * 4
		default:
			// Unknown value size: the walk cannot continue.
			return nil
		}
	}
	return nil
}
|
||||
|
||||
// min2 returns the smaller of a and b.
func min2(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|
||||
273
backend/utils/mail/tnef_diag7_test.go
Normal file
273
backend/utils/mail/tnef_diag7_test.go
Normal file
@@ -0,0 +1,273 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/teamwork/tnef"
|
||||
)
|
||||
|
||||
func TestTNEFRecursiveExtract(t *testing.T) {
|
||||
testFile := `H:\Dev\Gits\EMLy\EML_TNEF.eml`
|
||||
if _, err := os.Stat(testFile); os.IsNotExist(err) {
|
||||
t.Skip("test EML file not present")
|
||||
}
|
||||
|
||||
f, _ := os.Open(testFile)
|
||||
defer f.Close()
|
||||
|
||||
outerEmail, _ := Parse(f)
|
||||
var innerData []byte
|
||||
for _, att := range outerEmail.Attachments {
|
||||
if strings.Contains(strings.ToLower(att.Filename), "postacert.eml") {
|
||||
innerData, _ = io.ReadAll(att.Data)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
innerEmail, _ := Parse(bytes.NewReader(innerData))
|
||||
for _, att := range innerEmail.Attachments {
|
||||
rawData, _ := io.ReadAll(att.Data)
|
||||
if strings.ToLower(att.Filename) != "winmail.dat" {
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Println("=== Level 0 (top TNEF) ===")
|
||||
atts, body := recursiveExtract(rawData, 0)
|
||||
fmt.Printf("\nTotal extracted attachments: %d\n", len(atts))
|
||||
for i, a := range atts {
|
||||
fmt.Printf(" [%d] %q (%d bytes)\n", i, a.Title, len(a.Data))
|
||||
}
|
||||
fmt.Printf("Body HTML len: %d\n", len(body))
|
||||
if len(body) > 0 && len(body) < 500 {
|
||||
fmt.Printf("Body: %s\n", body)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func recursiveExtract(tnefData []byte, depth int) ([]*tnef.Attachment, string) {
|
||||
prefix := strings.Repeat(" ", depth)
|
||||
|
||||
decoded, err := tnef.Decode(tnefData)
|
||||
if err != nil {
|
||||
fmt.Printf("%sDecode error: %v\n", prefix, err)
|
||||
return nil, ""
|
||||
}
|
||||
|
||||
// Collect body
|
||||
bodyHTML := string(decoded.BodyHTML)
|
||||
bodyText := string(decoded.Body)
|
||||
|
||||
// Check for RTF body in attributes
|
||||
for _, attr := range decoded.Attributes {
|
||||
if attr.Name == 0x1009 {
|
||||
fmt.Printf("%sFound PR_RTF_COMPRESSED: %d bytes\n", prefix, len(attr.Data))
|
||||
}
|
||||
if attr.Name == 0x1000 {
|
||||
fmt.Printf("%sFound PR_BODY: %d bytes\n", prefix, len(attr.Data))
|
||||
if bodyText == "" {
|
||||
bodyText = string(attr.Data)
|
||||
}
|
||||
}
|
||||
if attr.Name == 0x1013 || attr.Name == 0x1035 {
|
||||
fmt.Printf("%sFound PR_BODY_HTML/PR_HTML: %d bytes\n", prefix, len(attr.Data))
|
||||
if bodyHTML == "" {
|
||||
bodyHTML = string(attr.Data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("%sAttachments: %d, Body: %d, BodyHTML: %d\n",
|
||||
prefix, len(decoded.Attachments), len(bodyText), len(bodyHTML))
|
||||
|
||||
var allAttachments []*tnef.Attachment
|
||||
|
||||
// Collect real attachments (skip placeholders)
|
||||
for _, a := range decoded.Attachments {
|
||||
if a.Title == "Untitled Attachment" && len(a.Data) < 200 {
|
||||
fmt.Printf("%sSkipping placeholder: %q (%d bytes)\n", prefix, a.Title, len(a.Data))
|
||||
continue
|
||||
}
|
||||
allAttachments = append(allAttachments, a)
|
||||
}
|
||||
|
||||
// Now scan for embedded messages in raw TNEF
|
||||
embeddedStreams := findEmbeddedTNEFStreams(tnefData)
|
||||
for i, stream := range embeddedStreams {
|
||||
fmt.Printf("%s--- Recursing into embedded message %d (%d bytes) ---\n", prefix, i, len(stream))
|
||||
subAtts, subBody := recursiveExtract(stream, depth+1)
|
||||
allAttachments = append(allAttachments, subAtts...)
|
||||
if bodyHTML == "" && subBody != "" {
|
||||
bodyHTML = subBody
|
||||
}
|
||||
}
|
||||
|
||||
if bodyHTML != "" {
|
||||
return allAttachments, bodyHTML
|
||||
}
|
||||
return allAttachments, bodyText
|
||||
}
|
||||
|
||||
func findEmbeddedTNEFStreams(tnefData []byte) [][]byte {
|
||||
var streams [][]byte
|
||||
|
||||
// Navigate through TNEF attributes
|
||||
offset := 6
|
||||
for offset+9 < len(tnefData) {
|
||||
level := tnefData[offset]
|
||||
attrID := binary.LittleEndian.Uint32(tnefData[offset+1 : offset+5])
|
||||
attrLen := int(binary.LittleEndian.Uint32(tnefData[offset+5 : offset+9]))
|
||||
dataStart := offset + 9
|
||||
|
||||
if dataStart+attrLen > len(tnefData) {
|
||||
break
|
||||
}
|
||||
|
||||
// attAttachment (0x9005) at attachment level
|
||||
if level == 0x02 && attrID == 0x00069005 && attrLen > 100 {
|
||||
mapiData := tnefData[dataStart : dataStart+attrLen]
|
||||
embedded := extractPRAttachDataObj2(mapiData)
|
||||
if embedded != nil && len(embedded) > 22 {
|
||||
// Skip 16-byte GUID, check for TNEF signature
|
||||
afterGuid := embedded[16:]
|
||||
if len(afterGuid) >= 4 {
|
||||
sig := binary.LittleEndian.Uint32(afterGuid[0:4])
|
||||
if sig == 0x223E9F78 {
|
||||
streams = append(streams, afterGuid)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
offset += 9 + attrLen + 2
|
||||
}
|
||||
return streams
|
||||
}
|
||||
|
||||
// extractPRAttachDataObj2 walks a serialized MAPI property list and returns
// the raw value of the first PT_OBJECT (0x000D) property whose ID is 0x3701
// (PR_ATTACH_DATA_OBJ).
//
// mapiData starts with a little-endian uint32 property count followed by the
// properties; each property is a 4-byte tag (low 16 bits: type, high 16 bits:
// ID), an optional named-property section for IDs >= 0x8000, and the value.
// Variable-length values are padded to a 4-byte boundary.
//
// The returned slice aliases mapiData. nil is returned when the property is
// not present, when an unsupported property type is met before it, or when
// the data is truncated (including an object whose declared length runs past
// the end of mapiData, which previously caused a slice-bounds panic).
func extractPRAttachDataObj2(mapiData []byte) []byte {
	if len(mapiData) < 4 {
		return nil
	}
	le := binary.LittleEndian
	count := int(le.Uint32(mapiData[0:4]))
	offset := 4

	// readLen consumes the uint32 at offset; ok is false when fewer than
	// four bytes remain.
	readLen := func() (int, bool) {
		if offset+4 > len(mapiData) {
			return 0, false
		}
		v := int(le.Uint32(mapiData[offset : offset+4]))
		offset += 4
		return v, true
	}
	// padded returns n rounded up to the next multiple of four.
	padded := func(n int) int {
		if rem := n % 4; rem != 0 {
			return n + 4 - rem
		}
		return n
	}

	for i := 0; i < count && offset+4 <= len(mapiData); i++ {
		propTag := le.Uint32(mapiData[offset : offset+4])
		propType := propTag & 0xFFFF
		propID := (propTag >> 16) & 0xFFFF
		offset += 4

		// Named property: 16-byte GUID + 4-byte kind, then either a 4-byte
		// numeric ID (kind 0) or a length-prefixed, padded name.
		if propID >= 0x8000 {
			if offset+20 > len(mapiData) {
				return nil
			}
			kind := le.Uint32(mapiData[offset+16 : offset+20])
			offset += 20
			if kind == 0 {
				offset += 4
			} else {
				nameLen, ok := readLen()
				if !ok {
					return nil
				}
				offset += padded(nameLen)
			}
		}

		switch propType {
		case 0x0002, 0x0003, 0x000B: // PT_SHORT (padded), PT_LONG, PT_BOOLEAN
			offset += 4
		case 0x0040: // PT_SYSTIME
			offset += 8
		case 0x001E, 0x001F, 0x0102, 0x1102: // PT_STRING8, PT_UNICODE, PT_BINARY, PT_MV_BINARY
			cnt, ok := readLen()
			if !ok {
				return nil
			}
			for j := 0; j < cnt; j++ {
				vlen, ok := readLen()
				if !ok {
					return nil
				}
				offset += padded(vlen)
			}
		case 0x000D: // PT_OBJECT
			cnt, ok := readLen()
			if !ok {
				return nil
			}
			for j := 0; j < cnt; j++ {
				olen, ok := readLen()
				if !ok {
					return nil
				}
				if propID == 0x3701 {
					// Refuse a declared length that exceeds the remaining
					// bytes rather than slicing out of range.
					if olen < 0 || olen > len(mapiData)-offset {
						return nil
					}
					return mapiData[offset : offset+olen]
				}
				offset += padded(olen)
			}
		case 0x1003: // PT_MV_LONG
			cnt, ok := readLen()
			if !ok {
				return nil
			}
			offset += cnt * 4
		default:
			// Unknown value size: the walk cannot continue.
			return nil
		}
	}
	return nil
}
|
||||
97
backend/utils/mail/tnef_diag8_test.go
Normal file
97
backend/utils/mail/tnef_diag8_test.go
Normal file
@@ -0,0 +1,97 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/teamwork/tnef"
|
||||
)
|
||||
|
||||
func TestTNEFDeepAttachment(t *testing.T) {
|
||||
testFile := `H:\Dev\Gits\EMLy\EML_TNEF.eml`
|
||||
if _, err := os.Stat(testFile); os.IsNotExist(err) {
|
||||
t.Skip("test EML file not present")
|
||||
}
|
||||
|
||||
f, _ := os.Open(testFile)
|
||||
defer f.Close()
|
||||
|
||||
outerEmail, _ := Parse(f)
|
||||
var innerData []byte
|
||||
for _, att := range outerEmail.Attachments {
|
||||
if strings.Contains(strings.ToLower(att.Filename), "postacert.eml") {
|
||||
innerData, _ = io.ReadAll(att.Data)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
innerEmail, _ := Parse(bytes.NewReader(innerData))
|
||||
for _, att := range innerEmail.Attachments {
|
||||
rawData, _ := io.ReadAll(att.Data)
|
||||
if strings.ToLower(att.Filename) != "winmail.dat" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Dig to level 2: top → embedded[0] → embedded[0]
|
||||
streams0 := findEmbeddedTNEFStreams(rawData)
|
||||
if len(streams0) == 0 {
|
||||
t.Fatal("no embedded streams at level 0")
|
||||
}
|
||||
streams1 := findEmbeddedTNEFStreams(streams0[0])
|
||||
if len(streams1) == 0 {
|
||||
t.Fatal("no embedded streams at level 1")
|
||||
}
|
||||
|
||||
// Decode level 2
|
||||
decoded2, err := tnef.Decode(streams1[0])
|
||||
if err != nil {
|
||||
t.Fatalf("level 2 decode: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Level 2 attachments: %d\n", len(decoded2.Attachments))
|
||||
for i, a := range decoded2.Attachments {
|
||||
fmt.Printf(" [%d] title=%q size=%d\n", i, a.Title, len(a.Data))
|
||||
if len(a.Data) > 20 {
|
||||
fmt.Printf(" first 20 bytes: %x\n", a.Data[:20])
|
||||
// Check for EML, MSG, TNEF signatures
|
||||
if len(a.Data) >= 4 {
|
||||
sig := binary.LittleEndian.Uint32(a.Data[0:4])
|
||||
if sig == 0x223E9F78 {
|
||||
fmt.Println(" -> TNEF stream!")
|
||||
}
|
||||
}
|
||||
if len(a.Data) >= 8 && bytes.Equal(a.Data[:8], []byte{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}) {
|
||||
fmt.Println(" -> MSG (OLE2) file!")
|
||||
}
|
||||
// Check if text/EML
|
||||
if a.Data[0] < 128 && a.Data[0] >= 32 {
|
||||
preview := string(a.Data[:min2(200, len(a.Data))])
|
||||
if strings.Contains(preview, "From:") || strings.Contains(preview, "Content-Type") || strings.Contains(preview, "MIME") || strings.Contains(preview, "Received:") {
|
||||
fmt.Printf(" -> Looks like an EML file! First 200 chars: %s\n", preview)
|
||||
} else {
|
||||
fmt.Printf(" -> Text data: %.200s\n", preview)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Also check level 2's attAttachment for embedded msgs
|
||||
streams2 := findEmbeddedTNEFStreams(streams1[0])
|
||||
fmt.Printf("\nLevel 2 embedded TNEF streams: %d\n", len(streams2))
|
||||
|
||||
// Check all MAPI attributes at level 2
|
||||
fmt.Println("\nLevel 2 MAPI attributes:")
|
||||
for _, attr := range decoded2.Attributes {
|
||||
fmt.Printf(" 0x%04X: %d bytes\n", attr.Name, len(attr.Data))
|
||||
// PR_BODY
|
||||
if attr.Name == 0x1000 && len(attr.Data) < 500 {
|
||||
fmt.Printf(" PR_BODY: %s\n", string(attr.Data))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
79
backend/utils/mail/tnef_diag_test.go
Normal file
79
backend/utils/mail/tnef_diag_test.go
Normal file
@@ -0,0 +1,79 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/teamwork/tnef"
|
||||
)
|
||||
|
||||
func TestTNEFDiag(t *testing.T) {
|
||||
testFile := `H:\Dev\Gits\EMLy\EML_TNEF.eml`
|
||||
if _, err := os.Stat(testFile); os.IsNotExist(err) {
|
||||
t.Skip("test EML file not present")
|
||||
}
|
||||
|
||||
f, _ := os.Open(testFile)
|
||||
defer f.Close()
|
||||
|
||||
// Parse the PEC outer envelope
|
||||
outerEmail, err := Parse(f)
|
||||
if err != nil {
|
||||
t.Fatalf("parse outer: %v", err)
|
||||
}
|
||||
|
||||
// Find postacert.eml
|
||||
var innerData []byte
|
||||
for _, att := range outerEmail.Attachments {
|
||||
if strings.Contains(strings.ToLower(att.Filename), "postacert.eml") {
|
||||
innerData, _ = io.ReadAll(att.Data)
|
||||
break
|
||||
}
|
||||
}
|
||||
if innerData == nil {
|
||||
t.Fatal("no postacert.eml found")
|
||||
}
|
||||
|
||||
// Parse inner email
|
||||
innerEmail, err := Parse(bytes.NewReader(innerData))
|
||||
if err != nil {
|
||||
t.Fatalf("parse inner: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Inner attachments: %d\n", len(innerEmail.Attachments))
|
||||
for i, att := range innerEmail.Attachments {
|
||||
data, _ := io.ReadAll(att.Data)
|
||||
fmt.Printf(" [%d] filename=%q contentType=%q size=%d\n", i, att.Filename, att.ContentType, len(data))
|
||||
|
||||
if strings.ToLower(att.Filename) == "winmail.dat" ||
|
||||
strings.Contains(strings.ToLower(att.ContentType), "ms-tnef") {
|
||||
|
||||
fmt.Printf(" Found TNEF! First 20 bytes: %x\n", data[:min(20, len(data))])
|
||||
fmt.Printf(" isTNEFData: %v\n", isTNEFData(data))
|
||||
|
||||
decoded, err := tnef.Decode(data)
|
||||
if err != nil {
|
||||
fmt.Printf(" TNEF decode error: %v\n", err)
|
||||
continue
|
||||
}
|
||||
fmt.Printf(" TNEF Body len: %d\n", len(decoded.Body))
|
||||
fmt.Printf(" TNEF BodyHTML len: %d\n", len(decoded.BodyHTML))
|
||||
fmt.Printf(" TNEF Attachments: %d\n", len(decoded.Attachments))
|
||||
for j, ta := range decoded.Attachments {
|
||||
fmt.Printf(" [%d] title=%q size=%d\n", j, ta.Title, len(ta.Data))
|
||||
}
|
||||
fmt.Printf(" TNEF Attributes: %d\n", len(decoded.Attributes))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// min returns the smaller of the two integers a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|
||||
444
backend/utils/mail/tnef_reader.go
Normal file
444
backend/utils/mail/tnef_reader.go
Normal file
@@ -0,0 +1,444 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"mime"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/teamwork/tnef"
|
||||
)
|
||||
|
||||
// tnefMagic holds the four signature bytes that open a TNEF stream: the
// 32-bit value 0x223E9F78 written in little-endian byte order.
var tnefMagic = []byte{0x78, 0x9F, 0x3E, 0x22}

// maxTNEFDepth is the nesting depth beyond which recursive TNEF extraction
// stops descending.
const maxTNEFDepth = 10

// isTNEFData reports whether data begins with the TNEF signature bytes.
// Input shorter than the signature is never considered TNEF.
func isTNEFData(data []byte) bool {
	if len(data) < len(tnefMagic) {
		return false
	}
	for i, want := range tnefMagic {
		if data[i] != want {
			return false
		}
	}
	return true
}
|
||||
|
||||
// isTNEFAttachment returns true if an attachment is a TNEF-encoded winmail.dat.
|
||||
// Detection is based on filename, content-type, or the TNEF magic bytes.
|
||||
func isTNEFAttachment(att EmailAttachment) bool {
|
||||
filenameLower := strings.ToLower(att.Filename)
|
||||
if filenameLower == "winmail.dat" {
|
||||
return true
|
||||
}
|
||||
ctLower := strings.ToLower(att.ContentType)
|
||||
if strings.Contains(ctLower, "application/ms-tnef") ||
|
||||
strings.Contains(ctLower, "application/vnd.ms-tnef") {
|
||||
return true
|
||||
}
|
||||
return isTNEFData(att.Data)
|
||||
}
|
||||
|
||||
// extractTNEFAttachments decodes a TNEF blob and returns the files embedded
|
||||
// inside it, recursively following nested embedded MAPI messages.
|
||||
func extractTNEFAttachments(data []byte) ([]EmailAttachment, error) {
|
||||
return extractTNEFRecursive(data, 0)
|
||||
}
|
||||
|
||||
func extractTNEFRecursive(data []byte, depth int) ([]EmailAttachment, error) {
|
||||
if depth > maxTNEFDepth {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
decoded, err := tnef.Decode(data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var attachments []EmailAttachment
|
||||
|
||||
// Collect non-placeholder file attachments from the library output.
|
||||
for _, att := range decoded.Attachments {
|
||||
if len(att.Data) == 0 {
|
||||
continue
|
||||
}
|
||||
// Skip the small MAPI placeholder text ("L'allegato è un messaggio
|
||||
// incorporato MAPI 1.0...") that Outlook inserts for embedded messages.
|
||||
if isEmbeddedMsgPlaceholder(att) {
|
||||
continue
|
||||
}
|
||||
|
||||
filename := att.Title
|
||||
if filename == "" || filename == "Untitled Attachment" {
|
||||
filename = inferFilename(att.Data)
|
||||
}
|
||||
|
||||
attachments = append(attachments, EmailAttachment{
|
||||
Filename: filename,
|
||||
ContentType: mimeTypeFromFilename(filename),
|
||||
Data: att.Data,
|
||||
})
|
||||
}
|
||||
|
||||
// Recursively dig into embedded MAPI messages stored in
|
||||
// attAttachment (0x9005) → PR_ATTACH_DATA_OBJ (0x3701).
|
||||
for _, stream := range findEmbeddedTNEFStreamsFromRaw(data) {
|
||||
subAtts, _ := extractTNEFRecursive(stream, depth+1)
|
||||
attachments = append(attachments, subAtts...)
|
||||
}
|
||||
|
||||
return attachments, nil
|
||||
}
|
||||
|
||||
// isEmbeddedMsgPlaceholder returns true if the attachment is a tiny placeholder
|
||||
// that Outlook generates for embedded MAPI messages ("L'allegato è un messaggio
|
||||
// incorporato MAPI 1.0" or equivalent in other languages).
|
||||
func isEmbeddedMsgPlaceholder(att *tnef.Attachment) bool {
|
||||
if len(att.Data) > 300 {
|
||||
return false
|
||||
}
|
||||
lower := strings.ToLower(string(att.Data))
|
||||
return strings.Contains(lower, "mapi 1.0") ||
|
||||
strings.Contains(lower, "embedded message") ||
|
||||
strings.Contains(lower, "messaggio incorporato")
|
||||
}
|
||||
|
||||
// inferFilename picks a reasonable filename based on the data's magic bytes.
|
||||
func inferFilename(data []byte) string {
|
||||
if looksLikeEML(data) {
|
||||
return "embedded_message.eml"
|
||||
}
|
||||
if isTNEFData(data) {
|
||||
return "embedded.dat"
|
||||
}
|
||||
if len(data) >= 8 {
|
||||
if data[0] == 0xD0 && data[1] == 0xCF && data[2] == 0x11 && data[3] == 0xE0 {
|
||||
return "embedded_message.msg"
|
||||
}
|
||||
}
|
||||
return "attachment.dat"
|
||||
}
|
||||
|
||||
// looksLikeEML reports whether data appears to start with an e-mail header.
// The payload must be at least 20 bytes long, start with a printable ASCII
// byte, and begin (case-insensitively) with one of a small set of common
// header names.
func looksLikeEML(data []byte) bool {
	if len(data) < 20 {
		return false
	}
	// Cheap rejection of binary payloads before any string conversion.
	if first := data[0]; first < 32 || first > 126 {
		return false
	}

	// Only the first 200 bytes are lowercased and inspected.
	head := data
	if len(head) > 200 {
		head = head[:200]
	}
	lowered := strings.ToLower(string(head))

	for _, header := range []string{
		"mime-version:",
		"from:",
		"received:",
		"date:",
		"content-type:",
		"return-path:",
	} {
		if strings.HasPrefix(lowered, header) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// expandTNEFAttachments iterates over the attachment list and replaces any
|
||||
// TNEF-encoded winmail.dat entries with the files they contain. Attachments
|
||||
// that are not TNEF are passed through unchanged.
|
||||
func expandTNEFAttachments(attachments []EmailAttachment) []EmailAttachment {
|
||||
var result []EmailAttachment
|
||||
for _, att := range attachments {
|
||||
if isTNEFAttachment(att) {
|
||||
extracted, err := extractTNEFAttachments(att.Data)
|
||||
if err == nil && len(extracted) > 0 {
|
||||
result = append(result, extracted...)
|
||||
continue
|
||||
}
|
||||
// If extraction fails, keep the original blob.
|
||||
}
|
||||
result = append(result, att)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Raw TNEF attribute scanner — extracts nested TNEF streams from embedded
|
||||
// MAPI messages that the teamwork/tnef library does not handle.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// findEmbeddedTNEFStreamsFromRaw scans the raw TNEF byte stream for
|
||||
// attAttachment (0x00069005) attribute blocks, parses their MAPI properties,
|
||||
// and extracts any PR_ATTACH_DATA_OBJ (0x3701) values that begin with a
|
||||
// TNEF signature.
|
||||
func findEmbeddedTNEFStreamsFromRaw(tnefData []byte) [][]byte {
|
||||
if len(tnefData) < 6 || !isTNEFData(tnefData) {
|
||||
return nil
|
||||
}
|
||||
|
||||
var streams [][]byte
|
||||
offset := 6 // skip TNEF signature (4) + key (2)
|
||||
|
||||
for offset+9 < len(tnefData) {
|
||||
level := tnefData[offset]
|
||||
attrID := binary.LittleEndian.Uint32(tnefData[offset+1 : offset+5])
|
||||
attrLen := int(binary.LittleEndian.Uint32(tnefData[offset+5 : offset+9]))
|
||||
dataStart := offset + 9
|
||||
|
||||
if dataStart+attrLen > len(tnefData) || attrLen < 0 {
|
||||
break
|
||||
}
|
||||
|
||||
// attAttachment (0x00069005) at attachment level (0x02)
|
||||
if level == 0x02 && attrID == 0x00069005 && attrLen > 100 {
|
||||
mapiData := tnefData[dataStart : dataStart+attrLen]
|
||||
embedded := extractPRAttachDataObjFromMAPI(mapiData)
|
||||
if embedded != nil && len(embedded) > 22 {
|
||||
// Skip the 16-byte IID_IMessage GUID
|
||||
afterGuid := embedded[16:]
|
||||
if isTNEFData(afterGuid) {
|
||||
streams = append(streams, afterGuid)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// level(1) + id(4) + len(4) + data(attrLen) + checksum(2)
|
||||
offset += 9 + attrLen + 2
|
||||
}
|
||||
return streams
|
||||
}
|
||||
|
||||
// extractPRAttachDataObjFromMAPI parses a MAPI properties block (from an
|
||||
// attAttachment attribute) and returns the raw value of PR_ATTACH_DATA_OBJ
|
||||
// (property ID 0x3701, type PT_OBJECT 0x000D).
|
||||
func extractPRAttachDataObjFromMAPI(data []byte) []byte {
|
||||
if len(data) < 4 {
|
||||
return nil
|
||||
}
|
||||
count := int(binary.LittleEndian.Uint32(data[0:4]))
|
||||
off := 4
|
||||
|
||||
for i := 0; i < count && off+4 <= len(data); i++ {
|
||||
propTag := binary.LittleEndian.Uint32(data[off : off+4])
|
||||
propType := propTag & 0xFFFF
|
||||
propID := (propTag >> 16) & 0xFFFF
|
||||
off += 4
|
||||
|
||||
// Named properties (ID >= 0x8000) have extra GUID + kind fields.
|
||||
if propID >= 0x8000 {
|
||||
if off+20 > len(data) {
|
||||
return nil
|
||||
}
|
||||
kind := binary.LittleEndian.Uint32(data[off+16 : off+20])
|
||||
off += 20
|
||||
if kind == 0 { // MNID_ID
|
||||
off += 4
|
||||
} else { // MNID_STRING
|
||||
if off+4 > len(data) {
|
||||
return nil
|
||||
}
|
||||
nameLen := int(binary.LittleEndian.Uint32(data[off : off+4]))
|
||||
off += 4 + nameLen
|
||||
off += padTo4(nameLen)
|
||||
}
|
||||
}
|
||||
|
||||
off = skipMAPIPropValue(data, off, propType, propID)
|
||||
if off < 0 {
|
||||
return nil // parse error
|
||||
}
|
||||
// If skipMAPIPropValue returned a special sentinel, extract it.
|
||||
// We use a hack: skipMAPIPropValue can't return the data directly,
|
||||
// so we handle PT_OBJECT / 0x3701 inline below.
|
||||
}
|
||||
|
||||
// Simpler approach: re-scan specifically for 0x3701.
|
||||
return extractPRAttachDataObjDirect(data)
|
||||
}
|
||||
|
||||
// extractPRAttachDataObjDirect re-scans the MAPI property block and
|
||||
// returns the raw value of PR_ATTACH_DATA_OBJ (0x3701, PT_OBJECT).
|
||||
func extractPRAttachDataObjDirect(data []byte) []byte {
|
||||
if len(data) < 4 {
|
||||
return nil
|
||||
}
|
||||
count := int(binary.LittleEndian.Uint32(data[0:4]))
|
||||
off := 4
|
||||
|
||||
for i := 0; i < count && off+4 <= len(data); i++ {
|
||||
propTag := binary.LittleEndian.Uint32(data[off : off+4])
|
||||
propType := propTag & 0xFFFF
|
||||
propID := (propTag >> 16) & 0xFFFF
|
||||
off += 4
|
||||
|
||||
// Skip named property headers.
|
||||
if propID >= 0x8000 {
|
||||
if off+20 > len(data) {
|
||||
return nil
|
||||
}
|
||||
kind := binary.LittleEndian.Uint32(data[off+16 : off+20])
|
||||
off += 20
|
||||
if kind == 0 {
|
||||
off += 4
|
||||
} else {
|
||||
if off+4 > len(data) {
|
||||
return nil
|
||||
}
|
||||
nameLen := int(binary.LittleEndian.Uint32(data[off : off+4]))
|
||||
off += 4 + nameLen
|
||||
off += padTo4(nameLen)
|
||||
}
|
||||
}
|
||||
|
||||
switch propType {
|
||||
case 0x0002: // PT_SHORT (padded to 4)
|
||||
off += 4
|
||||
case 0x0003, 0x000A: // PT_LONG, PT_ERROR
|
||||
off += 4
|
||||
case 0x000B: // PT_BOOLEAN (padded to 4)
|
||||
off += 4
|
||||
case 0x0004: // PT_FLOAT
|
||||
off += 4
|
||||
case 0x0005: // PT_DOUBLE
|
||||
off += 8
|
||||
case 0x0006: // PT_CURRENCY
|
||||
off += 8
|
||||
case 0x0007: // PT_APPTIME
|
||||
off += 8
|
||||
case 0x0014: // PT_I8
|
||||
off += 8
|
||||
case 0x0040: // PT_SYSTIME
|
||||
off += 8
|
||||
case 0x0048: // PT_CLSID
|
||||
off += 16
|
||||
case 0x001E, 0x001F: // PT_STRING8, PT_UNICODE
|
||||
off = skipCountedBlobs(data, off)
|
||||
case 0x0102: // PT_BINARY
|
||||
off = skipCountedBlobs(data, off)
|
||||
case 0x000D: // PT_OBJECT
|
||||
if off+4 > len(data) {
|
||||
return nil
|
||||
}
|
||||
cnt := int(binary.LittleEndian.Uint32(data[off : off+4]))
|
||||
off += 4
|
||||
for j := 0; j < cnt; j++ {
|
||||
if off+4 > len(data) {
|
||||
return nil
|
||||
}
|
||||
olen := int(binary.LittleEndian.Uint32(data[off : off+4]))
|
||||
off += 4
|
||||
if propID == 0x3701 && off+olen <= len(data) {
|
||||
return data[off : off+olen]
|
||||
}
|
||||
off += olen
|
||||
off += padTo4(olen)
|
||||
}
|
||||
case 0x1002: // PT_MV_SHORT
|
||||
off = skipMVFixed(data, off, 4)
|
||||
case 0x1003: // PT_MV_LONG
|
||||
off = skipMVFixed(data, off, 4)
|
||||
case 0x1005: // PT_MV_DOUBLE
|
||||
off = skipMVFixed(data, off, 8)
|
||||
case 0x1014: // PT_MV_I8
|
||||
off = skipMVFixed(data, off, 8)
|
||||
case 0x1040: // PT_MV_SYSTIME
|
||||
off = skipMVFixed(data, off, 8)
|
||||
case 0x101E, 0x101F: // PT_MV_STRING8, PT_MV_UNICODE
|
||||
off = skipCountedBlobs(data, off)
|
||||
case 0x1048: // PT_MV_CLSID
|
||||
off = skipMVFixed(data, off, 16)
|
||||
case 0x1102: // PT_MV_BINARY
|
||||
off = skipCountedBlobs(data, off)
|
||||
default:
|
||||
// Unknown type, can't continue
|
||||
return nil
|
||||
}
|
||||
|
||||
if off < 0 || off > len(data) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// skipCountedBlobs advances past a MAPI value that stores count + N
|
||||
// length-prefixed blobs (used by PT_STRING8, PT_UNICODE, PT_BINARY, and
|
||||
// their multi-valued variants).
|
||||
func skipCountedBlobs(data []byte, off int) int {
|
||||
if off+4 > len(data) {
|
||||
return -1
|
||||
}
|
||||
cnt := int(binary.LittleEndian.Uint32(data[off : off+4]))
|
||||
off += 4
|
||||
for j := 0; j < cnt; j++ {
|
||||
if off+4 > len(data) {
|
||||
return -1
|
||||
}
|
||||
blen := int(binary.LittleEndian.Uint32(data[off : off+4]))
|
||||
off += 4 + blen
|
||||
off += padTo4(blen)
|
||||
}
|
||||
return off
|
||||
}
|
||||
|
||||
// skipMVFixed steps over a multi-valued property with fixed-size elements: a
// 32-bit little-endian count followed by count*elemSize bytes.
//
// It returns the offset just past the value, or -1 if the count cannot be
// read. The returned offset is not checked against len(data).
func skipMVFixed(data []byte, off int, elemSize int) int {
	headerEnd := off + 4
	if headerEnd > len(data) {
		return -1
	}
	elems := int(binary.LittleEndian.Uint32(data[off:headerEnd]))
	return headerEnd + elems*elemSize
}
|
||||
|
||||
// skipMAPIPropValue is a generic value skipper (unused in the current flow
|
||||
// but kept for completeness).
|
||||
func skipMAPIPropValue(data []byte, off int, propType uint32, _ uint32) int {
|
||||
switch propType {
|
||||
case 0x0002:
|
||||
return off + 4
|
||||
case 0x0003, 0x000A, 0x000B, 0x0004:
|
||||
return off + 4
|
||||
case 0x0005, 0x0006, 0x0007, 0x0014, 0x0040:
|
||||
return off + 8
|
||||
case 0x0048:
|
||||
return off + 16
|
||||
case 0x001E, 0x001F, 0x0102, 0x000D:
|
||||
return skipCountedBlobs(data, off)
|
||||
case 0x1002, 0x1003:
|
||||
return skipMVFixed(data, off, 4)
|
||||
case 0x1005, 0x1014, 0x1040:
|
||||
return skipMVFixed(data, off, 8)
|
||||
case 0x1048:
|
||||
return skipMVFixed(data, off, 16)
|
||||
case 0x101E, 0x101F, 0x1102:
|
||||
return skipCountedBlobs(data, off)
|
||||
default:
|
||||
return -1
|
||||
}
|
||||
}
|
||||
|
||||
// padTo4 returns how many bytes must follow a run of n bytes to reach the
// next multiple of four. It returns 0 when n is already a multiple of four.
func padTo4(n int) int {
	if rem := n % 4; rem != 0 {
		return 4 - rem
	}
	return 0
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// MIME type helper
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// mimeTypeFromFilename maps a filename to a bare MIME type using its
// lowercased extension. Parameters such as "; charset=utf-8" are removed
// from the result. It returns "application/octet-stream" when the name has
// no extension or the extension is not known to the mime package.
func mimeTypeFromFilename(filename string) string {
	const fallback = "application/octet-stream"

	extension := strings.ToLower(filepath.Ext(filename))
	if extension == "" {
		return fallback
	}

	detected := mime.TypeByExtension(extension)
	if detected == "" {
		return fallback
	}

	// Keep only the media type, dropping anything from the first ';' on.
	if base, _, hasParams := strings.Cut(detected, ";"); hasParams {
		return strings.TrimSpace(base)
	}
	return detected
}
|
||||
59
backend/utils/mail/tnef_reader_test.go
Normal file
59
backend/utils/mail/tnef_reader_test.go
Normal file
@@ -0,0 +1,59 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestReadEmlWithTNEF(t *testing.T) {
|
||||
testFile := `H:\Dev\Gits\EMLy\EML_TNEF.eml`
|
||||
if _, err := os.Stat(testFile); os.IsNotExist(err) {
|
||||
t.Skip("test EML file not present")
|
||||
}
|
||||
|
||||
// First try the PEC reader (this is a PEC email)
|
||||
email, err := ReadPecInnerEml(testFile)
|
||||
if err != nil {
|
||||
t.Fatalf("ReadPecInnerEml failed: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Subject: %s\n", email.Subject)
|
||||
fmt.Printf("From: %s\n", email.From)
|
||||
fmt.Printf("Attachment count: %d\n", len(email.Attachments))
|
||||
|
||||
hasWinmailDat := false
|
||||
for i, att := range email.Attachments {
|
||||
fmt.Printf(" [%d] %s (%s, %d bytes)\n", i, att.Filename, att.ContentType, len(att.Data))
|
||||
if att.Filename == "winmail.dat" {
|
||||
hasWinmailDat = true
|
||||
}
|
||||
}
|
||||
|
||||
if hasWinmailDat {
|
||||
t.Error("winmail.dat should have been expanded into its contained attachments")
|
||||
}
|
||||
|
||||
if len(email.Attachments) == 0 {
|
||||
t.Error("expected at least one attachment after TNEF expansion")
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadEmlFallback(t *testing.T) {
|
||||
testFile := `H:\Dev\Gits\EMLy\EML_TNEF.eml`
|
||||
if _, err := os.Stat(testFile); os.IsNotExist(err) {
|
||||
t.Skip("test EML file not present")
|
||||
}
|
||||
|
||||
// Also verify the plain EML reader path
|
||||
email, err := ReadEmlFile(testFile)
|
||||
if err != nil {
|
||||
t.Fatalf("ReadEmlFile failed: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("[EML] Subject: %s\n", email.Subject)
|
||||
fmt.Printf("[EML] Attachment count: %d\n", len(email.Attachments))
|
||||
for i, att := range email.Attachments {
|
||||
fmt.Printf(" [%d] %s (%s, %d bytes)\n", i, att.Filename, att.ContentType, len(att.Data))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user