Skip to content

Commit

Permalink
Handle multiple attachments with the same name
Browse files Browse the repository at this point in the history
  • Loading branch information
nickrussler committed May 31, 2024
1 parent f406e34 commit 0be1b54
Show file tree
Hide file tree
Showing 4 changed files with 56,810 additions and 34 deletions.
6 changes: 3 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ dependencies {
implementation 'com.google.guava:guava:29.0-jre'
implementation 'com.sun.mail:jakarta.mail:2.0.1'
implementation 'com.beust:jcommander:1.78'
implementation 'org.apache.tika:tika-core:1.24.1'
implementation 'org.apache.tika:tika-core:2.9.2'
implementation 'com.github.markusbernhardt:proxy-vole:1.0.5'
implementation 'org.simplejavamail:simple-java-mail:8.10.1'
implementation 'org.simplejavamail:outlook-module:8.10.1'
implementation 'org.simplejavamail:simple-java-mail:8.11.1'
implementation 'org.simplejavamail:outlook-module:8.11.1'
implementation 'org.slf4j:slf4j-simple:1.7.30'

testImplementation 'junit:junit:4.+'
Expand Down
92 changes: 61 additions & 31 deletions src/main/java/mimeparser/MimeMessageConverter.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@
import com.google.common.io.ByteStreams;
import com.google.common.io.Files;
import com.google.common.io.Resources;
import jakarta.mail.MessagingException;
import jakarta.mail.internet.MimeMessage;
import jakarta.mail.internet.MimeUtility;
import org.apache.tika.mime.MimeTypes;
import org.simplejavamail.api.email.AttachmentResource;
import org.simplejavamail.converter.EmailConverter;
import util.*;

import java.io.*;
import java.net.URL;
Expand All @@ -32,14 +39,9 @@
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jakarta.mail.MessagingException;
import jakarta.mail.internet.MimeMessage;
import jakarta.mail.internet.MimeUtility;
import org.apache.tika.io.FilenameUtils;
import org.apache.tika.mime.MimeTypes;
import org.simplejavamail.api.email.AttachmentResource;
import org.simplejavamail.converter.EmailConverter;
import util.*;

import static com.google.common.io.Files.getFileExtension;
import static com.google.common.io.Files.getNameWithoutExtension;

/**
* Converts email (eml, msg) files into pdf files.
Expand Down Expand Up @@ -114,15 +116,14 @@ private static void execCommand(List<String> command) {
*
* @throws Exception
*/
public static void convertToPdf(
String emailFilePath, String pdfOutputPath, boolean hideHeaders, boolean addAttachmentNames, boolean extractAttachments, String attachmentsdir, List<String> extParams) throws Exception {
public static void convertToPdf(String emailFilePath, String pdfOutputPath, boolean hideHeaders, boolean addAttachmentNames, boolean extractAttachments, String attachmentsdir, List<String> extParams) throws Exception {
Logger.info("Start converting %s to %s", emailFilePath, pdfOutputPath);

final MimeMessage message;
if (emailFilePath.toLowerCase().endsWith(".msg")) {
Logger.debug("Read msg file from %s, convert it to eml", emailFilePath);
message = new MimeMessage(
null, new ByteArrayInputStream(EmailConverter.outlookMsgToEML(new FileInputStream(emailFilePath)).getBytes(StandardCharsets.UTF_8)));
String emlString = EmailConverter.outlookMsgToEML(new FileInputStream(emailFilePath));
message = new MimeMessage(null, new ByteArrayInputStream(emlString.getBytes(StandardCharsets.UTF_8)));
} else {
Logger.debug("Read eml file from %s", emailFilePath);
message = new MimeMessage(null, new FileInputStream(emailFilePath));
Expand Down Expand Up @@ -348,41 +349,34 @@ public String replace(Matcher m) throws Exception {
if (!Strings.isNullOrEmpty(attachmentsdir)) {
attachmentDir = new File(attachmentsdir);
} else {
attachmentDir = new File(pdf.getParentFile(), Files.getNameWithoutExtension(pdfOutputPath) + "-attachments");
attachmentDir = new File(pdf.getParentFile(), getNameWithoutExtension(pdfOutputPath) + "-attachments");
}

List<AttachmentResource> attachments = EmailConverter.mimeMessageToEmail(message).getAttachments();

Logger.debug("Found %s attachments", attachments.size());

if (attachments.size() > 0) {
attachmentDir.mkdirs();
if (!attachments.isEmpty()) {
boolean successfullyCreatedAttachmentDir = attachmentDir.mkdirs();

if (!successfullyCreatedAttachmentDir) {
throw new IllegalStateException("Failed to create attachment directory");
}

Logger.info("Extract attachments to %s", attachmentDir.getAbsolutePath());
}

Map<String, Integer> attachmentFileNameFrequency = new HashMap<>();
for (int i = 0; i < attachments.size(); i++) {
File attachFile = null;
try {
Logger.debug("Process Attachment %s", i);

AttachmentResource attachmentResource = attachments.get(i);

String attachmentFilename = null;
try {
attachmentFilename = attachmentResource.getDataSource().getName();

// see simple-java-mail MimeMessageParser.java (https://tinyurl.com/45f98j3x)
if (attachmentFilename.equals("UnknownAttachment")) {
attachmentFilename = null;
}
} catch (Exception e) {
// ignore this error
}
String attachmentFilename = getAttachmentFilename(attachmentResource, attachmentFileNameFrequency);

if (!Strings.isNullOrEmpty(attachmentFilename)) {
// sanitize filename
attachmentFilename = FileNameSanitizer.sanitizeFileName(attachmentFilename, '_');

attachFile = new File(attachmentDir, attachmentFilename);
} else {
String extension = "";
Expand All @@ -403,8 +397,6 @@ public String replace(Matcher m) throws Exception {
}

Logger.debug("Saved Attachment %s to %s", i, attachFile.getAbsolutePath());

attachFile = null;
} catch (Exception e) {
Logger.error(
"Could not save attachment to %s. Error: %s", attachFile, Throwables.getStackTraceAsString(e));
Expand All @@ -415,6 +407,44 @@ public String replace(Matcher m) throws Exception {
Logger.info("Conversion finished");
}

/**
 * Determines the file name under which an attachment should be stored, making it unique among
 * the names already handed out through {@code attachmentFileNameFrequency}.
 *
 * <p>The first attachment with a given (sanitized) name keeps that name. Every later attachment
 * with the same name gets a numeric suffix inserted before the extension, e.g.
 * {@code IMAG0144.jpg}, {@code IMAG0144 (2).jpg}, {@code IMAG0144 (3).jpg}, ...
 *
 * @param attachmentResource the attachment whose data source name is used as the file name
 * @param attachmentFileNameFrequency bookkeeping shared across all attachments of one message;
 *     maps every name already handed out to the next numeric suffix to try for that name.
 *     It is updated by this method.
 * @return the sanitized, de-duplicated file name, or {@code null} if the attachment has no
 *     usable name (the caller is then expected to generate one)
 */
private static String getAttachmentFilename(AttachmentResource attachmentResource, Map<String, Integer> attachmentFileNameFrequency) {
    String attachmentFilename = null;
    try {
        attachmentFilename = attachmentResource.getDataSource().getName();
    } catch (Exception e) {
        // Deliberately best effort: an unreadable name is treated like a missing name.
    }

    if (Strings.isNullOrEmpty(attachmentFilename)) {
        return null;
    }

    // see simple-java-mail MimeMessageParser.java (https://tinyurl.com/45f98j3x)
    if (attachmentFilename.equals("UnknownAttachment")) {
        return null;
    }

    // sanitize filename ('_' is presumably the replacement for unsafe characters)
    attachmentFilename = FileNameSanitizer.sanitizeFileName(attachmentFilename, '_');

    // Nothing usable left after sanitizing: report "no name" without polluting the map.
    if (Strings.isNullOrEmpty(attachmentFilename)) {
        return null;
    }

    Integer nextSuffix = attachmentFileNameFrequency.get(attachmentFilename);
    if (nextSuffix == null) {
        // First occurrence: keep the name, the next duplicate will get suffix 2.
        attachmentFileNameFrequency.put(attachmentFilename, 2);
        return attachmentFilename;
    }

    String baseName = getNameWithoutExtension(attachmentFilename);
    String extension = getFileExtension(attachmentFilename);

    // Skip over suffixed names that are already taken, e.g. by an attachment that is
    // literally called "name (2).ext".
    int suffix = nextSuffix;
    String uniqueFilename;
    do {
        // Plain concatenation keeps the digits independent of the default locale.
        uniqueFilename = baseName + " (" + suffix + ")";
        if (!Strings.isNullOrEmpty(extension)) {
            uniqueFilename += "." + extension;
        }
        suffix++;
    } while (attachmentFileNameFrequency.containsKey(uniqueFilename));

    // The counter must stay attached to the ORIGINAL name; storing it under the generated
    // name would make a third duplicate reuse suffix 2 and overwrite the second file.
    attachmentFileNameFrequency.put(attachmentFilename, suffix);
    // Reserve the generated name as well so later attachments cannot collide with it.
    attachmentFileNameFrequency.put(uniqueFilename, 2);

    return uniqueFilename;
}

private static String[] getRecipients(final MimeMessage message, String header) throws MessagingException {
String[] recipients = new String[0];
String recipientsRaw = message.getHeader(header, null);
Expand Down
35 changes: 35 additions & 0 deletions src/test/java/cli/MainTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,41 @@ public void main_attachments() throws IOException, URISyntaxException {
}
}

/**
 * Converts an eml file containing attachments that share a file name and verifies that both
 * the original name and a suffixed variant are extracted into the attachment directory.
 *
 * The log level and all temporary files are restored/removed in {@code finally} blocks so a
 * failing conversion or assertion does not leak state into other tests.
 */
@Test
public void main_attachmentsDuplicateName() throws IOException, URISyntaxException {
    File tmpPdf = File.createTempFile("emailtopdf", ".pdf");
    File attachmentDir = new File(tmpPdf.getParent(), Files.getNameWithoutExtension(tmpPdf.getName()) + "-attachments");

    try {
        String eml = new File(MainTest.class.getClassLoader().getResource("eml/testAttachmentsDuplicateAttachmentName.eml").toURI()).getAbsolutePath();

        String[] args = new String[]{"-o", tmpPdf.getAbsolutePath(), "-a", eml};

        LogLevel old = Logger.level;
        Logger.level = LogLevel.Error;
        try {
            Main.main(args);
        } finally {
            // Restore even when the conversion throws.
            Logger.level = old;
        }

        // File.list() returns null if the directory was never created; treat that as
        // "no attachments" so the assertion below reports a readable failure.
        String[] extracted = attachmentDir.list();
        List<String> attachments = Arrays.asList(extracted != null ? extracted : new String[0]);
        assertThat(attachments, hasItems("IMAG0144.jpg", "IMAG0144 (2).jpg"));
    } finally {
        String[] leftovers = attachmentDir.list();
        if (leftovers != null) {
            for (String fileName : leftovers) {
                File f = new File(attachmentDir, fileName);
                if (!f.delete()) {
                    f.deleteOnExit();
                }
            }
        }

        if (attachmentDir.exists() && !attachmentDir.delete()) {
            attachmentDir.deleteOnExit();
        }

        if (!tmpPdf.delete()) {
            tmpPdf.deleteOnExit();
        }
    }
}

@Test
public void main_attachmentsWithSlashInAttachmentName() throws IOException, URISyntaxException {
File tmpPdf = File.createTempFile("emailtopdf", ".pdf");
Expand Down
Loading

0 comments on commit 0be1b54

Please sign in to comment.