2
votes

I am trying to use a google apps script to dump emails that I have under a specific label to a Google Docs Spreadsheet. I want to list each email message body in a thread as a separate row, such that if a thread has a chain of 9 messages, each one is listed separately (without the chain) in a row.

I have managed get it to where each message body + its entire previous thread is stored, in one cell and I can get the entire thread in a cell. But this is not what I want.

This code will put the entire body of the thread in a row.

function getEmails() {
  clearCanvas();
  var label = GmailApp.getUserLabelByName(LabelWithEmails);
  var threads = label.getThreads();

  // var threads = GmailApp.getInboxThreads(0, 50);
  var row = getFirstRow() + 1;
  var firstmessageId = getfirstmsgid();
  UserProperties.setProperty("firstmsgid", firstmessageId);
  spreadsheet.toast("Loading emails..Please wait. It could take few seconds", "Status", -1);

  var messages = GmailApp.getMessagesForThreads(threads); //gets messages in 2D array
  for (i = 0; i < 5; ++i)
  {
   try {
       j = messages[i].length; //to process most recent conversation in thread (contains messages from previous conversations as well, reduces redundancy
       messageBody = messages[i][j-1].getBody(); //gets body of message in HTML
       messageSubject = messages[i][j-1].getSubject();
       messageDate = messages[i][j-1].getDate();
       messageFrom = messages[i][j-1].getFrom();

       Logger.log("Message Subject:" + messageSubject);
       Logger.log("Message Date:" + messageDate);
       Logger.log("Message From:" + messageFrom);

       sheet.getRange(row, 1).setValue(messageFrom);
       sheet.getRange(row, 2).setValue(messageSubject);
       sheet.getRange(row, 3).setValue(messageDate);
       sheet.getRange(row, 4).setValue(getTextFromHtml(messageBody));
       row++;
    } catch (error) {
        spreadsheet.toast("Error Occured. Report it @ http://techawakening.org/", "Status", -1);
    }

    if (i == threads.length - 1) {
        spreadsheet.toast("Successfully loaded emails.", "Status", -1);
        spreadsheet.toast("Now mark emails to be forwarded by changing the background color of the cells to green. Then select Forward->Forward selected emails", "Status", -1);
    }

  }
}

This will put each message body including it's previous thread/message chain in a row.

function getEmails() {
clearCanvas();
var label = GmailApp.getUserLabelByName(LabelWithEmails);
var threads = label.getThreads();

// var threads = GmailApp.getInboxThreads(0, 50);
var row = getFirstRow() + 1;
var firstmessageId = getfirstmsgid();
UserProperties.setProperty("firstmsgid", firstmessageId);
spreadsheet.toast("Loading emails..Please wait. It could take few seconds", "Status", -1);

var messages = GmailApp.getMessagesForThreads(threads); //gets messages in 2D array
// messages.length
// jknipp - working except it keeps the thread chain
for (var i = 0; i < threads.length; i++) {
    try {
        var messages = threads[i].getMessages();
        for (var m = 0; m < messages.length; m++) {
            sheet.getRange(row, 1).setValue(messages[m].getFrom());
            sheet.getRange(row, 2).setValue(messages[m].getSubject());
            sheet.getRange(row, 3).setValue(messages[m].getDate());
            sheet.getRange(row, 4).setValue(getTextFromHtml(messages[m].getBody()));

            row++;
        }
    } catch (error) {
        spreadsheet.toast("Error Occured. Report it @ http://techawakening.org/", "Status", -1);
    }

    if (i == threads.length - 1) {
        spreadsheet.toast("Successfully loaded emails.", "Status", -1);
        spreadsheet.toast("Now mark emails to be forwarded by changing the background color of the cells to green. Then select Forward->Forward selected emails", "Status", -1);
    }
  }
}

References https://stackoverflow.com/a/11034461/39803

2

2 Answers

1
votes

This is a case of "garbage-in, garbage-out". When you're using the gmail app in thread view, Google's servers are parsing the body of emails and cleverly hiding the bodies of old messages. This makes it appear that the latest message in a thread consists of only the new lines of that message, and that you have a "chain" of smaller messages.

This is an illusion. The last message in a thread typically contains the new content first, followed by the content of all previous message bodies, as a single message body. Different email services and clients use different patterns for this.

You would need to be able to identify most or all of the ways that the content from previous messages in a thread are represented in the current message body, and use that to extract only the new content.

1
votes

I was able to pull out only the body of the emails by identifying where the 'previous conversation' started.

var sheet = SpreadsheetApp.getActiveSheet();
var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var LabelWithEmails = sheet.getRange(3, 2).getValue();

function getEmails() {
    clearCanvas();
    var label = GmailApp.getUserLabelByName(LabelWithEmails);
    var threads = label.getThreads();

    var row = getFirstRow() + 1;
    var firstmessageId = getfirstmsgid();
    UserProperties.setProperty("firstmsgid", firstmessageId);
    spreadsheet.toast("Loading emails..Please wait. It could take few seconds", "Status", -1);

    var messages = GmailApp.getMessagesForThreads(threads); //gets messages in 2D array

    for (var i = 0; i < 2;/*threads.length;*/ i++) {
        try {
            var messages = threads[i].getMessages();

            for (var m = 0; m < messages.length; m++) {
                var msg = messages[m];
                var isForward = msg.getBody().search(/---------- Forwarded message/i) != -1;

                if(!isValidMessage) continue;

                sheet.getRange(row, 1).setValue(msg.getFrom());
                sheet.getRange(row, 2).setValue(msg.getTo() + ";" + msg.getCc() + ";" + msg.getBcc());
                sheet.getRange(row, 3).setValue(msg.getSubject());
                sheet.getRange(row, 4).setValue(msg.getDate());

                if(!isForward) {
                    // Get only this messages body, ignore the previous chain
                    var body = msg.getBody();
                    var firstIndexOfThread = body.search(/gmail_quote/i); 
                    body = (firstIndexOfThread == -1) ? body : body.substring(0, firstIndexOfThread);

                        sheet.getRange(row, 5).setValue(getTextFromHtml(body));

                } else {
                    // Use the whole body if its a forward
                    sheet.getRange(row, 5).setValue(getTextFromHtml(msg.getBody()));
                    sheet.getRange(row, 6).setValue("***");
                }

            row++;
        }
    } catch (error) {
            Logger.log(error);
        spreadsheet.toast("Error Occured - please see the logs.", "Status", -1);
    }

    if (i == threads.length - 1) {
        spreadsheet.toast("Successfully loaded emails.", "Status", -1);
    }
  }
}