1
votes

I am trying to write a VBA script to extract information from a text document and tabulate it into corresponding columns. The code is based on https://stackguides.com/questions/51635537/extract-data-from-text-file-into-excel/51636080. I am having an issue doing multiple extractions.

Sample text

Age: 35 
Rank: Lieutenant 
Classification: Volunteer 
Incident date: Jun 22, 1997
Date of death: Jun 22, 1997 
Cause of death: Caught or Trapped 
Nature of death: Burns 
Activity type: Advance Hose Lines/Fire Attack (includes Wildland) 
Emergency duty: Yes 
Duty type: On-Scene Fire 
Fixed property use: Residential 
Memorial fund information:

Age: 18 
Rank: Firefighter 
Classification: Volunteer
Incident date: Jun 16, 1997 
Date of death: Jun 17, 1997 
Cause of death: Struck By 
Nature of death: Trauma 
Activity type: Driving/Operating Vehicle/Apparatus
Emergency duty: Yes 
Duty type: Responding 
Fixed property use: N/A
Memorial fund information:

Output of working code enter image description here

Desired Output enter image description here

Failed code output enter image description here

Problem: VBA code fails after column "F" and does not move to the next row

Working code:

Sub ExtractData()
Dim filename As String, nextrow As Long, MyFolder As String
Dim MyFile As String, text As String, textline As String, filedate As String
Dim filenum As Integer
Dim idx%

MyFolder = "/Users/user/Downloads/test/"
MyFile = Dir(MyFolder & "*.txt")

nextrow = ActiveSheet.Cells(Rows.Count, "A").End(xlUp).Row + 1

Do While MyFile <> ""

    Open (MyFolder & MyFile) For Input As #1

    'nextrow = ActiveSheet.Cells(Rows.Count, "A").End(xlUp).Row + 1

    Do Until EOF(1)
        Line Input #1, textline 'read a line

    idx = InStr(textline, "Age:") ' if has date, set it but not move to the next ROW
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "A").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Rank:") ' if has date, set it but not move to the next ROW
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "B").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Classification:") ' if has date, set it but not move to the next ROW
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "C").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Incident date:") ' if has date, set it but not move to the next ROW
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "D").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Date of death:") ' if has date, set it but not move to the next ROW
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "E").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Cause of death:")
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "F").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)

        nextrow = nextrow + 1 'now move to next row

    End If

Loop
Close #1
MyFile = Dir()

Loop
End Sub

Code that fails

Sub ExtractData()
Dim filename As String, nextrow As Long, MyFolder As String
Dim MyFile As String, text As String, textline As String, filedate As String
Dim filenum As Integer
Dim idx%

MyFolder = "/Users/josephheaton/Downloads/test/"
MyFile = Dir(MyFolder & "*.txt")

nextrow = ActiveSheet.Cells(Rows.Count, "A").End(xlUp).Row + 1

Do While MyFile <> ""

    Open (MyFolder & MyFile) For Input As #1

    'nextrow = ActiveSheet.Cells(Rows.Count, "A").End(xlUp).Row + 1

    Do Until EOF(1)
    Line Input #1, textline 'read a line

    idx = InStr(textline, "Age:") ' if has date, set it but not move to the next ROW
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "A").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Rank:")
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "B").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Classification:")
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "C").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Incident date:") 
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "D").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Date of death:")
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "E").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Cause of death:")
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "F").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Nature of death:")
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "G").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Activity:")
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "H").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Emergency:")
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "I").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Duty:")
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "J").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If

    idx = InStr(textline, "Property type:")
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "L").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)
    End If


    idx = InStr(textline, "Memorial fund info:")
    If idx > 0 Then
        ActiveSheet.Cells(nextrow, "L").Value = Right(textline, Len(textline) - InStr(textline, ":") - 1)

        nextrow = nextrow + 1 'now move to next row

    End If

Loop
Close #1
MyFile = Dir()

Loop
End Sub
1

1 Answers

0
votes

Your "not working" code is actually writting out all the data. But your nextrow logic is flawed, so some data is being overwritten.

Rather than try to fix that code, I would suggest an alternative method

Sub ExtractData()
    Dim NextRow As Long
    Dim MyFolder As String
    Dim MyFile As String
    Dim TextLine As String
    Dim idx As Variant
    Dim FileNum As Integer
    Dim ws As Worksheet
    Dim Tags() As Variant
    Dim Tag As Variant
    Dim Results As Variant
    
    Set ws = ActiveSheet
    
    MyFolder = "C:/Users/user/Downloads/test/"
    MyFile = Dir(MyFolder & "*.txt")
    
    
    ' Create an array of item tags, and poutput column numbers
    Tags = Array( _
      Array("Age:", 1), _
      Array("Rank:", 2), _
      Array("Classification:", 3), _
      Array("Incident date:", 4), _
      Array("Date of death:", 5), _
      Array("Cause of death:", 6), _
      Array("Nature of death:", 7), _
      Array("Activity type:", 8), _
      Array("Emergency duty:", 9), _
      Array("Duty type:", 10), _
      Array("Fixed property use:", 11), _
      Array("Memorial fund information:", 12))
      
    FileNum = FreeFile ' don't assume file 1 is available
    NextRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row + 1
    ReDim Results(1 To 1, 1 To UBound(Tags) - LBound(Tags) + 1)
    Debug.Print "++++++++++++++++++++++++++++++++"
    Debug.Print "Extracting data", Now()
    Do While MyFile <> ""
        Open MyFolder & MyFile For Input As #FileNum
        Debug.Print "File:", MyFolder & MyFile
        
        Do Until EOF(FileNum)
            Line Input #FileNum, TextLine 'read a line
            If TextLine = vbNullString Then
                ' Blank line indicates end of record
                ' write out record and clear Results array
                ws.Cells(NextRow, 1).Resize(1, UBound(Results, 2)).Value = Results
                ReDim Results(1 To 1, 1 To UBound(Tags) - LBound(Tags) + 1)
                NextRow = NextRow + 1
            Else
                ' get Tag, text up to :
                Tag = Left$(TextLine, InStr(TextLine, ":"))
                idx = Application.VLookup(Tag, Tags, 2, 0)
                ' look up Tag in Tags, return output column number
                If Not IsError(idx) Then
                    Results(1, idx) = Trim$(Mid$(TextLine, Len(Tag) + 1))
                Else
                    ' Unrecognised Field. Review in Immediate window after running code
                    Debug.Print "Unrecognised data:", TextLine
                End If
            End If
            DoEvents
        Loop
        ' add final record
        If Results(1, 1) <> vbNullString Then
            ws.Cells(NextRow, 1).Resize(1, UBound(Results, 2)).Value = Results
            NextRow = NextRow + 1
        End If
        Close #FileNum
        MyFile = Dir()
    Loop
    Debug.Print "Completed        ", Now()
End Sub