I am struggling with parsing values from a CSV file because of two consecutive double quotes "".
Here's an example of a CSV field I pulled from wikipedia: 1997,Ford,E350,"Super, ""luxurious"" truck"
I have tried to find different ways to account for it.
The result I keep getting is:
"1997"
"Ford"
"E350"
"Super,"
""Super"
" ""luxurious"" truck""
This is my VB.Net function.
Private Function splitCSV(ByVal sLine As String) As List(Of String)
Dim comA As Integer = -1, comB = -1, quotA = -1, quotB = -1, pos = 0
Dim parsed As New List(Of String)
Dim quote As String = """"
Dim comma As String = ","
Dim len As Integer = sLine.Length
Dim first As Boolean = True
comA = sLine.IndexOf(comma, pos) ' Find the next comma.
quotA = sLine.IndexOf(quote, pos) ' Find the next quotation mark.
' This if function works if there is only one field in the given row.
If comA < 0 Then
parsed.Add(False)
Return parsed
End If
While pos < len ' While not at end of the string
comB = sLine.IndexOf(comma, comA + 1) ' Find the second comma
quotB = sLine.IndexOf(quote, quotA + 1) ' Find the second quotation mark
' Looking for the actual second quote mark
' Skips over the double quotation marks.
If quotA > -1 And quotA < comB Then ' If the quotation mark is before the first comma
If Math.Abs(quotA - quotB).Equals(1) Then
Dim tempA As Integer = quotA
Dim tempB As Integer = quotB
' Looking for the actual second quote mark
' Skips over the double quotation marks.
While (Math.Abs(tempA - tempB).Equals(1))
tempA = tempB
If Not tempA.Equals(sLine.LastIndexOf(quote)) Then
tempB = sLine.IndexOf(quote, tempA + 1)
Else
tempA = tempB - 2
End If
End While
quotB = tempB
End If
If quotB < 0 Then ' If second quotation mark does not exist
parsed.Add(False) ' End the function and Return False
Return parsed
End If
parsed.Add(sLine.Substring(quotA + 1, quotB - quotA - 1)) ' Otherwise, add the substring of initial and end quotation marks.
quotA = quotB ' Give quotA the position of quotB
pos = quotB ' Mark the current position
ElseIf comA < comB Then
If first Then ' If it is the first comma in the line,
parsed.Add(sLine.Substring(pos, comA)) ' Parse the first field
first = False ' The future commas will not be considered as the first one.
End If
comB = sLine.IndexOf(comma, comA + 1) ' Find the second comma
If comB > comA Then ' If the second comma exists
parsed.Add(sLine.Substring(comA + 1, comB - comA - 1)) ' Add the substring of the first and second comma.
comA = comB ' Give comA the position of comB
pos = comB ' Mark the current position
End If
ElseIf len > 0 Then ' If the first comma does not exist, as long as sLine has something,
parsed.Add(sLine.Substring(pos + 1, len - pos - 1)) ' Return the substing of position to end of string.
pos = len ' Mark the position at the end to exit out of while loop
End If
End While
Return parsed ' Return parsed list of string
End Function
"James ""Jim"" Doe", 123 Any St, Anytown, AS, 01234- Sometowngeek"this ""string"" that must stay together"- SometowngeekHasFieldsEnclosedInQuotesto true:"1997 Ford E350 Super, "luxurious" truck"(your post has a,inside the quotes after super so it is preserved) - Ňɏssa Pøngjǣrdenlarp