I am building a parser for a custom pipe delimited file format and I am finding my code to be very bulky, could someone suggest better methods of parsing this data?
The file's data is broken down by a line delimited by a pipe (|), each line starts with a record type, followed by an ID, followed by different number of columns after.
Ex: CDI|11111|OTHERDATA|somemore|other
CEX001|123131|DATA|data
CCC|123131|DATA|data1|data2|data3|data4|data5|data6
. I am splitting by pipe, then grabbing the first two columns, and then using a switch checking the first line and calling a function that will parse the remaining into an object purpose built for that record type. I would really like a more elegant method.
public Dictionary<string, DataRecord> Parse()
{
var data = new Dictionary<string, DataRecord>();
var rawDataDict = new Dictionary<string, List<List<string>>>();
foreach (var line in File.ReadLines(_path))
{
var split = line.Split('|');
var Id = split[1];
if (!rawDataDict.ContainsKey(Id))
{
rawDataDict.Add(Id, new List<List<string>> {split.ToList()});
}
else
{
rawDataDict[Id].Add(split.ToList());
}
}
rawDataDict.ToList().ForEach(pair =>
{
var key = pair.Key.ToString();
var values = pair.Value;
foreach (var value in values)
{
var recordType = value[0];
switch (recordType)
{
case "CDI":
var cdiRecord = ParseCdi(value);
if (!data.ContainsKey(key))
{
data.Add(key, new DataRecord
{
Id = key, CdiRecords = new List<CdiRecord>() { cdiRecord }
});
}
else
{
data[key].CdiRecords.Add(cdiRecord);
}
break;
case "CEX015":
var cexRecord = ParseCex(value);
if (!data.ContainsKey(key))
{
data.Add(key, new DataRecord
{
Id = key,
CexRecords = new List<Cex015Record>() { cexRecord }
});
}
else
{
data[key].CexRecords.Add(cexRecord);
}
break;
case "CPH":
CphRecord cphRecord = ParseCph(value);
if (!data.ContainsKey(key))
{
data.Add(key, new DataRecord
{
Id = key,
CphRecords = new List<CphRecord>() { cphRecord }
});
}
else
{
data[key].CphRecords.Add(cphRecord);
}
break;
}
}
});
return data;
}