1 vote

I have a CSV file in which some records contain line breaks inside quoted fields, but I need to treat each such record as a single row.

Here is a sample:

strbarcode,strdescription,strsize,decprice,intcaseqty,deccost,vat,departmentId
5015397310361,Yellow/Black Post Complete with Base ,1,0.01,2,0.01,20,18
5015397615305,"Hand sanitiser board c/w manual dispenser - 6 image design - Turquoise (300 x 400mm)
Complete with fixings",1,0.01,0,0.01,20,18
,"Barrier cream board c/w with manual dispenser - Hands - Blue (300 x 400mm)
Complete with fixings",1,0.01,0,0.01,20,18
,"Barrier cream board c/w with manual dispenser - Hands - Turquiose (300 x 400mm)
Complete with fixings",1,0.01,0,0.01,20,18
,"Barrier cream board c/w with manual dispenser - 3 image design - Blue (300 x 400mm)
Complete with fixings",1,0.01,0,0.01,20,18
5015397310354,Red/White Post Complete with Base ,1,0.01,2,0.01,20,18

Here is my current code:

private void ImportProductDataFile(string fileName)
{
    try
    {
        pictureBox_loading.Visible = true;
        pictureBox_loading.Refresh();
        Cursor.Current = Cursors.WaitCursor;
        StreamReader sr = new StreamReader(fileName);
        string[] headers = sr.ReadLine().Split(',');
        DataTable dt = new DataTable();
        DataTable dtnew = new DataTable();
        foreach (string header in headers)
        {
            dt.Columns.Add(header);
            dtnew.Columns.Add(header);
        }
        while (!sr.EndOfStream)
        {
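            // NOTE: ReadLine() returns one physical line at a time, so a quoted field
            // that contains a line break is split across two reads here; this is why the
            // multi-line records in the sample end up spread over several rows.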
            string[] rows = Regex.Split(sr.ReadLine(), ",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)");
            DataRow dr = dt.NewRow();
            for (int i = 0; i < headers.Length; i++)
            {
                if (rows.Length > 0 && i >= (rows.Length))
                {
                    dr[i] = "";
                }
                else
                {
                    dr[i] = rows[i];
                }
            }
            dt.Rows.Add(dr);
        }
        dt.AcceptChanges();
        var emptyRows = dt.Select().Where(
               row => dt.Columns.Cast<DataColumn>().All(column => string.IsNullOrEmpty(row[column].ToString()))).ToArray();
        Array.ForEach(emptyRows, x => x.Delete());
        dt.AcceptChanges();
        if (dt.Columns.Contains("departmentId"))
        {
            dt.Columns.Remove("departmentId");
            dt.AcceptChanges();
        }
        if (dtnew.Columns.Contains("departmentId"))
        {
            dtnew.Columns.Remove("departmentId");
            dtnew.AcceptChanges();
        }
        //dt = dt.AsEnumerable().GroupBy(r => new { strbarcode = r.Field<string>("strbarcode")}).Select(g => g.Last()).CopyToDataTable();
        dt = dt.AsEnumerable()
             .GroupBy(r => r.Field<string>("strbarcode").Trim(),
             (key, g) => g.OrderByDescending(
                 y =>
                 {
                     try
                     {
                         return Convert.ToDecimal(y.Field<string>("decprice"));
                     }
                     catch (Exception)
                     {
                         return Decimal.Zero;
                     }
                 }
             ).First()).CopyToDataTable();
        dt.AcceptChanges();
        //add data dt to dtnew 
        DataRow Rownew;
        foreach (DataRow row in dt.Rows)//dt  as sourcetable and dt new as destination  table
        {
            Rownew = dtnew.NewRow();
            //if (row["strbarcode"].ToString().Trim() == "")
            //{
            //    continue;
            //}
            Rownew["strbarcode"] = row["strbarcode"].ToString().Trim() != "" ? row["strbarcode"].ToString() : "";
            Rownew["strdescription"] = row["strdescription"].ToString().Trim() != "" ? row["strdescription"].ToString().Replace(",", "") : "";
            Rownew["strsize"] = row["strsize"].ToString() != "" ? row["strsize"].ToString() : "";
            Rownew["decprice"] = row["decprice"].ToString().Trim() != "" ? objUtility.ToDecimal(row["decprice"].ToString()) : 0;
            Rownew["intcaseqty"] = row["intcaseqty"].ToString().Trim() != "" ? objUtility.ToInt(row["intcaseqty"].ToString()) : 0;
            Rownew["deccost"] = row["deccost"].ToString().Trim() != "" ? objUtility.ToDecimal(row["deccost"].ToString()) : 0;
            Rownew["vat"] = row["vat"].ToString().Trim() != "" ? objUtility.ToDecimal(row["vat"].ToString()) : 0;
            dtnew.Rows.Add(Rownew);
        }
        dtnew.AcceptChanges();
        sr.Close();
        //create use type table and stored procedure 
        string strStatus = OjbDataAccess.UpdateDBForImportProductSP();
        if (strStatus == "success")
        {
            //inserting and updating data in plof database 
            string strMsg = OjbDataAccess.ImportProdcttosuperplofInsertUpdate(dtnew);
            pictureBox_loading.Visible = false;
            Cursor.Current = Cursors.Default;
            MessageBox.Show(strMsg, "Data Import Status", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
        else
        {
            pictureBox_loading.Visible = false;
            Cursor.Current = Cursors.Default;
            MessageBox.Show(strStatus, "Data Import Status", MessageBoxButtons.OK, MessageBoxIcon.Information);
        }
    }
    catch (Exception ex)
    {
        pictureBox_loading.Visible = false;
        Cursor.Current = Cursors.Default;
        txtFile.Text = string.Empty;
        btnImportFile.Enabled = true;
        MessageBox.Show("Please enter valid CSV file");
        WriteErrorLog("=============================================================================" + Environment.NewLine);
        WriteErrorLog("Product CSV file is not imported  due to some error in import file." + Environment.NewLine);
        WriteErrorLog(ex.ToString() + Environment.NewLine);
        WriteErrorLog("=============================================================================" + Environment.NewLine);
        return;
    }
    this.Dispose();
}
Maybe use a standards-compliant CSV library that knows how to deal with quoting instead of rolling your own? – AKX
Instead of reinventing the wheel (unless you actually want to make one), take a look at FileHelpers, CsvHelper, FlatFiles, or NugetMustHaves: CSV. – Olivier Rogier
If you absolutely want to implement your own CSV reader, I suggest using tokens instead of Regex. – Sebastian Grunow
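
As the comments suggest, a dedicated CSV library already knows how to keep a quoted field that contains commas and line breaks inside a single record. Below is a minimal sketch using CsvHelper (one of the libraries mentioned above); the CsvImport/LoadCsv names are just for illustration, and it assumes the file uses standard RFC 4180 quoting.

using System.Data;
using System.Globalization;
using System.IO;
using CsvHelper;

static class CsvImport
{
    // Loads the CSV into a DataTable; CsvHelper keeps quoted fields that
    // contain commas or line breaks inside a single record.
    public static DataTable LoadCsv(string fileName)
    {
        using (var reader = new StreamReader(fileName))
        using (var csv = new CsvReader(reader, CultureInfo.InvariantCulture))
        using (var dataReader = new CsvDataReader(csv))
        {
            var dt = new DataTable();
            dt.Load(dataReader); // the header row becomes the column names
            return dt;
        }
    }
}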

2 Answers

0 votes

If the structure of your text file is always the same, a single regular expression can parse the file:

    Regex lineExp = new Regex(@"(?<strbarcode>[0-9]+)?,((?<strdescription>[^,""]+)|""(?<strdescription>[^""]+)""),(?<strsize>[0-9]+),(?<decprice>[0-9.]+),(?<intcaseqty>[0-9]+),(?<deccost>[0-9.]+),(?<vat>[0-9]+),(?<departmentId>[0-9]+)([\r\n]+|$)", RegexOptions.Singleline | RegexOptions.Compiled);
    Regex lineBreakExp = new Regex(@"[\r\n]+", RegexOptions.Singleline | RegexOptions.Compiled);

    string data = @"strbarcode,strdescription,strsize,decprice,intcaseqty,deccost,vat,departmentId
5015397310361,Yellow/Black Post Complete with Base ,1,0.01,2,0.01,20,18
5015397615305,""Hand sanitiser board c/w manual dispenser - 6 image design - Turquoise (300 x 400mm)
Complete with fixings"",1,0.01,0,0.01,20,18
,""Barrier cream board c/w with manual dispenser - Hands - Blue (300 x 400mm)
Complete with fixings"",1,0.01,0,0.01,20,18
,""Barrier cream board c/w with manual dispenser - Hands - Turquiose (300 x 400mm)
Complete with fixings"",1,0.01,0,0.01,20,18
,""Barrier cream board c/w with manual dispenser - 3 image design - Blue (300 x 400mm)
Complete with fixings"",1,0.01,0,0.01,20,18
5015397310354,Red/White Post Complete with Base ,1,0.01,2,0.01,20,18
5015397310361,""YProduct with delimiter, so take care"",1,0.01,2,0.01,20,18
5015397310361,Product with invalid strsize will be ignored,a,0.01,2,0.01,20,18
5015397310361,Last line ends not with CRLF,3,0.01,2,0.01,20,18";

    var matches = lineExp.Matches(data);
    int i = 1;
    foreach (Match m in matches)
    {
        Console.Write($"Item {i++}\t");
        Console.Write($"{m.Groups["strbarcode"].Value}\t");
        Console.Write($"{lineBreakExp.Replace(m.Groups["strdescription"].Value, " ")}\t");
        Console.Write($"{m.Groups["strsize"].Value}\t");
        Console.Write($"{m.Groups["decprice"].Value}\t");
        Console.Write($"{m.Groups["intcaseqty"].Value}\t");
        Console.Write($"{m.Groups["deccost"].Value}\t");
        Console.Write($"{m.Groups["vat"].Value}\t");
        Console.WriteLine(m.Groups["departmentId"].Value);
    }

Output:

Item 1  5015397310361   Yellow/Black Post Complete with Base    1   0.01    2   0.01    20  18
Item 2  5015397615305   Hand sanitiser board c/w manual dispenser - 6 image design - Turquoise (300 x 400mm)  Complete with fixings 1   0.01    0   0.01    20  18
Item 3      Barrier cream board c/w with manual dispenser - Hands - Blue (300 x 400mm)  Complete with fixings   1   0.01    0   0.01    20  18
Item 4      Barrier cream board c/w with manual dispenser - Hands - Turquiose (300 x 400mm)  Complete with fixings  1   0.01    0   0.01    20  18
Item 5      Barrier cream board c/w with manual dispenser - 3 image design - Blue (300 x 400mm)  Complete with fixings  1   0.01    0   0.01    20  18
Item 6  5015397310354   Red/White Post Complete with Base   1   0.01    2   0.01    20  18
Item 7  5015397310361   YProduct with delimiter, so take care   1   0.01    2   0.01    20  18
Item 8  5015397310361   Last line ends not with CRLF    3   0.01    2   0.01    20  18
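
To apply this to the question's file, read the whole file into one string before matching, so that a record spanning two physical lines stays inside a single match (a small sketch; fileName is the path passed to the import method):

    // Read the entire file at once; reading it line by line would split the
    // quoted multi-line records before the regex ever sees them.
    string data = File.ReadAllText(fileName);
    var matches = lineExp.Matches(data);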
0 votes

I maintain a couple of libraries that might help you: Sylvan.Data.Csv and Sylvan.Data. The Csv library provides CsvDataReader, which handles parsing CSV data with quoted fields that contain delimiters and newlines, such as in your file. The Sylvan.Data library (currently prerelease, but available on NuGet) provides the Schema type, which can be used to apply a strongly typed schema to your CSV data. Together, these libraries make it very easy to load a DataTable with strongly typed data from a CSV file.

// define a schema for your csv.
var schema = Schema.Parse("strbarcode,strdescription,strsize,decprice:decimal,intcaseqty:int,deccost:decimal,vat:int,departmentId:int");
// provide the schema when constructing the CsvDataReader
var options = new CsvDataReaderOptions { Schema = new CsvSchema(schema) };
var data = CsvDataReader.Create("data.csv", options);
var dt = new DataTable();
// Load the data table with the csv data
dt.Load(data);

I don't have any documentation for the Schema.Parse syntax, but all of the .NET primitive types are supported. You can also remap column names via the schema, if you wish, using the ">" notation: "strbarcode>BarCode,strdescription>Description,strsize>Size,decprice>Price:decimal,intcaseqty>CaseQuantity:int,deccost>Cost:decimal,vat>Vat:int,departmentId>DepartmentId:int"
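
For instance, a remapped schema slots into the same loading code shown above (a sketch reusing that snippet; the renamed columns end up on the DataTable):

// schema string with ">" remapping, otherwise identical to the example above
var schema = Schema.Parse("strbarcode>BarCode,strdescription>Description,strsize>Size,decprice>Price:decimal,intcaseqty>CaseQuantity:int,deccost>Cost:decimal,vat>Vat:int,departmentId>DepartmentId:int");
var options = new CsvDataReaderOptions { Schema = new CsvSchema(schema) };
var dt = new DataTable();
// the resulting columns are named BarCode, Description, Size, Price, ...
dt.Load(CsvDataReader.Create("data.csv", options));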

Sylvan.Data.Csv also happens to be the fastest CSV parser in the .NET ecosystem, so if you have huge files to deal with, you won't find a faster solution.