I download a CSV file from another server using perl script. After download I wish to check whether the file contains any corrupted data or not. I tried to use Encode::Detect::Detector to detect encoding but it returns 'undef' in both cases:
- if the string is ASCII or
- if the string is corrupted
So using the below program I can't differentiate between ASCII & Corrupted Data.
use strict;
use Text::CSV;
use Encode::Detect::Detector;
use XML::Simple;
use Encode;
require Encode::Detect;
my @rows;
my $init_file = "new-data-jp-2013-8-8.csv";
my $csv = Text::CSV->new ( { binary => 1 } )
or die "Cannot use CSV: ".Text::CSV->error_diag ();
open my $fh, $init_file or die $init_file.": $!";
while ( my $row = $csv->getline( $fh ) ) {
my @fields = @$row; # get line into array
for (my $i=1; $i<=23; $i++){ # I already know that CSV file has 23 columns
if ((Encode::Detect::Detector::detect($fields[$i-1])) eq undef){
print "the encoding is undef in col".$i.
" where field is ".$fields[$i-1].
" and its length is ".length($fields[$i-1])." \n";
}
else {
my $string = decode("Detect", $fields[$i-1]);
print "this is string print ".$string.
" the encoding is ".Encode::Detect::Detector::detect($fields[$i-1]).
" and its length is ".length($fields[$i-1])."\n";
}
}
}