1
votes

When I want to change a column from factor to numeric with "as.numeric()", the final numbers are totally different from what I imported.

It is a problem due to the comma? Really strange...

Thanks!

dput(datatest) structure(list(HOURS.at.sea = structure(c(261L, 84L, 83L, 260L, 307L, 292L, 252L, 72L, 59L, 343L, 244L, 78L, 56L, 256L, 9L, 269L, 291L, 254L, 69L, 65L, 267L, 283L, 1L, 80L, 169L, 1L, 115L, 67L, 75L, 3L, 309L, 59L, 33L, 52L, 75L, 37L, 51L, 75L, 22L, 2L, 49L, 83L, 21L, 2L, 53L, 70L, 19L, 3L, 57L, 70L, 22L, 3L, 58L, 78L, 36L, 49L, 2L, 66L, 115L, 52L, 72L, 114L, 57L, 78L, 116L, 56L, 68L, 116L, 55L, 70L, 327L, 6L, 257L, 2L, 107L, 176L, 182L, 4L, 114L, 35L, 46L, 3L, 67L, 34L, 46L, 3L, 79L, 40L, 50L, 69L, 38L, 52L, 2L, 69L, 38L, 85L, 49L, 70L, 64L, 70L, 3L, 1L, 206L, 231L, 58L, 55L, 109L, 212L, 220L, 53L, 56L, 125L, 3L, 5L, 214L, 231L, 57L, 55L, 107L, 217L, 228L, 52L, 57L, 105L, 210L, 231L, 56L, 55L, 105L, 215L, 232L, 55L, 54L, 90L, 210L, 230L, 58L, 54L, 108L, 218L, 228L, 57L, 56L, 96L, 213L, 228L, 55L, 57L, 106L, 217L, 232L, 58L, 73L, 110L, 217L, 233L, 57L, 59L, 117L, 7L, 222L, 233L, 59L, 56L, 107L, 219L, 231L, 57L, 56L, 109L, 221L, 233L, 56L, 57L, 106L, 1L, 1L, 247L, 317L, 159L, 316L, 229L, 306L, 129L, 120L, 29L, 74L, 287L, 12L, 151L, 109L, 68L, 125L, 270L, 1L, 56L, 224L, 180L, 76L, 281L, 86L, 79L, 258L, 83L, 1L, 229L, 23L, 132L, 56L, 59L, 76L, 115L, 110L, 28L, 235L, 226L, 16L, 134L, 55L, 57L, 66L, 124L, 117L, 28L, 240L, 1L, 52L, 335L, 32L, 59L, 186L, 71L, 4L, 7L, 5L, 39L, 59L, 2L, 3L, 4L, 293L, 4L, 66L, 99L, 110L, 54L, 2L, 4L, 148L, 221L, 322L, 31L, 170L, 1L, 286L, 162L, 336L, 129L, 138L, 70L, 71L, 125L, 241L, 277L, 6L, 8L, 76L, 84L, 320L, 339L, 117L, 137L, 69L, 80L, 1L, 243L, 92L, 139L, 149L, 54L, 67L, 262L, 1L, 240L, 78L, 73L, 1L, 187L, 48L, 65L, 97L, 263L, 332L, 187L, 103L, 29L, 79L, 266L, 334L, 188L, 303L, 96L, 264L, 313L, 184L, 45L, 66L, 105L, 263L, 330L, 184L, 53L, 24L, 96L, 97L, 272L, 331L, 185L, 47L, 68L, 102L, 265L, 342L, 187L, 97L, 49L, 58L, 264L, 333L, 195L, 96L, 31L, 181L, 265L, 1L, 236L, 79L, 270L, 89L, 246L, 45L, 207L, 166L, 79L, 41L, 177L, 55L, 128L, 1L, 152L, 205L, 57L, 168L, 195L, 284L, 94L, 118L, 340L, 82L, 123L, 57L, 73L, 118L, 58L, 227L, 18L, 51L, 323L, 178L, 49L, 120L, 26L, 59L, 52L, 67L, 67L, 114L, 234L, 1L, 15L, 201L, 163L, 42L, 72L, 47L, 10L, 66L, 106L, 224L, 17L, 189L, 44L, 200L, 164L, 45L, 69L, 47L, 67L, 102L, 275L, 14L, 190L, 30L, 199L, 161L, 47L, 71L, 52L, 276L, 32L, 199L, 161L, 48L, 73L, 46L, 70L, 103L, 273L, 23L, 271L, 47L, 74L, 46L, 57L, 308L, 23L, 1L, 66L, 338L, 242L, 155L, 72L, 249L, 57L, 80L, 94L, 225L, 341L, 104L, 1L, 116L, 44L, 5L, 90L, 293L, 50L, 238L, 84L, 46L, 216L, 46L, 278L, 325L, 57L, 314L, 48L, 108L, 44L, 50L, 59L, 1L, 289L, 49L, 42L, 295L, 53L, 304L, 3L, 49L, 59L, 1L, 74L, 15L, 84L, 26L, 244L, 129L, 80L, 82L, 158L, 126L, 45L, 79L, 65L, 344L, 258L, 106L, 81L, 75L, 95L, 4L, 144L, 140L, 25L, 83L, 76L, 1L, 209L, 99L, 42L, 79L, 312L, 1L, 27L, 238L, 76L, 70L, 119L, 115L, 69L, 305L, 1L, 318L, 139L, 187L, 84L, 67L, 150L, 44L, 131L, 73L, 172L, 123L, 57L, 73L, 251L, 1L, 122L, 254L, 28L, 5L, 4L, 4L, 4L, 6L, 156L, 247L, 88L, 248L, 81L, 78L, 42L, 258L, 87L, 165L, 50L, 173L, 80L, 98L, 41L, 72L, 108L, 102L, 48L, 74L, 102L, 223L, 103L, 48L, 5L, 5L, 67L, 98L, 226L, 104L, 57L, 4L, 3L, 3L, 2L, 72L, 98L, 228L, 105L, 48L, 3L, 4L, 4L, 4L, 78L, 94L, 234L, 103L, 2L, 26L, 3L, 3L, 4L, 4L, 3L, 3L, 60L, 98L, 84L, 218L, 11L, 45L, 1L, 158L, 175L, 95L, 4L, 65L, 112L, 311L, 216L, 146L, 1L, 131L, 217L, 55L, 302L, 60L, 202L, 66L, 56L, 167L, 79L, 179L, 142L, 2L, 54L, 145L, 1L, 196L, 60L, 70L, 118L, 23L, 91L, 42L, 197L, 50L, 67L, 77L, 83L, 44L, 66L, 140L, 44L, 83L, 337L, 1L, 52L, 319L, 1L, 47L, 62L, 50L, 234L, 81L, 57L, 254L, 56L, 61L, 45L, 232L, 80L, 57L, 253L, 121L, 234L, 77L, 51L, 241L, 49L, 63L, 44L, 234L, 81L, 54L, 250L, 2L, 1L, 153L, 338L, 107L, 141L, 68L, 55L, 128L, 225L, 100L, 171L, 193L, 194L, 160L, 338L, 112L, 139L, 3L, 68L, 2L, 65L, 130L, 49L, 139L, 1L, 50L, 310L, 76L, 279L, 141L, 1L, 1L, 74L, 251L, 204L, 1L, 217L, 84L, 198L, 43L, 54L, 298L, 268L, 26L, 208L, 49L, 51L, 297L, 226L, 92L, 199L, 42L, 54L, 299L, 267L, 26L, 199L, 31L, 58L, 298L, 218L, 96L, 203L, 43L, 58L, 299L, 267L, 288L, 25L, 183L, 42L, 315L, 92L, 199L, 32L, 56L, 300L, 1L, 105L, 103L, 96L, 90L, 76L, 248L, 290L, 294L, 274L, 94L, 56L, 133L, 136L, 72L, 67L, 67L, 115L, 244L, 174L, 112L, 84L, 68L, 143L, 240L, 1L, 220L, 147L, 209L, 117L, 157L, 321L, 113L, 138L, 13L, 154L, 118L, 139L, 111L, 55L, 1L, 102L, 51L, 80L, 208L, 241L, 70L, 96L, 80L, 259L, 103L, 70L, 223L, 326L, 57L, 285L, 53L, 324L, 67L, 255L, 105L, 221L, 105L, 76L, 53L, 54L, 83L, 84L, 247L, 91L, 68L, 106L, 30L, 324L, 59L, 65L, 296L, 79L, 20L, 79L, 301L, 52L, 70L, 1L, 135L, 245L, 53L, 217L, 247L, 83L, 75L, 2L, 78L, 242L, 192L, 249L, 56L, 77L, 99L, 82L, 75L, 65L, 241L, 6L, 63L, 112L, 82L, 80L, 49L, 239L, 191L, 101L, 1L, 14L, 222L, 71L, 74L, 98L, 215L, 325L, 282L, 117L, 325L, 226L, 97L, 91L, 92L, 57L, 72L, 246L, 1L, 211L, 53L, 70L, 131L, 239L, 237L, 217L, 53L, 70L, 93L, 242L, 329L, 43L, 92L, 104L, 127L, 50L, 306L, 328L, 256L, 76L, 49L, 280L, 1L, 201L, 103L, 73L, 4L, 4L, 4L, 242L, 125L), .Label = c("#VALUE!", "0,1", "0,2", "0,3", "0,4", "0,5", "0,6", "0,8", "1,0", "1,3", "10,1", "10,2", "10,3", "10,4", "10,5", "10,8", "10,9", "1023,7", "107,5", "108,5", "108,9", "109,2", "11,0", "11,1", "11,2", "11,3", "11,4", "11,5", "11,6", "11,7", "11,8", "11,9", "110,1", "110,2", "110,4", "110,7", "110,8", "110,9", "111,1", "115,8", "12,0", "12,1", "12,2", "12,3", "12,4", "12,5", "12,6", "12,7", "12,8", "12,9", "13,0", "13,1", "13,2", "13,3", "13,4", "13,5", "13,6", "13,7", "13,8", "13,9", "132,9", "133,8", "133,9", "138,1", "14,0", "14,1", "14,2", "14,3", "14,4", "14,5", "14,6", "14,7", "14,8", "14,9", "15,0", "15,1", "15,2", "15,3", "15,4", "15,5", "15,6", "15,7", "15,8", "15,9", "156,1", "1569,2", "157,3", "157,6", "158,5", "16,0", "16,1", "16,2", "16,3", "16,4", "16,5", "16,6", "16,7", "16,8", "16,9", "162,4", "165,6", "17,0", "17,1", "17,2", "17,3", "17,4", "17,5", "17,6", "17,7", "17,8", "17,9", "18,0", "18,2", "18,3", "18,4", "18,5", "18,6", "18,7", "18,8", "18,9", "180,9", "181,4", "19,0", "19,1", "19,3", "19,4", "19,5", "19,6", "19,7", "19,8", "19,9", "2,1", "2,3", "2,5", "2,7", "2,8", "20,0", "20,1", "20,3", "20,5", "20,7", "208,1", "21,0", "21,1", "21,3", "21,5", "21,6", "21,8", "2101,8", "22,0", "22,1", "22,4", "22,5", "22,6", "225,8", "23,1", "23,2", "23,3", "23,6", "23,9", "24,3", "24,9", "25,3", "25,4", "253,7", "254,9", "255,3", "26,9", "27,7", "277,6", "278,9", "279,3", "28,0", "288,3", "29,6", "297,3", "299,6", "3,2", "3,3", "3,8", "30,3", "303,8", "32,1", "32,4", "32,5", "32,6", "32,7", "32,9", "322,0", "322,2", "326,4", "326,7", "328,9", "329,4", "33,1", "33,2", "33,3", "33,4", "33,5", "33,6", "33,7", "33,8", "33,9", "331,4", "34,0", "34,4", "34,5", "34,7", "34,9", "35,0", "35,3", "35,5", "35,6", "35,7", "35,8", "35,9", "36,0", "36,1", "36,2", "36,3", "36,4", "36,5", "36,6", "36,7", "36,8", "36,9", "37,0", "37,1", "37,2", "37,3", "37,4", "37,5", "37,6", "37,8", "37,9", "370,8", "375,6", "38,0", "38,1", "38,2", "38,3", "38,4", "38,5", "38,6", "38,7", "38,8", "38,9", "39,0", "39,1", "39,2", "39,3", "39,4", "39,6", "39,7", "39,8", "39,9", "396,2", "40,1", "40,2", "40,3", "40,7", "40,9", "41,3", "41,4", "41,6", "41,7", "41,8", "41,9", "42,7", "42,8", "421,0", "43,5", "43,7", "433,0", "44,2", "446,1", "448,9", "449,3", "45,4", "45,8", "47,7", "472,8", "49,2", "492,3", "494,9", "497,9", "507,6", "520,8", "54,4", "541,6", "575,4", "577,4", "6,6", "6,8", "60,0", "61,2", "61,4", "61,5", "61,6", "61,7", "61,9", "619,1", "62,3", "62,9", "63,0", "63,5", "64,0", "64,1", "64,6", "642,8", "646,2", "65,8", "663,5", "666,2", "671,6", "7,2", "7,8", "708,7", "711,6", "8,1", "8,2", "8,3", "8,5", "8,7", "8,8", "8,9", "804,0", "829,0", "856,0", "87,0", "87,4", "87,5", "88,1", "88,3", "9,0", "9,1", "9,3", "9,5", "9,7", "9,8", "9,9", "90,9", "928,2", "975,8"), class = "factor")), .Names = "HOURS.at.sea", class = "data.frame", row.names = c(NA, -913L))

1
Comments are not for extended discussion; this conversation has been moved to chat.Taryn♦

1 Answers

8
votes

If the commas are thousands separators, a common problem when users save a formatted Excel spreadsheet as *.csv and then try to import it into R, then you have two problems. Commas are not allowed in numbers in R (or they are interpreted as decimal point - depends on your locale setting) so, e.g. as.numeric("1,000") will return NA, not 1000. You have to get rid of the commas and also convert from factor to numeric.

a <- factor(c("10","20","30","40","50"))
as.numeric(a)               # returns the factor codes!!
# [1] 1 2 3 4 5
as.numeric(as.character(a)) # returns the factor levels, as numeric
# [1] 10 20 30 40 50

b <- factor(c(10,20,30,40,50))
as.numeric(b)               # returns the factor codes!!
# [1] 1 2 3 4 5

c <- factor(c("1,000","2,000","3,000","4,000","5,000"))
as.numeric(c)               # returns the factor codes
# [1] 1 2 3 4 5
as.numeric(as.character(c)) # returns NA - commas are NOT allowed in numbers in R
# [1] NA NA NA NA NA
# Warning message:
# NAs introduced by coercion 
as.numeric(gsub(",","",c,fixed=TRUE))
# [1] 1000 2000 3000 4000 5000

In the last line, gsub(",","",c,fixed=TRUE) just removes the commas.

EDIT As @CarlWitthoft points out in the comment, if the commas are decimal separators, you can import the data using:

df <- read.csv("mydata.csv", dec=",")

This will avoid a lot of problems later. Given the (already imported) dataset you present in the question, this will fix it:

datatest$HOURS.at.sea <- as.numeric(gsub(",",".",datatest$HOURS.at.sea,fixed=TRUE))

This replaces the "," with ".". Since gsub(...) returns a character vector, not a factor, you can use as.numeric(...) directly on that. Notice that you still get some NAs, because some of the rows have "#VALUE!" - looks like an Excel dump.