Since you are composing functions, not passing a value from one function to another, you have to use the composition operator (>>) instead of the pipe (|>):
open System.IO
/// Returns the paths that `Directory.GetFiles` reports for `directory`.
let getAllFiles (directory : string) : string[] =
    Directory.GetFiles directory
/// Maps a directory path to a sequence of (file path, FileInfo) pairs.
/// Written point-free: `getAllFiles` is composed with the mapping via `>>`,
/// so no explicit argument appears in the definition.
let getFileInfo =
    // Pairs a file path with a FileInfo constructed from that same path.
    let withInfo (path : string) = path, FileInfo path
    getAllFiles >> Seq.map withInfo
Alternatively, you can make the argument explicit and then you can use the pipe:
open System.IO
/// Returns the paths that `Directory.GetFiles` reports for `directory`.
let getAllFiles (directory : string) : string[] =
    Directory.GetFiles directory
/// Maps a directory path to a sequence of (file path, FileInfo) pairs.
/// The argument is explicit here, so the value can be piped through with `|>`.
let getFileInfo directory =
    // Pairs a file path with a FileInfo constructed from that same path.
    let withInfo (path : string) = path, FileInfo path
    directory |> getAllFiles |> Seq.map withInfo
Update for the comment:
If I specifically want to get the file length, how should I write it instead?
open System.IO
/// Returns the paths that `Directory.GetFiles` reports for `directory`.
let getAllFiles (directory : string) : string[] =
    Directory.GetFiles(directory)
/// Maps a directory path to a sequence of (file name, FileInfo) pairs,
/// one pair per path returned by `getAllFiles`.
let getFileInfos =
    // Pairs a file name with a FileInfo constructed from that same name.
    let pairWithInfo (fileName : string) = fileName, FileInfo fileName
    getAllFiles >> Seq.map pairWithInfo
/// Maps a directory path to a sequence of (file name, file length) pairs
/// by reading `Length` from each FileInfo produced by `getFileInfos`.
let getFileLengths =
    // Replaces the FileInfo in each pair with its Length.
    let toNameAndLength (fileName : string, fileInfo : FileInfo) =
        fileName, fileInfo.Length
    getFileInfos >> Seq.map toNameAndLength
If you mean to get the lengths of all the files in the directory, you could use a getFileLengths function like the one in the code above. Note that I renamed getFileInfo to getFileInfos, since it returns one entry per file.
Update for the second comment:
I would like to group the tuples according to the length of each file using Seq.groupBy. How may I go about doing so?
The function removeSingletonGroups removes all groups that contain only one value.
The function getFilesGroupedByLength first uses Seq.groupBy to create groups with the file lengths as keys, then uses Seq.map to remove the file length from the values, so the value of each group is only a list of filenames. Finally it uses removeSingletonGroups to purge all groups that contain only one file.
/// Keeps only the (key, members) pairs whose member list does not contain
/// exactly one element.
let removeSingletonGroups groupedSeq =
    // A group survives when its list length differs from 1.
    let isNotSingleton (_, members) = List.length members <> 1
    Seq.filter isNotSingleton groupedSeq
/// Maps a directory path to (file length, file names) groups:
/// the (name, length) pairs from `getFileLengths` are grouped by length,
/// each group is reduced to a list of names, and the result is passed
/// through `removeSingletonGroups`.
let getFilesGroupedByLength =
    // Drops the length from every entry of a group, leaving only the names.
    let namesOnly (fileLength, entries) =
        fileLength, entries |> Seq.map fst |> List.ofSeq
    getFileLengths
    >> Seq.groupBy snd
    >> Seq.map namesOnly
    >> removeSingletonGroups
Final(?) update:
I just saw your question regarding the duplicate file checker. I think this approach is what you were looking for:
open System.IO
/// Returns the sequence of file paths that `Directory.EnumerateFiles`
/// yields for `directoryPath`.
let getFilesOfDirectory (directoryPath : string) : seq<string> =
    Directory.EnumerateFiles(directoryPath)
/// Returns the `Length` of the FileInfo constructed for `filePath`.
let getFileLength (filePath : string) : int64 =
    let info = FileInfo filePath
    info.Length
/// Groups the given file paths with `Seq.groupBy`, using `getFileLength`
/// as the key function, producing (length, paths) pairs.
let groupFilesByLength files =
    Seq.groupBy getFileLength files
/// Keeps only the (key, members) pairs whose member sequence contains
/// more than one element.
let removeSingletonGroups groupedSeq =
    // A group survives only when it has at least two members.
    let hasSeveralMembers (_, members) = Seq.length members > 1
    Seq.filter hasSeveralMembers groupedSeq
/// Returns the groups of files in `directoryPath` that share a file length:
/// the files are listed, grouped by length, and groups with a single member
/// are dropped by `removeSingletonGroups`.
let getDuplicateFileCandidates directoryPath =
    let files = getFilesOfDirectory directoryPath
    let groupedByLength = groupFilesByLength files
    removeSingletonGroups groupedByLength