I have a solution for iOS and macOS without using external helpers like dd and xxd. I have just found it, so I report it as it is, planning to improve it at a later stage. For the moment, it relies on both Objective-C and Swift code. First of all, create this helper class in Objective-C:
AWS3MD5Hash.h
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
@interface AWS3MD5Hash : NSObject
- (NSData *)dataFromFile:(FILE *)theFile startingOnByte:(UInt64)startByte length:(UInt64)length filePath:(NSString *)path singlePartSize:(NSUInteger)partSizeInMb;
- (NSData *)dataFromBigData:(NSData *)theData startingOnByte:(UInt64)startByte length:(UInt64)length;
- (NSData *)dataFromHexString:(NSString *)sourceString;
@end
NS_ASSUME_NONNULL_END
AWS3MD5Hash.m
#import "AWS3MD5Hash.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SIZE 256
@implementation AWS3MD5Hash
- (NSData *)dataFromFile:(FILE *)theFile startingOnByte:(UInt64)startByte length:(UInt64)length filePath:(NSString *)path singlePartSize:(NSUInteger)partSizeInMb {
char *buffer = malloc(length);
NSURL *fileURL = [NSURL fileURLWithPath:path];
NSNumber *fileSizeValue = nil;
NSError *fileSizeError = nil;
[fileURL getResourceValue:&fileSizeValue
forKey:NSURLFileSizeKey
error:&fileSizeError];
NSInteger __unused result = fseek(theFile,startByte,SEEK_SET);
if (result != 0) {
free(buffer);
return nil;
}
NSInteger result2 = fread(buffer, length, 1, theFile);
NSUInteger difference = fileSizeValue.integerValue - startByte;
NSData *toReturn;
if (result2 == 0) {
toReturn = [NSData dataWithBytes:buffer length:difference];
} else {
toReturn = [NSData dataWithBytes:buffer length:result2 * length];
}
free(buffer);
return toReturn;
}
- (NSData *)dataFromBigData:(NSData *)theData startingOnByte: (UInt64)startByte length:(UInt64)length {
NSUInteger fileSizeValue = theData.length;
NSData *subData;
if (startByte + length > fileSizeValue) {
subData = [theData subdataWithRange:NSMakeRange(startByte, fileSizeValue - startByte)];
} else {
subData = [theData subdataWithRange:NSMakeRange(startByte, length)];
}
return subData;
}
- (NSData *)dataFromHexString:(NSString *)string {
string = [string lowercaseString];
NSMutableData *data= [NSMutableData new];
unsigned char whole_byte;
char byte_chars[3] = {'\0','\0','\0'};
NSInteger i = 0;
NSInteger length = string.length;
while (i < length-1) {
char c = [string characterAtIndex:i++];
if (c < '0' || (c > '9' && c < 'a') || c > 'f')
continue;
byte_chars[0] = c;
byte_chars[1] = [string characterAtIndex:i++];
whole_byte = strtol(byte_chars, NULL, 16);
[data appendBytes:&whole_byte length:1];
}
return data;
}
@end
Now create a plain swift file:
AWS Extensions.swift
import UIKit
import CommonCrypto
extension URL {
func calculateAWSS3MD5Hash(_ numberOfParts: UInt64) -> String? {
do {
var fileSize: UInt64!
var calculatedPartSize: UInt64!
let attr:NSDictionary? = try FileManager.default.attributesOfItem(atPath: self.path) as NSDictionary
if let _attr = attr {
fileSize = _attr.fileSize();
if numberOfParts != 0 {
let partSize = Double(fileSize / numberOfParts)
var partSizeInMegabytes = Double(partSize / (1024.0 * 1024.0))
partSizeInMegabytes = ceil(partSizeInMegabytes)
calculatedPartSize = UInt64(partSizeInMegabytes)
if calculatedPartSize % 2 != 0 {
calculatedPartSize += 1
}
if numberOfParts == 2 || numberOfParts == 3 { // Very important when there are 2 or 3 parts, in the majority of times
// the calculatedPartSize is already 8. In the remaining cases we force it.
calculatedPartSize = 8
}
if mainLogToggling {
print("The calculated part size is \(calculatedPartSize!) Megabytes")
}
}
}
if numberOfParts == 0 {
let string = self.memoryFriendlyMd5Hash()
return string
}
let hasher = AWS3MD5Hash.init()
let file = fopen(self.path, "r")
defer { let result = fclose(file)}
var index: UInt64 = 0
var bigString: String! = ""
var data: Data!
while autoreleasepool(invoking: {
if index == (numberOfParts-1) {
if mainLogToggling {
//print("Siamo all'ultima linea.")
}
}
data = hasher.data(from: file!, startingOnByte: index * calculatedPartSize * 1024 * 1024, length: calculatedPartSize * 1024 * 1024, filePath: self.path, singlePartSize: UInt(calculatedPartSize))
bigString = bigString + MD5.get(data: data) + "\n"
index += 1
if index == numberOfParts {
return false
}
return true
}) {}
let final = MD5.get(data :hasher.data(fromHexString: bigString)) + "-\(numberOfParts)"
return final
} catch {
}
return nil
}
func memoryFriendlyMd5Hash() -> String? {
let bufferSize = 1024 * 1024
do {
// Open file for reading:
let file = try FileHandle(forReadingFrom: self)
defer {
file.closeFile()
}
// Create and initialize MD5 context:
var context = CC_MD5_CTX()
CC_MD5_Init(&context)
// Read up to `bufferSize` bytes, until EOF is reached, and update MD5 context:
while autoreleasepool(invoking: {
let data = file.readData(ofLength: bufferSize)
if data.count > 0 {
data.withUnsafeBytes {
_ = CC_MD5_Update(&context, $0, numericCast(data.count))
}
return true // Continue
} else {
return false // End of file
}
}) { }
// Compute the MD5 digest:
var digest = Data(count: Int(CC_MD5_DIGEST_LENGTH))
digest.withUnsafeMutableBytes {
_ = CC_MD5_Final($0, &context)
}
let hexDigest = digest.map { String(format: "%02hhx", $0) }.joined()
return hexDigest
} catch {
print("Cannot open file:", error.localizedDescription)
return nil
}
}
struct MD5 {
static func get(data: Data) -> String {
var digest = [UInt8](repeating: 0, count: Int(CC_MD5_DIGEST_LENGTH))
let _ = data.withUnsafeBytes { bytes in
CC_MD5(bytes, CC_LONG(data.count), &digest)
}
var digestHex = ""
for index in 0..<Int(CC_MD5_DIGEST_LENGTH) {
digestHex += String(format: "%02x", digest[index])
}
return digestHex
}
// The following is a memory friendly version
static func get2(data: Data) -> String {
var currentIndex = 0
let bufferSize = 1024 * 1024
//var digest = [UInt8](repeating: 0, count: Int(CC_MD5_DIGEST_LENGTH))
// Create and initialize MD5 context:
var context = CC_MD5_CTX()
CC_MD5_Init(&context)
while autoreleasepool(invoking: {
var subData: Data!
if (currentIndex + bufferSize) < data.count {
subData = data.subdata(in: Range.init(NSMakeRange(currentIndex, bufferSize))!)
currentIndex = currentIndex + bufferSize
} else {
subData = data.subdata(in: Range.init(NSMakeRange(currentIndex, data.count - currentIndex))!)
currentIndex = currentIndex + (data.count - currentIndex)
}
if subData.count > 0 {
subData.withUnsafeBytes {
_ = CC_MD5_Update(&context, $0, numericCast(subData.count))
}
return true
} else {
return false
}
}) { }
// Compute the MD5 digest:
var digest = Data(count: Int(CC_MD5_DIGEST_LENGTH))
digest.withUnsafeMutableBytes {
_ = CC_MD5_Final($0, &context)
}
var digestHex = ""
for index in 0..<Int(CC_MD5_DIGEST_LENGTH) {
digestHex += String(format: "%02x", digest[index])
}
return digestHex
}
}
Now add:
#import "AWS3MD5Hash.h"
to your Objective-C Bridging header. You should be ok with this setup.
Example usage
To test this setup, you could be calling the following method inside the object that is in charge of handling the AWS connections:
func getMd5HashForFile() {
let credentialProvider = AWSCognitoCredentialsProvider(regionType: AWSRegionType.USEast2, identityPoolId: "<INSERT_POOL_ID>")
let configuration = AWSServiceConfiguration(region: AWSRegionType.APSoutheast2, credentialsProvider: credentialProvider)
configuration?.timeoutIntervalForRequest = 3.0
configuration?.timeoutIntervalForResource = 3.0
AWSServiceManager.default().defaultServiceConfiguration = configuration
AWSS3.register(with: configuration!, forKey: "defaultKey")
let s3 = AWSS3.s3(forKey: "defaultKey")
let headObjectRequest = AWSS3HeadObjectRequest()!
headObjectRequest.bucket = "<NAME_OF_YOUR_BUCKET>"
headObjectRequest.key = self.latestMapOnServer.key
let _: AWSTask? = s3.headObject(headObjectRequest).continueOnSuccessWith { (awstask) -> Any? in
let headObjectOutput: AWSS3HeadObjectOutput? = awstask.result
var ETag = headObjectOutput?.eTag!
// Here you should parse the returned Etag and extract the number of parts to provide to the helper function. Etags end with a "-" followed by the number of parts. If you don't see this format, then pass 0 as the number of parts.
ETag = ETag!.replacingOccurrences(of: "\"", with: "")
print("headObjectOutput.ETag \(ETag!)")
let mapOnDiskUrl = self.getMapsDirectory().appendingPathComponent(self.latestMapOnDisk!)
let hash = mapOnDiskUrl.calculateAWSS3MD5Hash(<Take the number of parts from the ETag returned by the server>)
if hash == ETag {
print("They are the same.")
}
print ("\(hash!)")
return nil
}
}
If the ETag returned by the server does not have "-" at the end of the ETag, just pass 0 to calculateAWSS3MD5Hash. Please comment if you encounter any problems. I am working on a swift only solution, I will update this answer as soon as I finish. Thanks