I'm working with ARKit and trying to get camera position from QR code with known size (0.16m). To detect QR code I'am using Vision framework so i can get each corner point on image.
Data preparation:
let intrinsics = arFrame.camera.intrinsics
let imageResolution = arFrame.camera.imageResolution
let imagePointsArray = [NSValue(cgPoint: visionResult.topLeft), NSValue(cgPoint: visionResult.topRight), NSValue(cgPoint: visionResult.bottomLeft), NSValue(cgPoint: visionResult.bottomRight)]
let intrinsicsArray = (0..<3).flatMap { x in (0..<3).map { y in NSNumber(value: intrinsics[x][y]) } }
let squareLength = NSNumber(value: 0.16)
let res = OpenCVWrapper.findPose(imagePointsArray, intrinsics: intrinsicsArray, size: imageResolution, squareLength: squareLength)
To get camera position I'm using OpenCV solution solvePnP() with flag = SOLVEPNP_IPPE_SQUARE
OpenCV in Objective-C++ based on this answer:
+(Pose)findPose: (NSArray<NSValue *> *) imagePoints
intrinsics: (NSArray<NSNumber *> *) intrinsics
imageResolution: (CGSize) imageResolution
squareLength: (NSNumber *) squareLength {
cv::Mat distCoeffs(4,1,cv::DataType<double>::type, 0.0);
cv::Mat rvec(3,1,cv::DataType<double>::type);
cv::Mat tvec(3,1,cv::DataType<double>::type);
cv::Mat cameraMatrix = [self intrinsicMatrixWithArray:intrinsics];
vector<Point2f> cvImagePoints = [self convertImagePoints:imagePoints toSize: imageResolution];
vector<Point3f> cvObjectPoints = [self getObjectPointsWithSquareLength:squareLength];
std::cout << "object points: \n" << cvObjectPoints << std::endl;
std::cout << "image points: \n" << cvImagePoints << std::endl;
std::cout << "cameraMatrix points: \n" << cameraMatrix << std::endl;
cv::solvePnP(cvObjectPoints, cvImagePoints, cameraMatrix, distCoeffs, rvec, tvec, false, SOLVEPNP_IPPE_SQUARE);
std::cout << "rvec: \n" << rvec << std::endl;
std::cout << "tvec: \n" << tvec << std::endl;
cv::Mat RotX(3, 3, cv::DataType<double>::type);
cv::setIdentity(RotX);
RotX.at<double>(4) = -1; //cos(180) = -1
RotX.at<double>(8) = -1;
cv::Mat R;
cv::Rodrigues(rvec, R);
R = R.t(); // rotation of inverse
Mat rvecConverted;
Rodrigues(R, rvecConverted); //
std::cout << "rvec in world coords:\n" << rvecConverted << std::endl;
rvecConverted = RotX * rvecConverted;
std::cout << "rvec scenekit :\n" << rvecConverted << std::endl;
Mat tvecConverted = -R * tvec;
std::cout << "tvec in world coords:\n" << tvecConverted << std::endl;
tvecConverted = RotX * tvecConverted;
std::cout << "tvec scenekit :\n" << tvecConverted << std::endl;
SCNVector4 rotationVector = SCNVector4Make(rvecConverted.at<double>(0), rvecConverted.at<double>(1), rvecConverted.at<double>(2), norm(rvecConverted));
SCNVector3 translationVector = SCNVector3Make(tvecConverted.at<double>(0), tvecConverted.at<double>(1), tvecConverted.at<double>(2));
return Pose{rotationVector, translationVector};
}
+ (vector<Point3f>) getObjectPointsWithSquareLength: (NSNumber*) squareLength {
vector<Point3f> points;
double squareLengthDouble = [squareLength doubleValue];
points.push_back(Point3f(-squareLengthDouble/2, squareLengthDouble/2, 0));
points.push_back(Point3f(squareLengthDouble/2, squareLengthDouble/2, 0));
points.push_back(Point3f(squareLengthDouble/2, -squareLengthDouble/2, 0));
points.push_back(Point3f(-squareLengthDouble/2, -squareLengthDouble/2, 0));
return points;
}
+ (vector<Point2f>) convertImagePoints: (NSArray<NSValue *> *) array
toSize: (CGSize) size {
vector<Point2f> points;
for (NSValue * value in array) {
CGPoint point = [value CGPointValue];
points.push_back(Point2f((point.x * size.width), (point.y * size.height)));
}
return points;
}
+ (cv::Mat) intrinsicMatrixWithArray: (NSArray<NSNumber *> *) intrinsics {
Mat result(3,3,cv::DataType<double>::type);
cv::setIdentity(result);
result.at<double>(0) = [intrinsics[0] doubleValue]; //fx
result.at<double>(4) = [intrinsics[4] doubleValue]; //fy
result.at<double>(2) = [intrinsics[6] doubleValue]; //cx
result.at<double>(5) = [intrinsics[7] doubleValue]; //cy
result.at<double>(8) = [intrinsics[8] doubleValue]; //1
return result;
}
The problem is when I point camera directly to QR code with 2 meters distance, the result of translationVector.z (tvec scenekit) should be equal to 2 meters, but instead there is a random positive or negative numbers.
Output:
Calculated distance to QR 2.0856588
object points:
[-0.079999998, 0.079999998, 0;
0.079999998, 0.079999998, 0;
0.079999998, -0.079999998, 0;
-0.079999998, -0.079999998, 0]
image points:
[795.98724, 717.27045;
684.5592, 715.80487;
793.31567, 826.06146;
684.40692, 824.39771]
cameraMatrix points:
[1454.490478515625, 0, 935.6685791015625;
0, 1454.490478515625, 717.999267578125;
0, 0, 1]
rvec:
[-0.9251278749049585;
1.185890362907954;
-0.9989977018022447]
tvec:
[0.04753833193572054;
-0.009999648596310796;
-0.3527916723601041]
rvec in world coords:
[0.9251278749049584;
-1.185890362907954;
0.9989977018022447]
rvec scenekit :
[0.9251278749049584;
1.185890362907954;
-0.9989977018022447]
tvec in world coords:
[-0.1159248829391864;
-0.3366933247327607;
0.004569098144615695]
tvec scenekit :
[-0.1159248829391864;
0.3366933247327607;
-0.004569098144615695]
Thanks for any help