OpenCV Projection and Three-Dimensional Vision
Projection
Once we have a calibrated camera, we can use the following function to project points in three-dimensional space onto the image plane.
void cv::projectPoints(
cv::InputArray objectPoints, // 3xN/Nx3 Nc=1, 1xN/Nx1 Nc=3,
// or vector<Point3f>
cv::InputArray rvec, // Rotation *vector*
// (see cv::Rodrigues())
cv::InputArray tvec, // Translation vector
cv::InputArray cameraMatrix, // 3x3 Camera intrinsics matrix
cv::InputArray distCoeffs, // 4, 5, or 8 elements vector,
// or cv::noArray()
cv::OutputArray imagePoints, // 2xN/Nx2 Nc=1, 1xN/Nx1 Nc=2,
// or vector<Point2f>
cv::OutputArray jacobian = cv::noArray(), // Optional,
// 2N x (10+nDistCoeff)
double aspectRatio = 0 // If nonzero, fix
// fx/fy at this value
);
Parameters:
- objectPoints: the points in the three-dimensional object (world) coordinate system
- rvec, tvec: the rotation vector and translation vector that relate the object coordinate system to the camera coordinate system (note that rvec is a rotation *vector*, convertible to a rotation matrix with cv::Rodrigues())
- cameraMatrix, distCoeffs: the camera intrinsics matrix and distortion coefficients computed by cv::calibrateCamera()
- imagePoints: the resulting projected image points
- jacobian: if supplied, it is filled with the partial derivatives of each image point with respect to the rotation vector, the translation vector, the camera intrinsics, and the distortion coefficients; pass cv::noArray() if it is not needed
- aspectRatio: if nonzero, the ratio fx/fy is fixed at this value
Affine and Perspective Transformations
For details, see: OpenCV General Image Transforms. Only a brief summary is given here.
Bird's-Eye View Transform
Algorithm steps:
- Read the intrinsics matrix and distortion coefficients
- Locate the object on the plane using a calibration chessboard
- Compute the homography matrix with cv::getPerspectiveTransform()
- Call cv::warpPerspective() with cv::WARP_INVERSE_MAP | cv::INTER_LINEAR to obtain the bird's-eye view
// Example 19-1. Bird's-eye view
#include <opencv2/opencv.hpp>
#include <iostream>
using namespace std;
void help(char *argv[]) {
cout << "\nExample 19-01, using homography to get a bird's eye view."
<< "\nThis file relies on you having created an intrinsic file via example_18-01_from_disk"
<< "\n but here, that file is already stored in ../birdseye/intrinsics.xml"
<< "\nCall:"
<< "\n./example_19-01 <chessboard_width> <chessboard_height> <path/camera_calib_filename> <path/chessboard_image>"
<< "\n\nExample:"
<< "\n./example_19-01 12 12 ../birdseye/intrinsics.xml ../birdseye/IMG_0215L.jpg\n"
<< "\nPress 'd' for lower birdseye view, and 'u' for higher (it adjusts the apparent 'Z' height), Esc to exit\n"
<< endl;
}
// args: [board_w] [board_h] [intrinsics.xml] [checker_image]
//
int main(int argc, char *argv[]) {
if (argc != 5) {
cout << "\nERROR: wrong number of parameters\n";
help(argv);
return -1;
}
// Input Parameters:
//
int board_w = atoi(argv[1]);
int board_h = atoi(argv[2]);
int board_n = board_w * board_h;
cv::Size board_sz(board_w, board_h);
cv::FileStorage fs(argv[3], cv::FileStorage::READ);
cv::Mat intrinsic, distortion;
fs["camera_matrix"] >> intrinsic;
fs["distortion_coefficients"] >> distortion;
if (!fs.isOpened() || intrinsic.empty() || distortion.empty()) {
cout << "Error: Couldn't load intrinsic parameters from " << argv[3]
<< endl;
return -1;
}
fs.release();
cv::Mat gray_image, image, image0 = cv::imread(argv[4], 1);
if (image0.empty()) {
cout << "Error: Couldn't load image " << argv[4] << endl;
return -1;
}
// UNDISTORT OUR IMAGE
//
cv::undistort(image0, image, intrinsic, distortion, intrinsic);
cv::cvtColor(image, gray_image, cv::COLOR_BGR2GRAY);
// GET THE CHECKERBOARD ON THE PLANE
//
vector<cv::Point2f> corners;
bool found = cv::findChessboardCorners( // True if found
image, // Input image
board_sz, // Pattern size
corners, // Results
cv::CALIB_CB_ADAPTIVE_THRESH | cv::CALIB_CB_FILTER_QUADS);
if (!found) {
cout << "Couldn't acquire checkerboard on " << argv[4] << ", only found "
<< corners.size() << " of " << board_n << " corners\n";
return -1;
}
// Get Subpixel accuracy on those corners
//
cv::cornerSubPix(
gray_image, // Input image
corners, // Initial guesses, also output
cv::Size(11, 11), // Search window size
cv::Size(-1, -1), // Zero zone (in this case, don't use)
cv::TermCriteria(cv::TermCriteria::EPS | cv::TermCriteria::COUNT, 30,
0.1));
// GET THE IMAGE AND OBJECT POINTS:
// Object points are at (r,c):
// (0,0), (board_w-1,0), (0,board_h-1), (board_w-1,board_h-1)
// That means corners are at: corners[r*board_w + c]
//
cv::Point2f objPts[4], imgPts[4];
objPts[0].x = 0;
objPts[0].y = 0;
objPts[1].x = board_w - 1;
objPts[1].y = 0;
objPts[2].x = 0;
objPts[2].y = board_h - 1;
objPts[3].x = board_w - 1;
objPts[3].y = board_h - 1;
imgPts[0] = corners[0];
imgPts[1] = corners[board_w - 1];
imgPts[2] = corners[(board_h - 1) * board_w];
imgPts[3] = corners[(board_h - 1) * board_w + board_w - 1];
// DRAW THE POINTS in order: B,G,R,YELLOW
//
cv::circle(image, imgPts[0], 9, cv::Scalar(255, 0, 0), 3);
cv::circle(image, imgPts[1], 9, cv::Scalar(0, 255, 0), 3);
cv::circle(image, imgPts[2], 9, cv::Scalar(0, 0, 255), 3);
cv::circle(image, imgPts[3], 9, cv::Scalar(0, 255, 255), 3);
// DRAW THE FOUND CHECKERBOARD
//
cv::drawChessboardCorners(image, board_sz, corners, found);
cv::namedWindow("Checkers", 0);
cv::imshow("Checkers", image);
// FIND THE HOMOGRAPHY
//
cv::Mat H = cv::getPerspectiveTransform(objPts, imgPts);
// LET THE USER ADJUST THE Z HEIGHT OF THE VIEW
//
cout << "\nPress 'd' for lower birdseye view, and 'u' for higher (it adjusts the apparent 'Z' height), Esc to exit" << endl;
double Z = 15;
cv::Mat birds_image;
for (;;) {
// escape key stops
H.at<double>(2, 2) = Z;
// USE HOMOGRAPHY TO REMAP THE VIEW
//
cv::warpPerspective(image, // Source image
birds_image, // Output image
H, // Transformation matrix
image.size(), // Size for output image
cv::WARP_INVERSE_MAP | cv::INTER_LINEAR,
cv::BORDER_CONSTANT, cv::Scalar::all(0) // Fill border with black
);
cv::namedWindow("Birds_Eye", 0);
cv::imshow("Birds_Eye", birds_image);
int key = cv::waitKey() & 255;
if (key == 'u')
Z += 0.5;
if (key == 'd')
Z -= 0.5;
if (key == 27)
break;
}
// SHOW ROTATION AND TRANSLATION VECTORS
//
vector<cv::Point2f> image_points;
vector<cv::Point3f> object_points;
for (int i = 0; i < 4; ++i) {
image_points.push_back(imgPts[i]);
object_points.push_back(cv::Point3f(objPts[i].x, objPts[i].y, 0));
}
cv::Mat rvec, tvec, rmat;
cv::solvePnP(object_points, // 3-d points in object coordinate
image_points, // 2-d points in image coordinates
intrinsic, // Our camera matrix
cv::Mat(), // Since we corrected distortion at the
// beginning, we now have zero distortion
// coefficients
rvec, // Output rotation *vector*.
tvec // Output translation vector.
);
cv::Rodrigues(rvec, rmat);
// PRINT AND EXIT
cout << "rotation matrix: " << rmat << endl;
cout << "translation vector: " << tvec << endl;
cout << "homography matrix: " << H << endl;
cout << "inverted homography matrix: " << H.inv() << endl;
return 0;
}
Three-Dimensional Pose Estimation
The pose of an object in three-dimensional space can be estimated with either a single camera or multiple cameras. Multiple cameras can handle unknown objects in unknown environments, but require the extra hardware. For a known object, a single camera is enough to recover the pose. We will start with the single-camera case, which also helps in understanding the multi-camera setting.
Single-Camera Pose Estimation
For a known object, we must first obtain several of its key points; for details, see: OpenCV Keypoints and Descriptors. The positions of the visible keypoints can then be used to determine the object's pose.
cv::solvePnP() or cv::solvePnPRansac() solve the general Perspective-N-Point (PnP) problem, and can therefore be applied here. Note, however, that the PnP problem does not always have a well-determined solution. First, while in theory no fewer than 3 feature points are required, in practice around 12 or more are usually needed for acceptable accuracy. Second, the object must not be too far from the camera; otherwise the rays through the different feature points become nearly parallel and the problem becomes ill-conditioned. A useful rule of thumb is that the visible feature points alone are generally enough to recover the object's pose approximately, which greatly reduces the amount of computation.