From effaa5d075c5916bee2a5139ab46fc50767734b9 Mon Sep 17 00:00:00 2001 From: Xuanrui Zhu <634210954@qq.com> Date: Wed, 3 Dec 2025 15:26:59 +0000 Subject: [PATCH 01/29] init commit --- modules/slam/include/opencv2/slam.hpp | 1 + 1 file changed, 1 insertion(+) create mode 100644 modules/slam/include/opencv2/slam.hpp diff --git a/modules/slam/include/opencv2/slam.hpp b/modules/slam/include/opencv2/slam.hpp new file mode 100644 index 00000000000..6f70f09beec --- /dev/null +++ b/modules/slam/include/opencv2/slam.hpp @@ -0,0 +1 @@ +#pragma once From c4cb4d9c230ec644104a09c035cf40d3533f8630 Mon Sep 17 00:00:00 2001 From: Xuanrui Zhu <634210954@qq.com> Date: Wed, 3 Dec 2025 20:10:04 +0000 Subject: [PATCH 02/29] SLAM VO: a framework --- modules/slam/include/opencv2/slam.hpp | 28 +- .../slam/include/opencv2/slam/data_loader.hpp | 44 ++ modules/slam/include/opencv2/slam/feature.hpp | 38 ++ .../slam/include/opencv2/slam/initializer.hpp | 87 ++++ .../slam/include/opencv2/slam/keyframe.hpp | 21 + .../slam/include/opencv2/slam/localizer.hpp | 32 ++ modules/slam/include/opencv2/slam/map.hpp | 91 ++++ modules/slam/include/opencv2/slam/matcher.hpp | 34 ++ .../slam/include/opencv2/slam/optimizer.hpp | 69 +++ modules/slam/include/opencv2/slam/pose.hpp | 19 + .../slam/include/opencv2/slam/visualizer.hpp | 47 ++ modules/slam/include/opencv2/slam/vo.hpp | 114 +++++ modules/slam/src/data_loader.cpp | 103 +++++ modules/slam/src/feature.cpp | 167 +++++++ modules/slam/src/initializer.cpp | 406 ++++++++++++++++++ modules/slam/src/keyframe.cpp | 0 modules/slam/src/localizer.cpp | 157 +++++++ modules/slam/src/map.cpp | 319 ++++++++++++++ modules/slam/src/matcher.cpp | 132 ++++++ modules/slam/src/optimizer.cpp | 255 +++++++++++ modules/slam/src/pose.cpp | 43 ++ modules/slam/src/visualizer.cpp | 131 ++++++ modules/slam/src/vo.cpp | 389 +++++++++++++++++ 23 files changed, 2725 insertions(+), 1 deletion(-) create mode 100644 modules/slam/include/opencv2/slam/data_loader.hpp create mode 100644 modules/slam/include/opencv2/slam/feature.hpp create mode 100644 modules/slam/include/opencv2/slam/initializer.hpp create mode 100644 modules/slam/include/opencv2/slam/keyframe.hpp create mode 100644 modules/slam/include/opencv2/slam/localizer.hpp create mode 100644 modules/slam/include/opencv2/slam/map.hpp create mode 100644 modules/slam/include/opencv2/slam/matcher.hpp create mode 100644 modules/slam/include/opencv2/slam/optimizer.hpp create mode 100644 modules/slam/include/opencv2/slam/pose.hpp create mode 100644 modules/slam/include/opencv2/slam/visualizer.hpp create mode 100644 modules/slam/include/opencv2/slam/vo.hpp create mode 100644 modules/slam/src/data_loader.cpp create mode 100644 modules/slam/src/feature.cpp create mode 100644 modules/slam/src/initializer.cpp create mode 100644 modules/slam/src/keyframe.cpp create mode 100644 modules/slam/src/localizer.cpp create mode 100644 modules/slam/src/map.cpp create mode 100644 modules/slam/src/matcher.cpp create mode 100644 modules/slam/src/optimizer.cpp create mode 100644 modules/slam/src/pose.cpp create mode 100644 modules/slam/src/visualizer.cpp create mode 100644 modules/slam/src/vo.cpp diff --git a/modules/slam/include/opencv2/slam.hpp b/modules/slam/include/opencv2/slam.hpp index 6f70f09beec..334925ec9b7 100644 --- a/modules/slam/include/opencv2/slam.hpp +++ b/modules/slam/include/opencv2/slam.hpp @@ -1 +1,27 @@ -#pragma once +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html + +#ifndef __OPENCV_SLAM_HPP__ +#define __OPENCV_SLAM_HPP__ + +#include "opencv2/slam/data_loader.hpp" +#include "opencv2/slam/feature.hpp" +#include "opencv2/slam/initializer.hpp" +#include "opencv2/slam/keyframe.hpp" +#include "opencv2/slam/localizer.hpp" +#include "opencv2/slam/map.hpp" +#include "opencv2/slam/matcher.hpp" +#include "opencv2/slam/optimizer.hpp" +#include "opencv2/slam/pose.hpp" +#include "opencv2/slam/visualizer.hpp" +#include "opencv2/slam/vo.hpp" + + +/** @defgroup slam Simultaneous Localization and Mapping + @brief Simultaneous Localization and Mapping (SLAM) module +*/ + +#endif + +/* End of file. */ \ No newline at end of file diff --git a/modules/slam/include/opencv2/slam/data_loader.hpp b/modules/slam/include/opencv2/slam/data_loader.hpp new file mode 100644 index 00000000000..acc7795a1f8 --- /dev/null +++ b/modules/slam/include/opencv2/slam/data_loader.hpp @@ -0,0 +1,44 @@ +#pragma once +#include +#include +#include + +namespace cv{ +namespace vo{ + +class DataLoader { +public: + // 构造:传入图像目录(可以是相对或绝对路径) + DataLoader(const std::string &imageDir); + + // 获取下一张图像,成功返回 true 并填充 image 与 imagePath;到末尾返回 false + bool getNextImage(Mat &image, std::string &imagePath); + + // 重置到序列开始 + void reset(); + + // 是否还有图像 + bool hasNext() const; + + // 图像总数 + size_t size() const; + + // 尝试加载并返回相机内参(fx, fy, cx, cy),返回是否成功 + bool loadIntrinsics(const std::string &yamlPath); + + // 内参访问 + double fx() const { return fx_; } + double fy() const { return fy_; } + double cx() const { return cx_; } + double cy() const { return cy_; } + +private: + std::vector imageFiles; + size_t currentIndex; + + // 相机内参(若未加载则为回退值) + double fx_, fy_, cx_, cy_; +}; + +} // namespace vo +} // namespace cv diff --git a/modules/slam/include/opencv2/slam/feature.hpp b/modules/slam/include/opencv2/slam/feature.hpp new file mode 100644 index 00000000000..0ec86302a07 --- /dev/null +++ b/modules/slam/include/opencv2/slam/feature.hpp @@ -0,0 +1,38 @@ +#pragma once +#include +#include +#include + +namespace cv { +namespace vo { + +class FeatureExtractor { +public: + explicit FeatureExtractor(int nfeatures = 2000); + // detectAndCompute: detect keypoints and compute descriptors. + // If previous-frame data (prevGray, prevKp) is provided, a flow-aware grid allocation + // will be used (score = response * (1 + flow_lambda * normalized_flow)). Otherwise a + // simpler ANMS selection is used. The prev arguments have defaults so this function + // replaces the two previous overloads. + void detectAndCompute(const Mat &image, std::vector &kps, Mat &desc, + const Mat &prevGray = Mat(), const std::vector &prevKp = std::vector(), + double flow_lambda = 5.0); +private: + Ptr orb_; + int nfeatures_; +}; + +// Function to detect and compute features in an image +inline void detectAndComputeFeatures(const Mat &image, + std::vector &keypoints, + Mat &descriptors) { + // Create ORB detector and descriptor + auto orb = ORB::create(); + // Detect keypoints + orb->detect(image, keypoints); + // Compute descriptors + orb->compute(image, keypoints, descriptors); +} + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/include/opencv2/slam/initializer.hpp b/modules/slam/include/opencv2/slam/initializer.hpp new file mode 100644 index 00000000000..b0e8676a0f9 --- /dev/null +++ b/modules/slam/include/opencv2/slam/initializer.hpp @@ -0,0 +1,87 @@ +#pragma once +#include +#include +#include +#include "opencv2/slam/keyframe.hpp" +#include "opencv2/slam/map.hpp" + +namespace cv { +namespace vo { + +class Initializer { +public: + Initializer(); + + // Attempt initialization with two frames + // Returns true if initialization successful + bool initialize(const std::vector &kps1, + const std::vector &kps2, + const std::vector &matches, + double fx, double fy, double cx, double cy, + Mat &R, Mat &t, + std::vector &points3D, + std::vector &isTriangulated); + + // Check if frames have sufficient parallax for initialization + static bool checkParallax(const std::vector &kps1, + const std::vector &kps2, + const std::vector &matches, + double minMedianParallax = 15.0); + +private: + // Reconstruct from Homography + bool reconstructH(const std::vector &pts1, + const std::vector &pts2, + const Mat &H, + double fx, double fy, double cx, double cy, + Mat &R, Mat &t, + std::vector &points3D, + std::vector &isTriangulated, + float ¶llax); + + // Reconstruct from Fundamental/Essential + bool reconstructF(const std::vector &pts1, + const std::vector &pts2, + const Mat &F, + double fx, double fy, double cx, double cy, + Mat &R, Mat &t, + std::vector &points3D, + std::vector &isTriangulated, + float ¶llax); + + // Check reconstructed points quality + int checkRT(const Mat &R, const Mat &t, + const std::vector &pts1, + const std::vector &pts2, + const std::vector &points3D, + std::vector &isGood, + double fx, double fy, double cx, double cy, + float ¶llax); + + // Triangulate points + void triangulate(const Mat &P1, const Mat &P2, + const std::vector &pts1, + const std::vector &pts2, + std::vector &points3D); + + // Decompose Homography + void decomposeH(const Mat &H, std::vector &Rs, + std::vector &ts, std::vector &normals); + + // Compute homography score + float computeScore(const Mat &H21, const Mat &H12, + const std::vector &pts1, + const std::vector &pts2, + std::vector &inliersH, + float sigma = 1.0); + + // Compute fundamental score + float computeScoreF(const Mat &F21, + const std::vector &pts1, + const std::vector &pts2, + std::vector &inliersF, + float sigma = 1.0); +}; + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/include/opencv2/slam/keyframe.hpp b/modules/slam/include/opencv2/slam/keyframe.hpp new file mode 100644 index 00000000000..ce5d3a6b069 --- /dev/null +++ b/modules/slam/include/opencv2/slam/keyframe.hpp @@ -0,0 +1,21 @@ +#pragma once +#include +#include +#include + +namespace cv { +namespace vo { + +struct KeyFrame { + int id = -1; + Mat image; // optional + std::vector kps; + Mat desc; + // pose: rotation and translation to map coordinates + // X_world = R * X_cam + t + Mat R_w = Mat::eye(3,3,CV_64F); + Mat t_w = Mat::zeros(3,1,CV_64F); +}; + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/include/opencv2/slam/localizer.hpp b/modules/slam/include/opencv2/slam/localizer.hpp new file mode 100644 index 00000000000..c441737b930 --- /dev/null +++ b/modules/slam/include/opencv2/slam/localizer.hpp @@ -0,0 +1,32 @@ +#pragma once +#include +#include +#include "opencv2/slam/data_loader.hpp" +#include "opencv2/slam/map.hpp" + +namespace cv { +namespace vo { + +class Localizer { +public: + Localizer(float ratio = 0.7f); + + // Try to solve PnP against map points. Returns true if solved and fills R_out/t_out and inliers. + // Provide camera intrinsics and image dimensions explicitly (DataLoader doesn't expose width/height). + // Enhanced tryPnP with optional diagnostics output (frame id, image and output directory) + // out_preMatches and out_postMatches can be nullptr. If outDir provided, diagnostics images/logs will be saved there. + + bool tryPnP(const MapManager &map, const Mat &desc, const std::vector &kps, + double fx, double fy, double cx, double cy, int imgW, int imgH, + int min_inliers, + Mat &R_out, Mat &t_out, int &inliers_out, + int frame_id = -1, const Mat *image = nullptr, const std::string &outDir = "", + int *out_preMatches = nullptr, int *out_postMatches = nullptr, double *out_meanReproj = nullptr) const; + + float ratio() const { return ratio_; } +private: + float ratio_ = 0.7f; +}; + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/include/opencv2/slam/map.hpp b/modules/slam/include/opencv2/slam/map.hpp new file mode 100644 index 00000000000..38d8d7097b3 --- /dev/null +++ b/modules/slam/include/opencv2/slam/map.hpp @@ -0,0 +1,91 @@ +#pragma once +#include +#include +#include +#include "opencv2/slam/keyframe.hpp" + +namespace cv { +namespace vo { + +struct MapPoint { + int id = -1; // unique id for id-based lookups + Point3d p; // 3D position in world frame + std::vector> observations; // pairs of (keyframe id, keypoint idx) + + // Quality management fields + Mat descriptor; // Representative descriptor for matching + int nObs = 0; // Number of observations + bool isBad = false; // Flag for bad points to be culled + double minDistance = 0.0; // Min viewing distance + double maxDistance = 0.0; // Max viewing distance + + // Statistics + int nFound = 0; // Number of times found in tracking + int nVisible = 0; // Number of times visible + + // Constructor + MapPoint() = default; + MapPoint(const Point3d& pos) : p(pos) {} + + // Helper: compute found ratio + float getFoundRatio() const { + return nVisible > 0 ? static_cast(nFound) / nVisible : 0.0f; + } +}; + +class MapManager { +public: + MapManager(); + + void addKeyFrame(const KeyFrame &kf); + void addMapPoints(const std::vector &pts); + + const std::vector& keyframes() const { return keyframes_; } + const std::vector& mappoints() const { return mappoints_; } + + // Find candidate mappoint indices visible from pose (lastR, lastT) and inside image + std::vector findVisibleCandidates(const Mat &lastR, const Mat &lastT, + double fx, double fy, double cx, double cy, + int imgW, int imgH) const; + + // Triangulate given normalized coordinates between last keyframe and current + // Returns newly created map points (appended to internal list). + std::vector triangulateBetweenLastTwo(const std::vector &pts1n, + const std::vector &pts2n, + const std::vector &pts1_kp_idx, + const std::vector &pts2_kp_idx, + const KeyFrame &lastKf, const KeyFrame &curKf, + double fx, double fy, double cx, double cy); + + // Lookup keyframe index by id (-1 if not found) + int keyframeIndex(int id) const; + // Lookup mappoint index by id (-1 if not found) + int mapPointIndex(int id) const; + // Mutable access if caller legitimately needs to modify keyframes/mappoints. + // Prefer using the applyOptimized... APIs for controlled updates. + std::vector& keyframesMutable() { return keyframes_; } + std::vector& mappointsMutable() { return mappoints_; } + + // Apply optimized pose/point by id (safe writeback APIs for backend) + void applyOptimizedKeyframePose(int keyframeId, const Mat &R, const Mat &t); + void applyOptimizedMapPoint(int mappointId, const Point3d &p); + + // Quality management + void cullBadMapPoints(); + double computeReprojError(const MapPoint &mp, const KeyFrame &kf, + double fx, double fy, double cx, double cy) const; + void updateMapPointDescriptor(MapPoint &mp); + + // Statistics + int countGoodMapPoints() const; + +private: + std::vector keyframes_; + std::vector mappoints_; + std::unordered_map id2idx_; + std::unordered_map mpid2idx_; + int next_mappoint_id_ = 1; +}; + +} // namespace vo +} // namespace cv diff --git a/modules/slam/include/opencv2/slam/matcher.hpp b/modules/slam/include/opencv2/slam/matcher.hpp new file mode 100644 index 00000000000..215b8ea0b1f --- /dev/null +++ b/modules/slam/include/opencv2/slam/matcher.hpp @@ -0,0 +1,34 @@ +#pragma once +#include +#include +#include + +namespace cv { +namespace vo { + +class Matcher { +public: + explicit Matcher(float ratio = 0.75f); + // Match descriptors from prev -> curr, return good matches (queryIdx refers to prev, trainIdx to curr) + // Basic knn ratio test (backwards-compatible) + void knnMatch(const Mat &desc1, const Mat &desc2, std::vector &goodMatches); + + // Stronger match: knn + ratio test + optional mutual cross-check + optional spatial bucketing. + // desc1/desc2 are descriptors for prev/curr frames. + // kps1/kps2 are the corresponding keypoints (needed for spatial bucketing). + // imgCols/imgRows are used to size the bucketing grid. Defaults provide conservative values. + // maxTotal: if >0, final matches will be truncated to this count (keep smallest distances). + // enableMutual: enable/disable mutual cross-check. enableBucketing: enable/disable grid bucketing. + void match(const Mat &desc1, const Mat &desc2, + const std::vector &kps1, const std::vector &kps2, + std::vector &goodMatches, + int imgCols = 640, int imgRows = 480, + int bucketRows = 8, int bucketCols = 8, int topKPerBucket = 4, + int maxTotal = 0, bool enableMutual = true, bool enableBucketing = true); +private: + float ratio_; + BFMatcher bf_; +}; + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/include/opencv2/slam/optimizer.hpp b/modules/slam/include/opencv2/slam/optimizer.hpp new file mode 100644 index 00000000000..a46263d7b15 --- /dev/null +++ b/modules/slam/include/opencv2/slam/optimizer.hpp @@ -0,0 +1,69 @@ +#pragma once +#include +#include +#include "opencv2/slam/keyframe.hpp" +#include "opencv2/slam/map.hpp" + +namespace cv { +namespace vo { + +// Bundle Adjustment Optimizer using OpenCV-based Levenberg-Marquardt +// Note: For production, should use g2o or Ceres for better performance +class Optimizer { +public: + Optimizer(); + + // Local Bundle Adjustment + // Optimizes a window of recent keyframes and all observed map points + // fixedKFs: indices of keyframes to keep fixed during optimization +#ifdef USE_G2O + static void localBundleAdjustmentG2O( + std::vector &keyframes, + std::vector &mappoints, + const std::vector &localKfIndices, + const std::vector &fixedKfIndices, + double fx, double fy, double cx, double cy, + int iterations = 10); +#endif + static void localBundleAdjustmentSFM( + std::vector &keyframes, + std::vector &mappoints, + const std::vector &localKfIndices, + const std::vector &fixedKfIndices, + double fx, double fy, double cx, double cy, + int iterations = 10); + // Pose-only optimization (optimize camera pose given fixed 3D points) + static bool optimizePose( + KeyFrame &kf, + const std::vector &mappoints, + const std::vector &matchedMpIndices, + double fx, double fy, double cx, double cy, + std::vector &inliers, + int iterations = 10); + + // Global Bundle Adjustment (expensive, use after loop closure) + static void globalBundleAdjustmentSFM( + std::vector &keyframes, + std::vector &mappoints, + double fx, double fy, double cx, double cy, + int iterations = 20); + +private: + // Compute reprojection error and Jacobian + static double computeReprojectionError( + const Point3d &point3D, + const Mat &R, const Mat &t, + const Point2f &observed, + double fx, double fy, double cx, double cy, + Mat &jacobianPose, + Mat &jacobianPoint); + + // Project 3D point to image + static Point2f project( + const Point3d &point3D, + const Mat &R, const Mat &t, + double fx, double fy, double cx, double cy); +}; + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/include/opencv2/slam/pose.hpp b/modules/slam/include/opencv2/slam/pose.hpp new file mode 100644 index 00000000000..acf36159508 --- /dev/null +++ b/modules/slam/include/opencv2/slam/pose.hpp @@ -0,0 +1,19 @@ +#pragma once +#include +#include + +namespace cv { +namespace vo { + +class PoseEstimator { +public: + PoseEstimator() = default; + // Estimate relative pose from matched normalized image points. Returns true if pose recovered. + bool estimate(const std::vector &pts1, + const std::vector &pts2, + double fx, double fy, double cx, double cy, + Mat &R, Mat &t, Mat &mask, int &inliers); +}; + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/include/opencv2/slam/visualizer.hpp b/modules/slam/include/opencv2/slam/visualizer.hpp new file mode 100644 index 00000000000..22ffe94fd36 --- /dev/null +++ b/modules/slam/include/opencv2/slam/visualizer.hpp @@ -0,0 +1,47 @@ +#pragma once +#include +#include +#include "opencv2/slam/pose.hpp" +#include "opencv2/slam/feature.hpp" +#include "opencv2/slam/matcher.hpp" + +namespace cv { +namespace vo { + +class Tracker { +public: + Tracker(); + // Process a gray image, returns true if a pose was estimated. imgOut contains visualization (matches or keypoints) + bool processFrame(const Mat &gray, const std::string &imagePath, Mat &imgOut, Mat &R_out, Mat &t_out, std::string &info); +private: + FeatureExtractor feat_; + Matcher matcher_; + PoseEstimator poseEst_; + + Mat prevGray_, prevDesc_; + std::vector prevKp_; + int frame_id_; +}; + +class Visualizer { +public: + Visualizer(int W=1000, int H=800, double meters_per_pixel=0.02); + // 更新轨迹(传入 x,z 坐标) + void addPose(double x, double z); + // 返回帧绘制(matches 或 keypoints)到窗口 + void showFrame(const Mat &frame); + // 返回并显示俯视图 + void showTopdown(); + // 保存最终轨迹图像到文件 + bool saveTrajectory(const std::string &path); +private: + int W_, H_; + double meters_per_pixel_; + Size mapSize_; + Mat map_; + std::vector traj_; // 存储 (x,z) + Point worldToPixel(const Point2d &p) const; +}; + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/include/opencv2/slam/vo.hpp b/modules/slam/include/opencv2/slam/vo.hpp new file mode 100644 index 00000000000..9f08e203e3f --- /dev/null +++ b/modules/slam/include/opencv2/slam/vo.hpp @@ -0,0 +1,114 @@ +#pragma once + +#include +#include +#include +#include + +namespace cv { +namespace vo { + +struct CameraIntrinsics { + Mat K; // 3x3 camera matrix + Mat dist; // distortion coefficients (optional) +}; + +struct VOParams { + int minInitInliers = 80; + double ransacProb = 0.999; + double ransacThresh = 1.5; // pixels + double minParallaxDeg = 1.0; + double reprojErrThresh = 3.0; + int pnpMinInliers = 30; + double pnpReprojErr = 3.0; + int keyframeInterval = 10; // frames + double keyframeMinBaseline = 0.05; // relative scale + double keyframeMinRotationDeg = 5.0; + int localWindowSize = 7; // for BA + // Backend parameters + bool enableBackend = false; + int baMaxIters = 50; + // Backend type: 0=g2o (default), 1=opencv_sfm + int backendType = 0; +}; + +class VisualOdometry { +public: + VisualOdometry(const Ptr& feature, + const Ptr& matcher, + const CameraIntrinsics& intrinsics, + const VOParams& params = VOParams()); + + // Process a single frame. Returns true if tracking succeeded. + bool processFrame(const Mat& img, double timestamp); + + // Current camera pose (R|t) as 4x4 SE3 matrix. + Mat getCurrentPose() const; + + // Full trajectory as vector of 4x4 matrices. + std::vector getTrajectory() const; + + // Backend controls + void setEnableBackend(bool on); + void setWindowSize(int N); + void setBAParams(double reprojThresh, int maxIters); + void setBackendType(int type); + + // Loop closure & localization stubs (Milestone 4) + enum Mode { MODE_SLAM, MODE_LOCALIZATION }; + void setMode(Mode m); + bool saveMap(const std::string& path) const; + bool loadMap(const std::string& path); + void enableLoopClosure(bool on); + void setPlaceRecognizer(const Ptr& vocabFeature); + +private: + // internal helpers + bool initializeTwoView(const Mat& img0, const Mat& img1); + bool trackWithPnP(const Mat& img); + bool shouldInsertKeyframe() const; + void triangulateWithLastKeyframe(); + void runLocalBAIfEnabled(); + + // state + Ptr feature_; + Ptr matcher_; + CameraIntrinsics K_; + VOParams params_; + + bool initialized_ = false; + bool backendEnabled_ = false; + int frameCount_ = 0; + Mode mode_ = MODE_SLAM; + bool loopClosureEnabled_ = false; + + // cached + Mat lastImg_; + Mat currentPoseSE3_; // 4x4 + std::vector trajectory_; + + // minimal containers (placeholder, to be integrated with MapManager/KeyFrame) + std::vector mapPoints_; // simple storage for demo + + // keyframe state (latest KF only) + std::vector kf_keypoints_; + Mat kf_descriptors_; + std::vector kf_kp_to_map_; // size == kf_keypoints_.size(), -1 if not mapped + Mat kfPoseSE3_; // KF pose 4x4 + + // current frame cached features + std::vector cur_keypoints_; + Mat cur_descriptors_; + int lastPnPInliers_ = 0; + std::vector> curFeat_to_map_inliers_; // (cur_kp_idx, map_idx) + double unmatchedRatio_ = 0.0; // fraction of matched keyframe features without map points (for KF decision) + + // simple history for sliding window + std::vector keyframePoses_; // poses of historical KFs + std::vector> keyframeKeypoints_; + std::vector keyframeDescriptors_; + std::vector> keyframeKpToMap_; // per-KF mapping from kp idx to map point idx +}; + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/src/data_loader.cpp b/modules/slam/src/data_loader.cpp new file mode 100644 index 00000000000..1488e9a02a0 --- /dev/null +++ b/modules/slam/src/data_loader.cpp @@ -0,0 +1,103 @@ +#include "opencv2/slam/data_loader.hpp" +#include +#include +#include +#include +#include + +namespace cv { +namespace vo { + +DataLoader::DataLoader(const std::string &imageDir) + : currentIndex(0), fx_(700.0), fy_(700.0), cx_(0.5), cy_(0.5) +{ + // 使用 std::filesystem 先检查目录是否存在 + try { + std::filesystem::path p(imageDir); + if(!std::filesystem::exists(p) || !std::filesystem::is_directory(p)){ + std::cerr << "DataLoader: imageDir does not exist or is not a directory: " << imageDir << std::endl; + return; + } + } catch(const std::exception &e){ + std::cerr << "DataLoader: filesystem error checking imageDir: " << e.what() << std::endl; + // fallthrough to try glob + } + + // 使用 glob 列出文件,捕获可能的 OpenCV 异常 + try { + glob(imageDir + "/*", imageFiles, false); + } catch(const Exception &e){ + std::cerr << "DataLoader: glob failed for '" << imageDir << "': " << e.what() << std::endl; + imageFiles.clear(); + return; + } + + if(imageFiles.empty()){ + std::cerr << "DataLoader: no image files found in " << imageDir << std::endl; + return; + } + + // 尝试在 imageDir 或其上一级目录寻找 sensor.yaml + std::filesystem::path p(imageDir); + std::string yaml1 = (p / "sensor.yaml").string(); + std::string yaml2 = (p.parent_path() / "sensor.yaml").string(); + if(!loadIntrinsics(yaml1)){ + loadIntrinsics(yaml2); // best-effort + } +} + +bool DataLoader::loadIntrinsics(const std::string &yamlPath){ + std::ifstream ifs(yamlPath); + if(!ifs.is_open()) return false; + std::string line; + while(std::getline(ifs, line)){ + auto pos = line.find("intrinsics:"); + if(pos != std::string::npos){ + size_t lb = line.find('[', pos); + size_t rb = line.find(']', pos); + std::string nums; + if(lb != std::string::npos && rb != std::string::npos && rb > lb){ + nums = line.substr(lb+1, rb-lb-1); + } else { + std::string rest; + while(std::getline(ifs, rest)){ + nums += rest + " "; + if(rest.find(']') != std::string::npos) break; + } + } + for(char &c: nums) if(c == ',' || c == '[' || c == ']') c = ' '; + std::stringstream ss(nums); + std::vector vals; + double v; + while(ss >> v) vals.push_back(v); + if(vals.size() >= 4){ + fx_ = vals[0]; fy_ = vals[1]; cx_ = vals[2]; cy_ = vals[3]; + std::cerr << "DataLoader: loaded intrinsics from " << yamlPath << std::endl; + return true; + } + } + } + return false; +} + +bool DataLoader::hasNext() const { return currentIndex < imageFiles.size(); } + +size_t DataLoader::size() const { return imageFiles.size(); } + +void DataLoader::reset(){ currentIndex = 0; } + +bool DataLoader::getNextImage(Mat &image, std::string &imagePath){ + if(currentIndex >= imageFiles.size()) return false; + imagePath = imageFiles[currentIndex]; + image = imread(imagePath, IMREAD_UNCHANGED); + if(image.empty()){ + std::cerr << "DataLoader: couldn't read " << imagePath << ", skipping" << std::endl; + currentIndex++; + return getNextImage(image, imagePath); // try next + } + currentIndex++; + return true; +} + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/src/feature.cpp b/modules/slam/src/feature.cpp new file mode 100644 index 00000000000..e2eb7dde507 --- /dev/null +++ b/modules/slam/src/feature.cpp @@ -0,0 +1,167 @@ +#include "opencv2/slam/feature.hpp" +#include +#include +#include +#include + +namespace cv { +namespace vo { + +FeatureExtractor::FeatureExtractor(int nfeatures) + : nfeatures_(nfeatures) +{ + orb_ = ORB::create(nfeatures_); +} + +// Adaptive Non-Maximal Suppression (ANMS) +static void anms(const std::vector &in, std::vector &out, int maxFeatures) +{ + out.clear(); + if(in.empty()) return; + int N = (int)in.size(); + if(maxFeatures <= 0 || N <= maxFeatures){ out = in; return; } + + // For each keypoint, find distance to the nearest keypoint with strictly higher response + std::vector radius(N, std::numeric_limits::infinity()); + for(int i=0;i in[i].response){ + float dx = in[i].pt.x - in[j].pt.x; + float dy = in[i].pt.y - in[j].pt.y; + float d2 = dx*dx + dy*dy; + if(d2 < radius[i]) radius[i] = d2; + } + } + // if no stronger keypoint exists, radius[i] stays INF + } + + // Now pick top maxFeatures by radius (larger radius preferred). If radius==INF, treat as large. + std::vector idx(N); + for(int i=0;i in[b].response; // tie-break by response + if(std::isinf(ra)) return true; + if(std::isinf(rb)) return false; + if(ra == rb) return in[a].response > in[b].response; + return ra > rb; + }); + + int take = std::min(maxFeatures, N); + out.reserve(take); + for(int i=0;i &kps, Mat &desc, + const Mat &prevGray, const std::vector &prevKp, + double flow_lambda) +{ + kps.clear(); desc.release(); + if(image.empty()) return; + + // detect candidates with ORB + std::vector candidates; + orb_->detect(image, candidates); + if(candidates.empty()) return; + + // If no previous-frame info is provided, use simple ANMS + descriptor computation + if(prevGray.empty() || prevKp.empty()){ + std::vector selected; + anms(candidates, selected, nfeatures_); + if(selected.empty()) return; + orb_->compute(image, selected, desc); + kps = std::move(selected); + return; + } + + // Flow-aware scoring path ------------------------------------------------- + // 1) track previous keypoints into current frame to estimate flows + std::vector trackedPts; + std::vector status; + std::vector err; + std::vector trackedFlows; // aligned with prevKp + std::vector prevPts; prevPts.reserve(prevKp.size()); + for(const auto &kp: prevKp) prevPts.push_back(kp.pt); + trackedPts.resize(prevPts.size()); status.resize(prevPts.size()); err.resize(prevPts.size()); + try{ + calcOpticalFlowPyrLK(prevGray, image, prevPts, trackedPts, status, err, Size(21,21), 3, + TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01), 0, 1e-4); + } catch(...) { status.clear(); } + trackedFlows.resize(prevPts.size()); + for(size_t i=0;i scored; scored.reserve(candidates.size()); + for(size_t i=0;i= 0) flow = trackedFlows[besti]; + } + double resp = candidates[i].response; + double norm_flow = diag > 0.0 ? (flow / diag) : flow; + double score = resp * (1.0 + flow_lambda * norm_flow); + scored.push_back({score, flow, (int)i}); + } + + // 3) grid allocation (ORB-style): split into grid and take top per-cell + const int GRID_ROWS = 8; + const int GRID_COLS = 8; + int cellW = std::max(1, image.cols / GRID_COLS); + int cellH = std::max(1, image.rows / GRID_ROWS); + int cellCap = (nfeatures_ + GRID_ROWS*GRID_COLS - 1) / (GRID_ROWS*GRID_COLS); + std::vector> buckets(GRID_ROWS * GRID_COLS); + for(const auto &c: scored){ + const KeyPoint &kp = candidates[c.idx]; + int cx = std::min(GRID_COLS-1, std::max(0, int(kp.pt.x) / cellW)); + int cy = std::min(GRID_ROWS-1, std::max(0, int(kp.pt.y) / cellH)); + buckets[cy*GRID_COLS + cx].push_back(c); + } + + std::vector selected; selected.reserve(nfeatures_); + for(auto &b: buckets){ + if(b.empty()) continue; + std::sort(b.begin(), b.end(), [](const CandScore &a, const CandScore &b){ return a.score > b.score; }); + int take = std::min((int)b.size(), cellCap); + for(int i=0;i all = scored; + std::sort(all.begin(), all.end(), [](const CandScore &a, const CandScore &b){ return a.score > b.score; }); + for(const auto &c: all){ + if((int)selected.size() >= nfeatures_) break; + bool dup = false; + for(const auto &s: selected){ if(norm(s.pt - candidates[c.idx].pt) < 1.0){ dup = true; break; } } + if(!dup) selected.push_back(candidates[c.idx]); + } + } + + if(selected.empty()) return; + orb_->compute(image, selected, desc); + kps = std::move(selected); +} + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/src/initializer.cpp b/modules/slam/src/initializer.cpp new file mode 100644 index 00000000000..2bdd5a99241 --- /dev/null +++ b/modules/slam/src/initializer.cpp @@ -0,0 +1,406 @@ +#include "opencv2/slam/initializer.hpp" +#include +#include +#include + +namespace cv { +namespace vo { + +Initializer::Initializer() {} + +bool Initializer::initialize(const std::vector &kps1, + const std::vector &kps2, + const std::vector &matches, + double fx, double fy, double cx, double cy, + Mat &R, Mat &t, + std::vector &points3D, + std::vector &isTriangulated) { + + if(matches.size() < 50) { + std::cout << "Initializer: too few matches (" << matches.size() << ")" << std::endl; + return false; + } + + // Extract matched points + std::vector pts1, pts2; + pts1.reserve(matches.size()); + pts2.reserve(matches.size()); + + for(const auto &m : matches) { + pts1.push_back(kps1[m.queryIdx].pt); + pts2.push_back(kps2[m.trainIdx].pt); + } + + // Estimate both Homography and Fundamental + std::vector inliersH, inliersF; + Mat H = findHomography(pts1, pts2, RANSAC, 3.0, inliersH, 2000, 0.999); + Mat F = findFundamentalMat(pts1, pts2, FM_RANSAC, 3.0, 0.999, inliersF); + + if(H.empty() || F.empty()) { + std::cout << "Initializer: failed to compute H or F" << std::endl; + return false; + } + + // Compute scores + std::vector inlH, inlF; + float scoreH = computeScore(H, H.inv(), pts1, pts2, inlH, 1.0); + float scoreF = computeScoreF(F, pts1, pts2, inlF, 1.0); + + float ratio = scoreH / (scoreH + scoreF); + + std::cout << "Initializer: H score=" << scoreH << " F score=" << scoreF + << " ratio=" << ratio << std::endl; + + // Decide between H and F + // If ratio > 0.45, scene is likely planar, use H + // Otherwise use F for general scenes + bool useH = (ratio > 0.45); + + Mat R_out, t_out; + std::vector pts3D; + std::vector isTri; + float parallax = 0.0f; + + bool success = false; + if(useH) { + std::cout << "Initializer: using Homography" << std::endl; + success = reconstructH(pts1, pts2, H, fx, fy, cx, cy, R_out, t_out, pts3D, isTri, parallax); + } else { + std::cout << "Initializer: using Fundamental" << std::endl; + success = reconstructF(pts1, pts2, F, fx, fy, cx, cy, R_out, t_out, pts3D, isTri, parallax); + } + + if(!success) { + std::cout << "Initializer: reconstruction failed" << std::endl; + return false; + } + + // Count good triangulated points + int goodCount = 0; + for(bool b : isTri) if(b) goodCount++; + + std::cout << "Initializer: triangulated " << goodCount << "/" << pts3D.size() + << " points, parallax=" << parallax << std::endl; + + // Check quality: need enough good points + if(goodCount < 50) { + std::cout << "Initializer: too few good points (" << goodCount << ")" << std::endl; + return false; + } + + // Check parallax + if(parallax < 1.0f) { + std::cout << "Initializer: insufficient parallax (" << parallax << ")" << std::endl; + return false; + } + + // Success + R = R_out; + t = t_out; + points3D = pts3D; + isTriangulated = isTri; + + std::cout << "Initializer: SUCCESS!" << std::endl; + return true; +} + +bool Initializer::checkParallax(const std::vector &kps1, + const std::vector &kps2, + const std::vector &matches, + double minMedianParallax) { + if(matches.empty()) return false; + + std::vector parallaxes; + parallaxes.reserve(matches.size()); + + for(const auto &m : matches) { + Point2f p1 = kps1[m.queryIdx].pt; + Point2f p2 = kps2[m.trainIdx].pt; + double dx = p2.x - p1.x; + double dy = p2.y - p1.y; + parallaxes.push_back(std::sqrt(dx*dx + dy*dy)); + } + + std::sort(parallaxes.begin(), parallaxes.end()); + double median = parallaxes[parallaxes.size()/2]; + + return median >= minMedianParallax; +} + +bool Initializer::reconstructF(const std::vector &pts1, + const std::vector &pts2, + const Mat &F, + double fx, double fy, double cx, double cy, + Mat &R, Mat &t, + std::vector &points3D, + std::vector &isTriangulated, + float ¶llax) { + + // Compute Essential matrix from F + Mat K = (Mat_(3,3) << fx, 0, cx, 0, fy, cy, 0, 0, 1); + Mat E = K.t() * F * K; + + // Recover pose from E + Mat mask; + int inliers = recoverPose(E, pts1, pts2, K, R, t, mask); + + if(inliers < 30) return false; + + // Triangulate + points3D.resize(pts1.size()); + isTriangulated.resize(pts1.size(), false); + + Mat P1 = Mat::eye(3, 4, CV_64F); + Mat P2(3, 4, CV_64F); + for(int i = 0; i < 3; ++i) { + for(int j = 0; j < 3; ++j) P2.at(i,j) = R.at(i,j); + P2.at(i, 3) = t.at(i, 0); + } + + triangulate(P1, P2, pts1, pts2, points3D); + + // Check quality + std::vector isGood; + int nGood = checkRT(R, t, pts1, pts2, points3D, isGood, fx, fy, cx, cy, parallax); + + isTriangulated = isGood; + return nGood >= 30; +} + +bool Initializer::reconstructH(const std::vector &pts1, + const std::vector &pts2, + const Mat &H, + double fx, double fy, double cx, double cy, + Mat &R, Mat &t, + std::vector &points3D, + std::vector &isTriangulated, + float ¶llax) { + + // Decompose H to get multiple solutions + std::vector Rs, ts, normals; + decomposeH(H, Rs, ts, normals); + + // Try all solutions and pick best + int bestGood = 0; + int bestIdx = -1; + std::vector> allPoints(Rs.size()); + std::vector> allGood(Rs.size()); + std::vector allParallax(Rs.size()); + + for(size_t i = 0; i < Rs.size(); ++i) { + std::vector pts3D; + std::vector isGood; + float par = 0.0f; + + Mat P1 = Mat::eye(3, 4, CV_64F); + Mat P2(3, 4, CV_64F); + for(int r = 0; r < 3; ++r) { + for(int c = 0; c < 3; ++c) P2.at(r,c) = Rs[i].at(r,c); + P2.at(r, 3) = ts[i].at(r, 0); + } + + triangulate(P1, P2, pts1, pts2, pts3D); + int nGood = checkRT(Rs[i], ts[i], pts1, pts2, pts3D, isGood, fx, fy, cx, cy, par); + + allPoints[i] = pts3D; + allGood[i] = isGood; + allParallax[i] = par; + + if(nGood > bestGood) { + bestGood = nGood; + bestIdx = i; + } + } + + if(bestIdx < 0 || bestGood < 30) return false; + + R = Rs[bestIdx]; + t = ts[bestIdx]; + points3D = allPoints[bestIdx]; + isTriangulated = allGood[bestIdx]; + parallax = allParallax[bestIdx]; + + return true; +} + +int Initializer::checkRT(const Mat &R, const Mat &t, + const std::vector &pts1, + const std::vector &pts2, + const std::vector &points3D, + std::vector &isGood, + double fx, double fy, double cx, double cy, + float ¶llax) { + + isGood.resize(points3D.size(), false); + + Mat R1 = Mat::eye(3, 3, CV_64F); + Mat t1 = Mat::zeros(3, 1, CV_64F); + Mat R2 = R; + Mat t2 = t; + + int nGood = 0; + std::vector cosParallaxes; + cosParallaxes.reserve(points3D.size()); + + for(size_t i = 0; i < points3D.size(); ++i) { + const Point3d &p3d = points3D[i]; + + // Check depth in first camera + Mat p3dMat = (Mat_(3,1) << p3d.x, p3d.y, p3d.z); + Mat p1 = R1 * p3dMat + t1; + double z1 = p1.at(2, 0); + + if(z1 <= 0) continue; + + // Check depth in second camera + Mat p2 = R2 * p3dMat + t2; + double z2 = p2.at(2, 0); + + if(z2 <= 0) continue; + + // Check reprojection error in first camera + double u1 = fx * p1.at(0,0)/z1 + cx; + double v1 = fy * p1.at(1,0)/z1 + cy; + double err1 = std::sqrt(std::pow(u1 - pts1[i].x, 2) + std::pow(v1 - pts1[i].y, 2)); + + if(err1 > 4.0) continue; + + // Check reprojection error in second camera + double u2 = fx * p2.at(0,0)/z2 + cx; + double v2 = fy * p2.at(1,0)/z2 + cy; + double err2 = std::sqrt(std::pow(u2 - pts2[i].x, 2) + std::pow(v2 - pts2[i].y, 2)); + + if(err2 > 4.0) continue; + + // Check parallax + Mat normal1 = p3dMat / norm(p3dMat); + Mat normal2 = (p3dMat - t2) / norm(p3dMat - t2); + double cosParallax = normal1.dot(normal2); + + cosParallaxes.push_back(cosParallax); + + isGood[i] = true; + nGood++; + } + + if(!cosParallaxes.empty()) { + std::sort(cosParallaxes.begin(), cosParallaxes.end()); + float medianCos = cosParallaxes[cosParallaxes.size()/2]; + parallax = std::acos(medianCos) * 180.0 / CV_PI; + } + + return nGood; +} + +void Initializer::triangulate(const Mat &P1, const Mat &P2, + const std::vector &pts1, + const std::vector &pts2, + std::vector &points3D) { + + points3D.resize(pts1.size()); + + Mat pts4D; + triangulatePoints(P1, P2, pts1, pts2, pts4D); + + for(int i = 0; i < pts4D.cols; ++i) { + Mat x = pts4D.col(i); + x /= x.at(3, 0); + points3D[i] = Point3d(x.at(0,0), x.at(1,0), x.at(2,0)); + } +} + +void Initializer::decomposeH(const Mat &H, std::vector &Rs, + std::vector &ts, std::vector &normals) { + + Mat H_normalized = H / H.at(2,2); + + std::vector rotations, translations, normalsOut; + int solutions = decomposeHomographyMat(H_normalized, Mat::eye(3,3,CV_64F), + rotations, translations, normalsOut); + + Rs = rotations; + ts = translations; + normals = normalsOut; +} + +float Initializer::computeScore(const Mat &H21, const Mat &H12, + const std::vector &pts1, + const std::vector &pts2, + std::vector &inliersH, + float sigma) { + + const float th = 5.991 * sigma; + inliersH.resize(pts1.size(), false); + + float score = 0.0f; + const float thSq = th * th; + + for(size_t i = 0; i < pts1.size(); ++i) { + // Forward error + Mat p1 = (Mat_(3,1) << pts1[i].x, pts1[i].y, 1.0); + Mat p2pred = H21 * p1; + p2pred /= p2pred.at(2,0); + + float dx = pts2[i].x - p2pred.at(0,0); + float dy = pts2[i].y - p2pred.at(1,0); + float errSq = dx*dx + dy*dy; + + if(errSq < thSq) { + score += thSq - errSq; + } + + // Backward error + Mat p2 = (Mat_(3,1) << pts2[i].x, pts2[i].y, 1.0); + Mat p1pred = H12 * p2; + p1pred /= p1pred.at(2,0); + + dx = pts1[i].x - p1pred.at(0,0); + dy = pts1[i].y - p1pred.at(1,0); + errSq = dx*dx + dy*dy; + + if(errSq < thSq) { + score += thSq - errSq; + inliersH[i] = true; + } + } + + return score; +} + +float Initializer::computeScoreF(const Mat &F21, + const std::vector &pts1, + const std::vector &pts2, + std::vector &inliersF, + float sigma) { + + const float th = 3.841 * sigma; + const float thSq = th * th; + + inliersF.resize(pts1.size(), false); + float score = 0.0f; + + for(size_t i = 0; i < pts1.size(); ++i) { + Mat p1 = (Mat_(3,1) << pts1[i].x, pts1[i].y, 1.0); + Mat p2 = (Mat_(3,1) << pts2[i].x, pts2[i].y, 1.0); + + // Epipolar line in second image + Mat l2 = F21 * p1; + float a2 = l2.at(0,0); + float b2 = l2.at(1,0); + float c2 = l2.at(2,0); + + float num2 = a2*pts2[i].x + b2*pts2[i].y + c2; + float den2 = a2*a2 + b2*b2; + float distSq2 = (num2*num2) / den2; + + if(distSq2 < thSq) { + score += thSq - distSq2; + inliersF[i] = true; + } + } + + return score; +} + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/src/keyframe.cpp b/modules/slam/src/keyframe.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/modules/slam/src/localizer.cpp b/modules/slam/src/localizer.cpp new file mode 100644 index 00000000000..40647f17518 --- /dev/null +++ b/modules/slam/src/localizer.cpp @@ -0,0 +1,157 @@ +#include "opencv2/slam/localizer.hpp" +#include "opencv2/slam/matcher.hpp" +#include +#include +#include +#include +#include + +namespace cv { +namespace vo { + +Localizer::Localizer(float ratio) : ratio_(ratio) {} + +bool Localizer::tryPnP(const MapManager &map, const Mat &desc, const std::vector &kps, + double fx, double fy, double cx, double cy, int imgW, int imgH, + int min_inliers, + Mat &R_out, Mat &t_out, int &inliers_out, + int frame_id, const Mat *image, const std::string &outDir, + int *out_preMatches, int *out_postMatches, double *out_meanReproj) const { + inliers_out = 0; R_out.release(); t_out.release(); + const auto &mappoints = map.mappoints(); + const auto &keyframes = map.keyframes(); + if(mappoints.empty() || keyframes.empty() || desc.empty()) return false; + + // Use last keyframe as prior + const KeyFrame &last = keyframes.back(); + Mat lastR = last.R_w, lastT = last.t_w; + + // select visible candidates + std::vector candidates = map.findVisibleCandidates(lastR, lastT, fx, fy, cx, cy, imgW, imgH); + if(candidates.empty()) return false; + + // gather descriptors from map (prefer mp.descriptor if available) + Mat trainDesc; + std::vector objPts; objPts.reserve(candidates.size()); + std::vector trainMpIds; trainMpIds.reserve(candidates.size()); + for(int idx: candidates){ + const auto &mp = mappoints[idx]; + if(mp.observations.empty()) continue; + // prefer representative descriptor on MapPoint + if(!mp.descriptor.empty()){ + trainDesc.push_back(mp.descriptor.row(0)); + } else { + auto ob = mp.observations.front(); + int kfid = ob.first; int kpidx = ob.second; + int kfIdx = map.keyframeIndex(kfid); + if(kfIdx < 0) continue; + const KeyFrame &kf = keyframes[kfIdx]; + if(kf.desc.empty() || kpidx < 0 || kpidx >= kf.desc.rows) continue; + trainDesc.push_back(kf.desc.row(kpidx)); + } + objPts.emplace_back((float)mp.p.x, (float)mp.p.y, (float)mp.p.z); + trainMpIds.push_back(mp.id); + } + if(trainDesc.empty()) return false; + + // Forward and backward knn to enable mutual cross-check + BFMatcher bf(NORM_HAMMING); + std::vector> knnF, knnB; + bf.knnMatch(desc, trainDesc, knnF, 2); + bf.knnMatch(trainDesc, desc, knnB, 1); + + int preMatches = static_cast(knnF.size()); + if(out_preMatches) *out_preMatches = preMatches; + + // Ratio + mutual + const float RATIO = ratio_; + std::vector goodMatches; + goodMatches.reserve(knnF.size()); + for(size_t q=0;q= 2){ + const DMatch &m1 = knnF[q][1]; + if(m0.distance > RATIO * m1.distance) continue; + } + int trainIdx = m0.trainIdx; + // mutual check: ensure best match of trainIdx points back to this query + if(trainIdx < 0 || trainIdx >= static_cast(knnB.size())) continue; + if(knnB[trainIdx].empty()) continue; + int backIdx = knnB[trainIdx][0].trainIdx; // index in desc + if(backIdx != static_cast(q)) continue; + // passed ratio and mutual + goodMatches.push_back(DMatch(static_cast(q), trainIdx, m0.distance)); + } + + if(out_postMatches) *out_postMatches = static_cast(goodMatches.size()); + + if(goodMatches.size() < static_cast(std::max(6, min_inliers))) return false; + + // build PnP inputs + std::vector obj; std::vector imgpts; obj.reserve(goodMatches.size()); imgpts.reserve(goodMatches.size()); + std::vector matchedMpIds; matchedMpIds.reserve(goodMatches.size()); + for(const auto &m: goodMatches){ + int q = m.queryIdx; int t = m.trainIdx; + if(t < 0 || t >= static_cast(objPts.size()) || q < 0 || q >= static_cast(kps.size())) continue; + obj.push_back(objPts[t]); + imgpts.push_back(kps[q].pt); + matchedMpIds.push_back(trainMpIds[t]); + } + + if(obj.size() < static_cast(std::max(6, min_inliers))) return false; + + Mat camera = (Mat_(3,3) << fx,0,cx, 0,fy,cy, 0,0,1); + Mat dist = Mat::zeros(4,1,CV_64F); + std::vector inliersIdx; + bool ok = solvePnPRansac(obj, imgpts, camera, dist, R_out, t_out, false, + 100, 8.0, 0.99, inliersIdx, SOLVEPNP_ITERATIVE); + if(!ok) return false; + inliers_out = static_cast(inliersIdx.size()); + + // compute mean reprojection error on inliers + double meanReproj = 0.0; + if(!inliersIdx.empty()){ + std::vector proj; + projectPoints(obj, R_out, t_out, camera, dist, proj); + double sum = 0.0; + for(int idx: inliersIdx){ + double e = std::hypot(proj[idx].x - imgpts[idx].x, proj[idx].y - imgpts[idx].y); + sum += e; + } + meanReproj = sum / inliersIdx.size(); + } + if(out_meanReproj) *out_meanReproj = meanReproj; + + // Diagnostics: draw matches and inliers if requested + if(!outDir.empty() && image){ + try{ + std::filesystem::create_directories(outDir); + Mat vis; + if(image->channels() == 1) cvtColor(*image, vis, COLOR_GRAY2BGR); + else vis = image->clone(); + // draw all good matches as small circles; inliers green + for(size_t i=0;i(i)) != inliersIdx.end(); + Scalar col = isInlier ? Scalar(0,255,0) : Scalar(0,0,255); + circle(vis, p, 3, col, 2, LINE_AA); + } + std::ostringstream name; name << outDir << "/pnp_frame_" << frame_id << ".png"; + putText(vis, "pre=" + std::to_string(preMatches) + " post=" + std::to_string(goodMatches.size()) + " inliers=" + std::to_string(inliers_out) + " mean_px=" + std::to_string(meanReproj), + Point(10,20), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(255,255,255), 2); + imwrite(name.str(), vis); + // append a small CSV-like log + std::ofstream logf((outDir + "/pnp_stats.csv"), std::ios::app); + if(logf){ + logf << frame_id << "," << preMatches << "," << goodMatches.size() << "," << inliers_out << "," << meanReproj << "\n"; + logf.close(); + } + } catch(...) { /* don't crash on diagnostics */ } + } + + return inliers_out >= min_inliers; +} + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/src/map.cpp b/modules/slam/src/map.cpp new file mode 100644 index 00000000000..670eaff63ca --- /dev/null +++ b/modules/slam/src/map.cpp @@ -0,0 +1,319 @@ +#include "opencv2/slam/map.hpp" +#include +#include +#include +#include + +namespace cv { +namespace vo { + +MapManager::MapManager() {} + +void MapManager::addKeyFrame(const KeyFrame &kf){ + keyframes_.push_back(kf); + id2idx_[kf.id] = static_cast(keyframes_.size()) - 1; +} + +void MapManager::addMapPoints(const std::vector &pts){ + for(auto p: pts) { + if(p.id <= 0) p.id = next_mappoint_id_++; + mpid2idx_[p.id] = static_cast(mappoints_.size()); + mappoints_.push_back(p); + } +} + +std::vector MapManager::findVisibleCandidates(const Mat &lastR, const Mat &lastT, + double fx, double fy, double cx, double cy, + int imgW, int imgH) const { + std::vector candidates; candidates.reserve(mappoints_.size()); + for(size_t mi=0; mi(3,1) << mp.p.x, mp.p.y, mp.p.z); + Mat Xc = lastR.t() * (Xw - lastT); + double z = Xc.at(2,0); + if(z <= 0) continue; + double u = fx * (Xc.at(0,0)/z) + cx; + double v = fy * (Xc.at(1,0)/z) + cy; + if(u >= 0 && u < imgW && v >= 0 && v < imgH) candidates.push_back((int)mi); + } + return candidates; +} + +std::vector MapManager::triangulateBetweenLastTwo(const std::vector &pts1n, + const std::vector &pts2n, + const std::vector &pts1_kp_idx, + const std::vector &pts2_kp_idx, + const KeyFrame &lastKf, const KeyFrame &curKf, + double fx, double fy, double cx, double cy){ + std::vector newPoints; + if(pts1n.empty() || pts2n.empty()) return newPoints; + // Relative pose from last -> current (we expect lastKf and curKf poses to be in world) + // compute relative transformation + // P1 = [I|0], P2 = [R_rel|t_rel] where R_rel = R_last^{-1} * R_cur, t_rel = R_last^{-1}*(t_cur - t_last) + Mat R_last = lastKf.R_w, t_last = lastKf.t_w; + Mat R_cur = curKf.R_w, t_cur = curKf.t_w; + Mat R_rel = R_last.t() * R_cur; + Mat t_rel = R_last.t() * (t_cur - t_last); + Mat P1 = Mat::eye(3,4,CV_64F); + Mat P2(3,4,CV_64F); + for(int r=0;r<3;++r){ + for(int c=0;c<3;++c) P2.at(r,c) = R_rel.at(r,c); + P2.at(r,3) = t_rel.at(r,0); + } + Mat points4D; + try{ triangulatePoints(P1, P2, pts1n, pts2n, points4D); } + catch(...) { points4D.release(); } + if(points4D.empty()) return newPoints; + Mat p4d64; + if(points4D.type() != CV_64F) points4D.convertTo(p4d64, CV_64F); else p4d64 = points4D; + for(int c=0;c(3,c); + if(std::abs(w) < 1e-8) continue; + double Xx = p4d64.at(0,c)/w; + double Xy = p4d64.at(1,c)/w; + double Xz = p4d64.at(2,c)/w; + if(Xz <= 0) continue; + Mat Xc = (Mat_(3,1) << Xx, Xy, Xz); + Mat Xw = lastKf.R_w * Xc + lastKf.t_w; + MapPoint mp; mp.p = Point3d(Xw.at(0,0), Xw.at(1,0), Xw.at(2,0)); + // compute reprojection error in both views (pixel coords) + // project into last + Mat Xc_last = Xc; + double u1 = fx * (Xc_last.at(0,0)/Xc_last.at(2,0)) + cx; + double v1 = fy * (Xc_last.at(1,0)/Xc_last.at(2,0)) + cy; + // project into current: Xc_cur = R_rel * Xc + t_rel (we computed P2 earlier) + Mat Xc_cur = R_rel * Xc + t_rel; + double u2 = fx * (Xc_cur.at(0,0)/Xc_cur.at(2,0)) + cx; + double v2 = fy * (Xc_cur.at(1,0)/Xc_cur.at(2,0)) + cy; + // obtain observed pixel locations + double obs_u1 = -1, obs_v1 = -1, obs_u2 = -1, obs_v2 = -1; + if(c < static_cast(pts1_kp_idx.size())){ + // pts1n/pts2n are normalized coords; but we were given kp indices - use them if valid + int kp1 = pts1_kp_idx[c]; + if(kp1 >= 0 && kp1 < static_cast(lastKf.kps.size())){ + obs_u1 = lastKf.kps[kp1].pt.x; obs_v1 = lastKf.kps[kp1].pt.y; + } + } + if(c < static_cast(pts2_kp_idx.size())){ + int kp2 = pts2_kp_idx[c]; + if(kp2 >= 0 && kp2 < static_cast(curKf.kps.size())){ + obs_u2 = curKf.kps[kp2].pt.x; obs_v2 = curKf.kps[kp2].pt.y; + } + } + // if we have observed pixel locations, check reprojection error + bool pass = true; + const double MAX_REPROJ_PX = 2.0; + if(obs_u1 >= 0 && obs_v1 >= 0){ + double e1 = std::hypot(u1 - obs_u1, v1 - obs_v1); + if(e1 > MAX_REPROJ_PX) pass = false; + } + if(obs_u2 >= 0 && obs_v2 >= 0){ + double e2 = std::hypot(u2 - obs_u2, v2 - obs_v2); + if(e2 > MAX_REPROJ_PX) pass = false; + } + // parallax check: angle between viewing rays (in last frame) + Mat ray1 = Xc_last / norm(Xc_last); + Mat ray2 = Xc_cur / norm(Xc_cur); + double cos_par = ray1.dot(ray2); + double parallax = std::acos(std::min(1.0, std::max(-1.0, cos_par))); + const double MIN_PARALLAX_RAD = 1.0 * CV_PI / 180.0; // 1 degree + if(parallax < MIN_PARALLAX_RAD) pass = false; + if(!pass) continue; + // attach observations using provided keypoint indices when available + int kp1idx = (c < static_cast(pts1_kp_idx.size())) ? pts1_kp_idx[c] : -1; + int kp2idx = (c < static_cast(pts2_kp_idx.size())) ? pts2_kp_idx[c] : -1; + if(kp1idx >= 0) mp.observations.emplace_back(lastKf.id, kp1idx); + if(kp2idx >= 0) mp.observations.emplace_back(curKf.id, kp2idx); + // assign id + mp.id = next_mappoint_id_++; + mpid2idx_[mp.id] = static_cast(mappoints_.size()) + static_cast(newPoints.size()); + newPoints.push_back(mp); + } + // append to internal map + for(const auto &p: newPoints) mappoints_.push_back(p); + return newPoints; +} + +int MapManager::keyframeIndex(int id) const{ + auto it = id2idx_.find(id); + if(it == id2idx_.end()) return -1; + return it->second; +} + +int MapManager::mapPointIndex(int id) const{ + auto it = mpid2idx_.find(id); + if(it == mpid2idx_.end()) return -1; + return it->second; +} + +void MapManager::applyOptimizedKeyframePose(int keyframeId, const Mat &R, const Mat &t){ + int idx = keyframeIndex(keyframeId); + if(idx < 0 || idx >= static_cast(keyframes_.size())) return; + keyframes_[idx].R_w = R.clone(); + keyframes_[idx].t_w = t.clone(); +} + +void MapManager::applyOptimizedMapPoint(int mappointId, const Point3d &p){ + int idx = mapPointIndex(mappointId); + if(idx < 0 || idx >= static_cast(mappoints_.size())) return; + mappoints_[idx].p = p; +} + +void MapManager::cullBadMapPoints() { + const double MAX_REPROJ_ERROR = 3.0; // pixels + const int MIN_OBSERVATIONS = 2; + const float MIN_FOUND_RATIO = 0.25f; + + for(auto &mp : mappoints_) { + if(mp.isBad) continue; + + // 1. Check observation count + mp.nObs = static_cast(mp.observations.size()); + if(mp.nObs < MIN_OBSERVATIONS) { + mp.isBad = true; + continue; + } + + // 2. Check found ratio (avoid points rarely tracked) + if(mp.nVisible > 10 && mp.getFoundRatio() < MIN_FOUND_RATIO) { + mp.isBad = true; + continue; + } + + // 3. Check reprojection error across observations + // Sample a few keyframes to check reprojection + int errorCount = 0; + int checkCount = 0; + for(const auto &obs : mp.observations) { + int kfId = obs.first; + int kfIdx = keyframeIndex(kfId); + if(kfIdx < 0 || kfIdx >= static_cast(keyframes_.size())) continue; + + const KeyFrame &kf = keyframes_[kfIdx]; + // Use default camera params (should be passed in production code) + double fx = 500.0, fy = 500.0, cx = 320.0, cy = 240.0; + double error = computeReprojError(mp, kf, fx, fy, cx, cy); + + checkCount++; + if(error > MAX_REPROJ_ERROR) { + errorCount++; + } + + // Sample up to 3 observations for efficiency + if(checkCount >= 3) break; + } + + // If majority of samples have high error, mark as bad + if(checkCount > 0 && errorCount > checkCount / 2) { + mp.isBad = true; + } + } + + // Remove bad points + size_t before = mappoints_.size(); + mappoints_.erase( + std::remove_if(mappoints_.begin(), mappoints_.end(), + [](const MapPoint &p) { return p.isBad; }), + mappoints_.end() + ); + size_t after = mappoints_.size(); + + if(before - after > 0) { + std::cout << "MapManager: culled " << (before - after) << " bad map points (" + << after << " remain)" << std::endl; + } +} + +double MapManager::computeReprojError(const MapPoint &mp, const KeyFrame &kf, + double fx, double fy, double cx, double cy) const { + // Transform world point to camera frame + Mat Xw = (Mat_(3,1) << mp.p.x, mp.p.y, mp.p.z); + Mat Xc = kf.R_w.t() * (Xw - kf.t_w); + + double z = Xc.at(2, 0); + if(z <= 0) return std::numeric_limits::max(); + + // Project to image + double u = fx * (Xc.at(0, 0) / z) + cx; + double v = fy * (Xc.at(1, 0) / z) + cy; + + // Find corresponding observation in this keyframe + Point2f observed(-1, -1); + for(const auto &obs : mp.observations) { + if(obs.first == kf.id) { + int kpIdx = obs.second; + if(kpIdx >= 0 && kpIdx < static_cast(kf.kps.size())) { + observed = kf.kps[kpIdx].pt; + break; + } + } + } + + if(observed.x < 0) return std::numeric_limits::max(); + + // Compute reprojection error + double dx = u - observed.x; + double dy = v - observed.y; + return std::sqrt(dx * dx + dy * dy); +} + +void MapManager::updateMapPointDescriptor(MapPoint &mp) { + if(mp.observations.empty()) return; + + // Collect all descriptors from observations + std::vector descriptors; + for(const auto &obs : mp.observations) { + int kfIdx = keyframeIndex(obs.first); + if(kfIdx < 0) continue; + + const KeyFrame &kf = keyframes_[kfIdx]; + int kpIdx = obs.second; + if(kpIdx >= 0 && kpIdx < kf.desc.rows) { + descriptors.push_back(kf.desc.row(kpIdx)); + } + } + + if(descriptors.empty()) return; + + // Compute median descriptor (for binary descriptors, use majority voting per bit) + if(descriptors[0].type() == CV_8U) { + // Binary descriptor (ORB) + int bytes = descriptors[0].cols; + Mat median = Mat::zeros(1, bytes, CV_8U); + + for(int b = 0; b < bytes; ++b) { + int bitCount[8] = {0}; + for(const auto &desc : descriptors) { + uchar byte = desc.at(0, b); + for(int bit = 0; bit < 8; ++bit) { + if(byte & (1 << bit)) bitCount[bit]++; + } + } + + uchar medianByte = 0; + int threshold = descriptors.size() / 2; + for(int bit = 0; bit < 8; ++bit) { + if(bitCount[bit] > threshold) { + medianByte |= (1 << bit); + } + } + median.at(0, b) = medianByte; + } + + mp.descriptor = median; + } else { + // Fallback: use first descriptor + mp.descriptor = descriptors[0].clone(); + } +} + +int MapManager::countGoodMapPoints() const { + int count = 0; + for(const auto &mp : mappoints_) { + if(!mp.isBad) count++; + } + return count; +} + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/src/matcher.cpp b/modules/slam/src/matcher.cpp new file mode 100644 index 00000000000..e0579793d31 --- /dev/null +++ b/modules/slam/src/matcher.cpp @@ -0,0 +1,132 @@ +#include "opencv2/slam/matcher.hpp" + +namespace cv { +namespace vo { + +Matcher::Matcher(float ratio) + : ratio_(ratio), bf_(NORM_HAMMING) +{ +} + +void Matcher::knnMatch(const Mat &desc1, const Mat &desc2, std::vector &goodMatches) +{ + goodMatches.clear(); + if(desc1.empty() || desc2.empty()) return; + std::vector> knn; + bf_.knnMatch(desc1, desc2, knn, 2); + for(size_t i=0;i &kps1, const std::vector &kps2, + std::vector &goodMatches, + int imgCols, int imgRows, + int bucketRows, int bucketCols, int topKPerBucket, + int maxTotal, bool enableMutual, bool enableBucketing) +{ + goodMatches.clear(); + if(desc1.empty() || desc2.empty()) return; + // 1) knn match desc1 -> desc2 and apply ratio test + std::vector> knn12; + bf_.knnMatch(desc1, desc2, knn12, 2); + const int n1 = static_cast(knn12.size()); + + std::vector best12(n1, -1); // best trainIdx for each query if ratio test passed + for(int i=0;i best21; // only filled if enableMutual + int n2 = 0; + if(enableMutual){ + std::vector> knn21; + bf_.knnMatch(desc2, desc1, knn21, 1); + n2 = static_cast(knn21.size()); + best21.assign(n2, -1); + for(int j=0;j 0 ? desc2.rows : 0; + } + + // 3) Collect candidate matches according to whether mutual check is enabled + struct Cand { DMatch m; float x; float y; }; + std::vector candidates; + candidates.reserve(n1); + for(int i=0;i= n2) continue; + if(best21[j] != i) continue; // mutual check + } + // safe distance value from knn12 + float dist = (knn12[i].size() ? knn12[i][0].distance : 0.0f); + DMatch dm(i, j, dist); + // keypoint location on current frame (kps2) + float x = 0, y = 0; + if(j >= 0 && j < (int)kps2.size()){ x = kps2[j].pt.x; y = kps2[j].pt.y; } + candidates.push_back({dm, x, y}); + } + + if(candidates.empty()) return; + + // 4) Spatial bucketing on current-frame locations to promote spatially-distributed matches + std::vector interimMatches; + interimMatches.reserve(candidates.size()); + if(enableBucketing){ + int cols = std::max(1, bucketCols); + int rows = std::max(1, bucketRows); + float cellW = (imgCols > 0) ? (float)imgCols / cols : 1.0f; + float cellH = (imgRows > 0) ? (float)imgRows / rows : 1.0f; + + // buckets: for each cell keep topKPerBucket smallest-distance matches + std::vector> buckets(cols * rows); + for(const auto &c: candidates){ + int bx = 0, by = 0; + if(cellW > 0) bx = std::min(cols - 1, std::max(0, (int)std::floor(c.x / cellW))); + if(cellH > 0) by = std::min(rows - 1, std::max(0, (int)std::floor(c.y / cellH))); + int idx = by * cols + bx; + buckets[idx].push_back(c); + } + + // sort each bucket and take top K + for(auto &vec: buckets){ + if(vec.empty()) continue; + std::sort(vec.begin(), vec.end(), [](const Cand &a, const Cand &b){ return a.m.distance < b.m.distance; }); + int take = std::min((int)vec.size(), topKPerBucket); + for(int i=0;i 0 && (int)interimMatches.size() > maxTotal){ + std::nth_element(interimMatches.begin(), interimMatches.begin() + maxTotal, interimMatches.end(), + [](const DMatch &a, const DMatch &b){ return a.distance < b.distance; }); + interimMatches.resize(maxTotal); + } + + // final sort by distance + std::sort(interimMatches.begin(), interimMatches.end(), [](const DMatch &a, const DMatch &b){ return a.distance < b.distance; }); + goodMatches = std::move(interimMatches); +} + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/src/optimizer.cpp b/modules/slam/src/optimizer.cpp new file mode 100644 index 00000000000..fe138c718cc --- /dev/null +++ b/modules/slam/src/optimizer.cpp @@ -0,0 +1,255 @@ +#include "opencv2/slam/optimizer.hpp" +#include +#include +#include + +// If g2o is enabled (CMake defines USE_G2O), compile and use the g2o-based +// implementation. Otherwise fall back to a simplified OpenCV-based implementation. + +#ifdef USE_G2O +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#else +#include "opencv2/sfm.hpp" +#endif + +#include + +namespace cv { +namespace vo { + +Optimizer::Optimizer() {} + +#ifdef USE_G2O +void Optimizer::localBundleAdjustment( + std::vector &keyframes, + std::vector &mappoints, + const std::vector &localKfIndices, + const std::vector &fixedKfIndices, + double fx, double fy, double cx, double cy, + int iterations) { + + if(localKfIndices.empty()) return; + + typedef g2o::BlockSolver< g2o::BlockSolverTraits<6,3> > Block; + auto linearSolver = std::make_unique>(); + auto blockSolver = std::make_unique(std::move(linearSolver)); + auto solver = new g2o::OptimizationAlgorithmLevenberg(std::move(blockSolver)); + + g2o::SparseOptimizer optimizer; + optimizer.setAlgorithm(solver); + optimizer.setVerbose(false); + + // Camera parameter (id = 0) + g2o::CameraParameters* cam = new g2o::CameraParameters(fx, Eigen::Vector2d(cx, cy), 0); + cam->setId(0); + optimizer.addParameter(cam); + + // Poses + std::map poseVertices; + for (int idx : localKfIndices) { + if (idx < 0 || idx >= static_cast(keyframes.size())) continue; + KeyFrame &kf = keyframes[idx]; + Eigen::Matrix3d R; + cv2eigen(kf.R_w, R); + Eigen::Vector3d t; + cv2eigen(kf.t_w, t); + g2o::SE3Quat pose(R, t); + auto *vSE3 = new g2o::VertexSE3Expmap(); + vSE3->setId(idx); + vSE3->setEstimate(pose); + if (std::find(fixedKfIndices.begin(), fixedKfIndices.end(), idx) != fixedKfIndices.end()) vSE3->setFixed(true); + optimizer.addVertex(vSE3); + poseVertices[idx] = vSE3; + } + + // Points + const int POINT_ID_OFFSET = 10000; + std::vector mpVertexIds(mappoints.size(), -1); + for (size_t i = 0; i < mappoints.size(); ++i) { + const MapPoint &mp = mappoints[i]; + auto *vPoint = new g2o::VertexPointXYZ(); + vPoint->setEstimate(Eigen::Vector3d(mp.p.x, mp.p.y, mp.p.z)); + int vid = POINT_ID_OFFSET + static_cast(i); + vPoint->setId(vid); + vPoint->setMarginalized(true); + optimizer.addVertex(vPoint); + mpVertexIds[i] = vid; + } + + // Observations / edges + for (size_t i = 0; i < mappoints.size(); ++i) { + const MapPoint &mp = mappoints[i]; + int pid = mpVertexIds[i]; + if (pid < 0) continue; + for (const auto &obs : mp.observations) { + int kfIdx = obs.first; + int kpIdx = obs.second; + if (poseVertices.find(kfIdx) == poseVertices.end()) continue; + if (kfIdx < 0 || kfIdx >= static_cast(keyframes.size())) continue; + KeyFrame &kf = keyframes[kfIdx]; + if (kpIdx < 0 || kpIdx >= static_cast(kf.kps.size())) continue; + + const KeyPoint &kp = kf.kps[kpIdx]; + Eigen::Vector2d meas(kp.pt.x, kp.pt.y); + + auto *edge = new g2o::EdgeProjectXYZ2UV(); + edge->setVertex(0, optimizer.vertex(pid)); + edge->setVertex(1, optimizer.vertex(kfIdx)); + edge->setMeasurement(meas); + edge->information() = Eigen::Matrix2d::Identity(); + edge->setParameterId(0, cam->id()); + auto *rk = new g2o::RobustKernelHuber(); + rk->setDelta(1.0); + edge->setRobustKernel(rk); + optimizer.addEdge(edge); + } + } + + optimizer.initializeOptimization(); + optimizer.optimize(iterations); + + // Write back poses + for (auto &kv : poseVertices) { + int idx = kv.first; + g2o::VertexSE3Expmap* v = kv.second; + g2o::SE3Quat est = v->estimate(); + Eigen::Matrix3d R = est.rotation().toRotationMatrix(); + Eigen::Vector3d t = est.translation(); + Mat Rcv, tcv; + eigen2cv(R, Rcv); + eigen2cv(t, tcv); + keyframes[idx].R_w = Rcv.clone(); + keyframes[idx].t_w = tcv.clone(); + } + + // Write back points + for (size_t i = 0; i < mappoints.size(); ++i) { + int pid = mpVertexIds[i]; + if (pid < 0) continue; + auto *v = dynamic_cast(optimizer.vertex(pid)); + if (!v) continue; + Eigen::Vector3d p = v->estimate(); + mappoints[i].p.x = p[0]; + mappoints[i].p.y = p[1]; + mappoints[i].p.z = p[2]; + } +} +#endif +void Optimizer::localBundleAdjustmentSFM( + std::vector &keyframes, + std::vector &mappoints, + const std::vector &localKfIndices, + const std::vector &fixedKfIndices, + double fx, double fy, double cx, double cy, + int iterations) { + //TODO: Implement SFM-based local BA +} + + +bool Optimizer::optimizePose( + KeyFrame &kf, + const std::vector &mappoints, + const std::vector &matchedMpIndices, + double fx, double fy, double cx, double cy, + std::vector &inliers, + int iterations) { + + if(matchedMpIndices.empty()) return false; + inliers.assign(matchedMpIndices.size(), false); + + std::vector objectPoints; + std::vector imagePoints; + for(size_t i = 0; i < matchedMpIndices.size(); ++i) { + int mpIdx = matchedMpIndices[i]; + if(mpIdx < 0 || mpIdx >= static_cast(mappoints.size())) continue; + const MapPoint &mp = mappoints[mpIdx]; + if(mp.isBad) continue; + if(i < kf.kps.size()) { + objectPoints.emplace_back(mp.p.x, mp.p.y, mp.p.z); + imagePoints.push_back(kf.kps[i].pt); + } + } + if(objectPoints.size() < 4) return false; + + Mat K = (Mat_(3,3) << fx, 0, cx, 0, fy, cy, 0, 0, 1); + Mat rvec, tvec, inliersMask; + bool success = solvePnPRansac(objectPoints, imagePoints, K, Mat(), rvec, tvec, + false, iterations, 2.0, 0.99, inliersMask); + if(!success) return false; + Mat R; + Rodrigues(rvec, R); + kf.R_w = R; + kf.t_w = tvec; + for(int i = 0; i < inliersMask.rows && i < static_cast(inliers.size()); ++i) + inliers[i] = (inliersMask.at(i,0) != 0); + + return true; +} + +void Optimizer::globalBundleAdjustmentSFM( + std::vector &keyframes, + std::vector &mappoints, + double fx, double fy, double cx, double cy, + int iterations) { + + std::cout << "Optimizer: Global BA with " << keyframes.size() + << " KFs and " << mappoints.size() << " map points" << std::endl; + std::vector localKfIndices; + for(size_t i = 1; i < keyframes.size(); ++i) localKfIndices.push_back(static_cast(i)); + std::vector fixedKfIndices = {0}; + localBundleAdjustmentSFM(keyframes, mappoints, localKfIndices, fixedKfIndices, fx, fy, cx, cy, iterations); + std::cout << "Optimizer: Global BA completed" << std::endl; +} + +double Optimizer::computeReprojectionError( + const Point3d &point3D, + const Mat &R, const Mat &t, + const Point2f &observed, + double fx, double fy, double cx, double cy, + Mat &jacobianPose, + Mat &jacobianPoint) { + + Mat Xw = (Mat_(3,1) << point3D.x, point3D.y, point3D.z); + Mat Xc = R.t() * (Xw - t); + double x = Xc.at(0,0), y = Xc.at(1,0), z = Xc.at(2,0); + if(z <= 0) return std::numeric_limits::max(); + double u = fx * (x / z) + cx; + double v = fy * (y / z) + cy; + double du = u - observed.x, dv = v - observed.y; + jacobianPose = Mat::zeros(2,6,CV_64F); + jacobianPoint = Mat::zeros(2,3,CV_64F); + double invZ = 1.0 / z; double invZ2 = invZ * invZ; + jacobianPoint.at(0,0) = fx * invZ; + jacobianPoint.at(0,1) = 0; + jacobianPoint.at(0,2) = -fx * x * invZ2; + jacobianPoint.at(1,0) = 0; + jacobianPoint.at(1,1) = fy * invZ; + jacobianPoint.at(1,2) = -fy * y * invZ2; + return std::sqrt(du*du + dv*dv); +} + +Point2f Optimizer::project( + const Point3d &point3D, + const Mat &R, const Mat &t, + double fx, double fy, double cx, double cy) { + + Mat Xw = (Mat_(3,1) << point3D.x, point3D.y, point3D.z); + Mat Xc = R.t() * (Xw - t); + double z = Xc.at(2,0); + if(z <= 0) return Point2f(-1,-1); + float u = static_cast(fx * (Xc.at(0,0) / z) + cx); + float v = static_cast(fy * (Xc.at(1,0) / z) + cy); + return Point2f(u,v); +} + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/src/pose.cpp b/modules/slam/src/pose.cpp new file mode 100644 index 00000000000..728e27f313b --- /dev/null +++ b/modules/slam/src/pose.cpp @@ -0,0 +1,43 @@ +#include "opencv2/slam/pose.hpp" +#include + +namespace cv { +namespace vo { + + bool PoseEstimator::estimate(const std::vector &pts1, + const std::vector &pts2, + double fx, double fy, double cx, double cy, + Mat &R, Mat &t, Mat &mask, int &inliers) +{ + if(pts1.size() < 8 || pts2.size() < 8) { inliers = 0; return false; } + double focal = (fx + fy) * 0.5; + Point2d pp(cx, cy); + // If provided principal point looks invalid (e.g. zeros or tiny values), + // fall back to the approximate center of the matched points' bounding box. + // This is better than leaving (0,0) which can break essential-matrix estimation. + if((pp.x <= 2.0 || pp.y <= 2.0) && !pts1.empty()){ + float minx = std::numeric_limits::max(); + float miny = std::numeric_limits::max(); + float maxx = std::numeric_limits::lowest(); + float maxy = std::numeric_limits::lowest(); + for(size_t i=0;i +#include +#include + +namespace cv { +namespace vo { + +Tracker::Tracker() + : feat_(), matcher_(), poseEst_(), frame_id_(0) +{ +} + +bool Tracker::processFrame(const Mat &gray, const std::string &imagePath, Mat &imgOut, Mat &R_out, Mat &t_out, std::string &info) +{ + if(gray.empty()) return false; + // detect + std::vector kps; + Mat desc; + feat_.detectAndCompute(gray, kps, desc); + + if(!prevGray_.empty() && !prevDesc_.empty() && !desc.empty()){ + // match + std::vector goodMatches; + matcher_.knnMatch(prevDesc_, desc, goodMatches); + + // draw matches for visualization + drawMatches(prevGray_, prevKp_, gray, kps, goodMatches, imgOut, + Scalar::all(-1), Scalar::all(-1), std::vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); + + // prepare points + std::vector pts1, pts2; + for(const auto &m: goodMatches){ + pts1.push_back(prevKp_[m.queryIdx].pt); + pts2.push_back(kps[m.trainIdx].pt); + } + + if(pts1.size() >= 8){ + Mat R, t, mask; + int inliers = 0; + // Note: we don't have intrinsics here; caller should provide via global or arguments. For now, caller will use PoseEstimator directly if needed. + // We'll estimate using default focal/pp later (caller will adapt). Return false for now so caller can invoke PoseEstimator separately. + // But to keep compatibility, leave R_out/t_out empty and set info. + info = "matches=" + std::to_string(goodMatches.size()) + ", inliers=" + std::to_string(inliers); + // update prev buffers below + } else { + info = "matches=" + std::to_string(goodMatches.size()) + ", inliers=0"; + } + } else { + // first frame: draw keypoints + drawKeypoints(gray, kps, imgOut, Scalar::all(-1), DrawMatchesFlags::DRAW_RICH_KEYPOINTS); + info = "first_frame"; + } + + // update prev + prevGray_ = gray.clone(); + prevKp_ = kps; + prevDesc_ = desc.clone(); + frame_id_++; + return true; +} + +Visualizer::Visualizer(int W, int H, double meters_per_pixel) + : W_(W), H_(H), meters_per_pixel_(meters_per_pixel), mapSize_(W,H) +{ + map_ = Mat::zeros(mapSize_, CV_8UC3); +} + +Point Visualizer::worldToPixel(const Point2d &p) const { + Point2d origin(mapSize_.width/2.0, mapSize_.height/2.0); + int x = int(origin.x + p.x / meters_per_pixel_); + int y = int(origin.y - p.y / meters_per_pixel_); + return Point(x,y); +} + +void Visualizer::addPose(double x, double z){ + traj_.emplace_back(x,z); +} + +void Visualizer::showFrame(const Mat &frame){ + if(frame.empty()) return; + // Do not draw heading overlay on video frames; only show raw frame. + imshow("frame", frame); +} + +void Visualizer::showTopdown(){ + map_ = Mat::zeros(mapSize_, CV_8UC3); + for (int gx = 0; gx < mapSize_.width; gx += 50) line(map_, Point(gx,0), Point(gx,mapSize_.height), Scalar(30,30,30), 1); + for (int gy = 0; gy < mapSize_.height; gy += 50) line(map_, Point(0,gy), Point(mapSize_.width,gy), Scalar(30,30,30), 1); + for(size_t i=1;i= 2){ + int K = std::min(5, traj_.size()-1); + double dx = 0.0, dz = 0.0; + for(int i=0;i 1e-6){ + dx /= norm; dz /= norm; + // arrow length in world meters + double arrow_m = 0.5; // 0.5 meters + // tail is behind the current position by arrow_m, head (tip) at current position + Point2d tail_world(traj_.back().x - dx * arrow_m, traj_.back().y - dz * arrow_m); + Point tail_px = worldToPixel(tail_world); + arrowedLine(map_, tail_px, p, Scalar(0,0,255), 2, LINE_AA, 0, 0.3); + } + } + // label near current position + putText(map_, "Robot", p + Point(10,-10), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(255,255,255), 1); + } + imshow("topdown", map_); +} + +bool Visualizer::saveTrajectory(const std::string &path){ + if(map_.empty()) showTopdown(); + return imwrite(path, map_); +} + +} // namespace vo +} // namespace cv \ No newline at end of file diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp new file mode 100644 index 00000000000..439b8bba144 --- /dev/null +++ b/modules/slam/src/vo.cpp @@ -0,0 +1,389 @@ +#include "opencv2/slam/vo.hpp" +#include "opencv2/slam/optimizer.hpp" +#include +#include +#include +#include + +namespace cv { +namespace vo { + +static Mat makeSE3(const Mat& R, const Mat& t) { + Mat T = Mat::eye(4, 4, CV_64F); + R.copyTo(T(Rect(0,0,3,3))); + t.copyTo(T(Rect(3,0,1,3))); + return T; +} + +VisualOdometry::VisualOdometry(const Ptr& feature, + const Ptr& matcher, + const CameraIntrinsics& intrinsics, + const VOParams& params) + : feature_(feature), matcher_(matcher), K_(intrinsics), params_(params) { + currentPoseSE3_ = Mat::eye(4,4,CV_64F); +} + +bool VisualOdometry::processFrame(const Mat& img, double /*timestamp*/) { + frameCount_++; + if (!initialized_) { + if (lastImg_.empty()) { + lastImg_ = img.clone(); + return false; + } + initialized_ = initializeTwoView(lastImg_, img); + lastImg_ = img.clone(); + if (initialized_) trajectory_.push_back(currentPoseSE3_.clone()); + return initialized_; + } + + bool ok = trackWithPnP(img); + lastImg_ = img.clone(); + if (ok) { + trajectory_.push_back(currentPoseSE3_.clone()); + if (shouldInsertKeyframe()) { + // promote current frame to new keyframe using current features and PnP inliers + kfPoseSE3_ = currentPoseSE3_.clone(); + kf_keypoints_ = cur_keypoints_; + kf_descriptors_ = cur_descriptors_.clone(); + kf_kp_to_map_.assign((int)kf_keypoints_.size(), -1); + keyframePoses_.push_back(kfPoseSE3_.clone()); + keyframeKeypoints_.push_back(kf_keypoints_); + keyframeDescriptors_.push_back(kf_descriptors_.clone()); + // seed mapping for tracked correspondences + for (auto &pr : curFeat_to_map_inliers_) { + int curIdx = pr.first; int mpIdx = pr.second; + if (curIdx >=0 && curIdx < (int)kf_kp_to_map_.size()) kf_kp_to_map_[curIdx] = mpIdx; + } + // push kp->map history for BA + keyframeKpToMap_.push_back(kf_kp_to_map_); + triangulateWithLastKeyframe(); + runLocalBAIfEnabled(); + } + } + return ok; +} + +Mat VisualOdometry::getCurrentPose() const { return currentPoseSE3_.clone(); } +std::vector VisualOdometry::getTrajectory() const { return trajectory_; } + +void VisualOdometry::setEnableBackend(bool on) { backendEnabled_ = on; } +void VisualOdometry::setWindowSize(int N) { params_.localWindowSize = N; } +void VisualOdometry::setBAParams(double reprojThresh, int maxIters) { + params_.reprojErrThresh = reprojThresh; + params_.baMaxIters = std::max(1, maxIters); +} +void VisualOdometry::setBackendType(int type) { params_.backendType = type; } +void VisualOdometry::setMode(Mode m) { mode_ = m; } +bool VisualOdometry::saveMap(const std::string& /*path*/) const { return false; } +bool VisualOdometry::loadMap(const std::string& /*path*/) { return false; } +void VisualOdometry::enableLoopClosure(bool on) { loopClosureEnabled_ = on; } +void VisualOdometry::setPlaceRecognizer(const Ptr& /*vocabFeature*/) {} + +bool VisualOdometry::initializeTwoView(const Mat& img0, const Mat& img1) { + std::vector k0, k1; Mat d0, d1; + feature_->detectAndCompute(img0, noArray(), k0, d0); + feature_->detectAndCompute(img1, noArray(), k1, d1); + if (k0.size() < 50 || k1.size() < 50) return false; + + std::vector> knn01; matcher_->knnMatch(d0, d1, knn01, 2); + std::vector good; + for (auto& v : knn01) if (v.size()>=2 && v[0].distance < 0.75 * v[1].distance) good.push_back(v[0]); + if (good.size() < 80) return false; + + std::vector p0, p1; + p0.reserve(good.size()); p1.reserve(good.size()); + for (auto& m : good) { p0.push_back(k0[m.queryIdx].pt); p1.push_back(k1[m.trainIdx].pt); } + + Mat mask; + Mat E = findEssentialMat(p0, p1, K_.K, RANSAC, params_.ransacProb, params_.ransacThresh, mask); + if (E.empty()) return false; + + Mat R, t; + int inliers = recoverPose(E, p0, p1, K_.K, R, t, mask); + if (inliers < params_.minInitInliers) return false; + + // Set first pose = Identity, second = (R,t) + currentPoseSE3_ = makeSE3(R, t); + + // Minimal triangulation demo (no storage of per-point observations yet) + Mat P0 = K_.K * Mat::eye(3,4,CV_64F); + Mat P1(3,4,CV_64F); R.copyTo(P1(Rect(0,0,3,3))); t.copyTo(P1(Rect(3,0,1,3))); P1 = K_.K * P1; + + std::vector tp0, tp1; tp0.reserve(inliers); tp1.reserve(inliers); + std::vector inlier_trainIdx; inlier_trainIdx.reserve(inliers); + for (size_t i=0;i(int(i))) { + tp0.push_back(p0[i]); tp1.push_back(p1[i]); + inlier_trainIdx.push_back(good[i].trainIdx); + } + if (tp0.size() >= 20) { + Mat X4; + triangulatePoints(P0, P1, tp0, tp1, X4); + // initialize KF as second frame + kf_keypoints_ = k1; + kf_descriptors_ = d1.clone(); + kf_kp_to_map_.assign((int)kf_keypoints_.size(), -1); + kfPoseSE3_ = currentPoseSE3_.clone(); + for (int i=0;i 0) { + int mpIndex = (int)mapPoints_.size(); + mapPoints_.push_back(X); + int trainIdx = inlier_trainIdx[i]; + if (trainIdx >=0 && trainIdx < (int)kf_kp_to_map_.size()) + kf_kp_to_map_[trainIdx] = mpIndex; + } + } + } + return true; +} + +bool VisualOdometry::trackWithPnP(const Mat& img) { + cur_keypoints_.clear(); + feature_->detectAndCompute(img, noArray(), cur_keypoints_, cur_descriptors_); + if (kf_descriptors_.empty() || cur_descriptors_.empty()) return false; + + // Match KF -> current + std::vector> knn; + matcher_->knnMatch(kf_descriptors_, cur_descriptors_, knn, 2); + std::vector good; + good.reserve(knn.size()); + for (auto &v : knn) if (v.size()>=2 && v[0].distance < 0.75*v[1].distance) good.push_back(v[0]); + + std::vector objPts; objPts.reserve(good.size()); + std::vector imgPts; imgPts.reserve(good.size()); + std::vector curIdxOfCorr; curIdxOfCorr.reserve(good.size()); + std::vector mapIdxOfCorr; mapIdxOfCorr.reserve(good.size()); + + int unmatchedCount = 0; + for (auto &m : good) { + int kf_idx = m.queryIdx; + int mp_idx = (kf_idx >=0 && kf_idx < (int)kf_kp_to_map_.size()) ? kf_kp_to_map_[kf_idx] : -1; + if (mp_idx >= 0 && mp_idx < (int)mapPoints_.size()) { + const Point3d &Xd = mapPoints_[mp_idx]; + objPts.emplace_back((float)Xd.x,(float)Xd.y,(float)Xd.z); + imgPts.push_back(cur_keypoints_[m.trainIdx].pt); + curIdxOfCorr.push_back(m.trainIdx); + mapIdxOfCorr.push_back(mp_idx); + } else { + unmatchedCount++; + } + } + unmatchedRatio_ = good.empty() ? 0.0 : (double)unmatchedCount / (double)good.size(); + if (objPts.size() < 4) return false; + + Mat rvec, tvec, R; + std::vector inliers; + bool ok = solvePnPRansac(objPts, imgPts, K_.K, K_.dist, rvec, tvec, false, + 100, params_.pnpReprojErr, 0.99, inliers, SOLVEPNP_ITERATIVE); + if (!ok || (int)inliers.size() < std::max(4, params_.pnpMinInliers)) return false; + + Rodrigues(rvec, R); + currentPoseSE3_ = makeSE3(R, tvec); + lastPnPInliers_ = (int)inliers.size(); + + // keep inlier correspondences for KF mapping when inserting a new KF + curFeat_to_map_inliers_.clear(); + curFeat_to_map_inliers_.reserve(inliers.size()); + for (int idx : inliers) { + int curIdx = curIdxOfCorr[idx]; + int mpIdx = mapIdxOfCorr[idx]; + curFeat_to_map_inliers_.emplace_back(curIdx, mpIdx); + } + return true; +} + +bool VisualOdometry::shouldInsertKeyframe() const { + // simple: periodic or tracking weakening + bool periodic = (frameCount_ % params_.keyframeInterval) == 0; + bool weakTracking = (lastPnPInliers_ > 0 && lastPnPInliers_ < params_.pnpMinInliers + 10); + + // baseline & rotation criteria + auto extractRt = [](const Mat& T, Mat& R, Mat& t){ R = T(Rect(0,0,3,3)).clone(); t = T(Rect(3,0,1,3)).clone(); }; + Mat R_prev, t_prev, R_cur, t_cur; extractRt(kfPoseSE3_, R_prev, t_prev); extractRt(currentPoseSE3_, R_cur, t_cur); + double baseline = norm(t_cur - t_prev); + Mat R_delta = R_prev.t() * R_cur; + double traceVal = R_delta.at(0,0) + R_delta.at(1,1) + R_delta.at(2,2); + double rotAngle = std::acos(std::min(1.0, std::max(-1.0, (traceVal - 1.0) / 2.0))) * 180.0 / CV_PI; + bool baselineEnough = baseline > params_.keyframeMinBaseline; + bool rotationEnough = rotAngle > params_.keyframeMinRotationDeg; + + // mapping expansion need: many unmatched candidate features + bool needExpansion = unmatchedRatio_ > 0.5; // heuristic + + return periodic || weakTracking || baselineEnough || rotationEnough || needExpansion; +} + +void VisualOdometry::triangulateWithLastKeyframe() { + if (kf_descriptors_.empty() || cur_descriptors_.empty()) return; + + // Match KF -> current again to find candidates without map points + std::vector> knn; + matcher_->knnMatch(kf_descriptors_, cur_descriptors_, knn, 2); + std::vector kfPts, curPts; + std::vector kfIdxs; + for (auto &v : knn) { + if (v.size()>=2 && v[0].distance < 0.75*v[1].distance) { + int qi = v[0].queryIdx; int ti = v[0].trainIdx; + if (qi >=0 && qi < (int)kf_kp_to_map_.size() && kf_kp_to_map_[qi] == -1) { + kfPts.push_back(kf_keypoints_[qi].pt); + curPts.push_back(cur_keypoints_[ti].pt); + kfIdxs.push_back(qi); + } + } + } + if (kfPts.size() < 15) return; + + auto extractRt = [](const Mat& T, Mat& R, Mat& t){ + R = T(Rect(0,0,3,3)).clone(); + t = T(Rect(3,0,1,3)).clone(); + }; + Mat R1,t1,R2,t2; extractRt(kfPoseSE3_, R1,t1); extractRt(currentPoseSE3_, R2,t2); + + Mat P1(3,4,CV_64F), P2(3,4,CV_64F); + R1.copyTo(P1(Rect(0,0,3,3))); t1.copyTo(P1(Rect(3,0,1,3))); + R2.copyTo(P2(Rect(0,0,3,3))); t2.copyTo(P2(Rect(3,0,1,3))); + P1 = K_.K * P1; P2 = K_.K * P2; + + Mat X4; triangulatePoints(P1, P2, kfPts, curPts, X4); + + auto reprojErr = [&](const Point3d& X, const Mat& R, const Mat& t, const Point2f& uv){ + Mat Xv = (Mat_(3,1) << X.x,X.y,X.z); + Mat x = K_.K * (R*Xv + t); + double u = x.at(0)/x.at(2); + double v = x.at(1)/x.at(2); + double du = u - uv.x; double dv = v - uv.y; return std::sqrt(du*du+dv*dv); + }; + + for (int i=0;i(0,i); + double hy = X4.at(1,i); + double hz = X4.at(2,i); + double hw = X4.at(3,i); + if (std::abs(hw) < 1e-8) continue; + Point3d X(hx/hw, hy/hw, hz/hw); + // positive depth check (in both views) + Mat R1_,t1_,R2_,t2_; + R1_ = R1; t1_ = t1; R2_ = R2; t2_ = t2; + Mat Xv = (Mat_(3,1) << X.x,X.y,X.z); + Mat Y1 = R1_*Xv + t1_; + Mat Y2 = R2_*Xv + t2_; + double z1 = Y1.ptr(2)[0]; + double z2 = Y2.ptr(2)[0]; + if (z1 <= 0 || z2 <= 0) continue; + double e1 = reprojErr(X, R1, t1, kfPts[i]); + double e2 = reprojErr(X, R2, t2, curPts[i]); + if (e1 > params_.reprojErrThresh || e2 > params_.reprojErrThresh) continue; + int mpIndex = (int)mapPoints_.size(); + mapPoints_.push_back(X); + int kf_kp = kfIdxs[i]; + if (kf_kp >=0 && kf_kp < (int)kf_kp_to_map_.size()) kf_kp_to_map_[kf_kp] = mpIndex; + } +} + +void VisualOdometry::runLocalBAIfEnabled() { + if (!backendEnabled_) return; + // Collect recent keyframes and build observations for map points + int N = std::max(2, params_.localWindowSize); + int totalKFs = (int)keyframePoses_.size(); + if (totalKFs < 2) return; + int startIdx = std::max(0, totalKFs - N); + + // Build KeyFrame vector for optimizer + std::vector kfs; + kfs.reserve(totalKFs - startIdx); + for (int i = startIdx; i < totalKFs; ++i) { + KeyFrame kf; + kf.id = i; // use history index as id + kf.kps = keyframeKeypoints_[i]; + kf.desc = keyframeDescriptors_[i]; + // extract R,t from SE3 4x4 + const Mat &T = keyframePoses_[i]; + Mat R = T(Rect(0,0,3,3)).clone(); + Mat t = T(Rect(3,0,1,3)).clone(); + kf.R_w = R; kf.t_w = t; + kfs.push_back(kf); + } + + // Build MapPoint vector with observations from selected window + std::vector mps; + mps.reserve(mapPoints_.size()); + // Track which points are observed within the window + std::vector pointObserved(mapPoints_.size(), 0); + for (int i = startIdx; i < totalKFs; ++i) { + const auto &kp2mp = keyframeKpToMap_[i]; + for (int kpIdx = 0; kpIdx < (int)kp2mp.size(); ++kpIdx) { + int mpIdx = kp2mp[kpIdx]; + if (mpIdx >= 0 && mpIdx < (int)mapPoints_.size()) { + pointObserved[mpIdx] = 1; + } + } + } + // Create mps with observations + for (size_t mpIdx = 0; mpIdx < mapPoints_.size(); ++mpIdx) { + if (!pointObserved[mpIdx]) continue; // only optimize observed points + MapPoint mp; + mp.id = (int)mpIdx; + mp.p = mapPoints_[mpIdx]; + for (int i = startIdx; i < totalKFs; ++i) { + const auto &kp2mp = keyframeKpToMap_[i]; + if ((int)kp2mp.size() == 0) continue; + for (int kpIdx = 0; kpIdx < (int)kp2mp.size(); ++kpIdx) { + if (kp2mp[kpIdx] == (int)mpIdx) { + mp.observations.emplace_back(i, kpIdx); + } + } + } + if (!mp.observations.empty()) mps.push_back(mp); + } + + if (kfs.size() < 2 || mps.empty()) return; + + // Local/fixed KF indices relative to full history ids + std::vector localKfIndices; + localKfIndices.reserve(kfs.size()); + for (const auto &kf : kfs) localKfIndices.push_back(kf.id); + // Fix the oldest KF in the window to anchor optimization + std::vector fixedKfIndices = { startIdx }; + + // Select backend + + if (params_.backendType == 0) { + #ifndef USE_G2O + CV_Error(Error::StsBadArg, "G2O backend is not available (not built with SLAM module)"); + #else + Optimizer::localBundleAdjustment( + kfs, mps, localKfIndices, fixedKfIndices, + K_.K.at(0,0), K_.K.at(1,1), K_.K.at(0,2), K_.K.at(1,2), + params_.baMaxIters); + #endif + } else + { + Optimizer::localBundleAdjustmentSFM( + kfs, mps, localKfIndices, fixedKfIndices, + K_.K.at(0,0), K_.K.at(1,1), K_.K.at(0,2), K_.K.at(1,2), + params_.baMaxIters); + } + + // Write back optimized poses and points + for (const auto &kf : kfs) { + int i = kf.id; + Mat T = Mat::eye(4,4,CV_64F); + kf.R_w.copyTo(T(Rect(0,0,3,3))); + kf.t_w.copyTo(T(Rect(3,0,1,3))); + if (i >= 0 && i < totalKFs) keyframePoses_[i] = T.clone(); + if (i == totalKFs - 1) { // also update current KF pose cache + kfPoseSE3_ = T.clone(); + currentPoseSE3_ = T.clone(); + } + } + for (const auto &mp : mps) { + if (mp.id >= 0 && mp.id < (int)mapPoints_.size()) { + mapPoints_[mp.id] = mp.p; + } + } +} + +} // namespace vo +} // namespace cv \ No newline at end of file From f4a0958ff411507ba8215ed11b1d2135c6d237b9 Mon Sep 17 00:00:00 2001 From: Xuanrui Zhu <634210954@qq.com> Date: Thu, 4 Dec 2025 03:50:54 +0000 Subject: [PATCH 03/29] basic VO --- modules/slam/include/opencv2/slam/vo.hpp | 116 +--- modules/slam/src/optimizer.cpp | 114 +++- modules/slam/src/vo.cpp | 831 +++++++++++++---------- 3 files changed, 606 insertions(+), 455 deletions(-) diff --git a/modules/slam/include/opencv2/slam/vo.hpp b/modules/slam/include/opencv2/slam/vo.hpp index 9f08e203e3f..31681ed1a69 100644 --- a/modules/slam/include/opencv2/slam/vo.hpp +++ b/modules/slam/include/opencv2/slam/vo.hpp @@ -1,114 +1,32 @@ #pragma once - #include #include -#include +#include #include namespace cv { namespace vo { -struct CameraIntrinsics { - Mat K; // 3x3 camera matrix - Mat dist; // distortion coefficients (optional) -}; - -struct VOParams { - int minInitInliers = 80; - double ransacProb = 0.999; - double ransacThresh = 1.5; // pixels - double minParallaxDeg = 1.0; - double reprojErrThresh = 3.0; - int pnpMinInliers = 30; - double pnpReprojErr = 3.0; - int keyframeInterval = 10; // frames - double keyframeMinBaseline = 0.05; // relative scale - double keyframeMinRotationDeg = 5.0; - int localWindowSize = 7; // for BA - // Backend parameters - bool enableBackend = false; - int baMaxIters = 50; - // Backend type: 0=g2o (default), 1=opencv_sfm - int backendType = 0; +struct VisualOdometryOptions { + int min_matches = 15; + int min_inliers = 4; + double min_inlier_ratio = 0.1; + double diff_zero_thresh = 2.0; + double flow_zero_thresh = 0.3; + double min_translation_norm = 1e-4; + double min_rotation_rad = 0.5 * CV_PI / 180.0; + int max_matches_keep = 500; + double flow_weight_lambda = 5.0; }; class VisualOdometry { public: - VisualOdometry(const Ptr& feature, - const Ptr& matcher, - const CameraIntrinsics& intrinsics, - const VOParams& params = VOParams()); - - // Process a single frame. Returns true if tracking succeeded. - bool processFrame(const Mat& img, double timestamp); - - // Current camera pose (R|t) as 4x4 SE3 matrix. - Mat getCurrentPose() const; - - // Full trajectory as vector of 4x4 matrices. - std::vector getTrajectory() const; - - // Backend controls - void setEnableBackend(bool on); - void setWindowSize(int N); - void setBAParams(double reprojThresh, int maxIters); - void setBackendType(int type); - - // Loop closure & localization stubs (Milestone 4) - enum Mode { MODE_SLAM, MODE_LOCALIZATION }; - void setMode(Mode m); - bool saveMap(const std::string& path) const; - bool loadMap(const std::string& path); - void enableLoopClosure(bool on); - void setPlaceRecognizer(const Ptr& vocabFeature); - + VisualOdometry(cv::Ptr feature, cv::Ptr matcher); + int run(const std::string &imageDir, double scale_m = 1.0, const VisualOdometryOptions &options = VisualOdometryOptions()); private: - // internal helpers - bool initializeTwoView(const Mat& img0, const Mat& img1); - bool trackWithPnP(const Mat& img); - bool shouldInsertKeyframe() const; - void triangulateWithLastKeyframe(); - void runLocalBAIfEnabled(); - - // state - Ptr feature_; - Ptr matcher_; - CameraIntrinsics K_; - VOParams params_; - - bool initialized_ = false; - bool backendEnabled_ = false; - int frameCount_ = 0; - Mode mode_ = MODE_SLAM; - bool loopClosureEnabled_ = false; - - // cached - Mat lastImg_; - Mat currentPoseSE3_; // 4x4 - std::vector trajectory_; - - // minimal containers (placeholder, to be integrated with MapManager/KeyFrame) - std::vector mapPoints_; // simple storage for demo - - // keyframe state (latest KF only) - std::vector kf_keypoints_; - Mat kf_descriptors_; - std::vector kf_kp_to_map_; // size == kf_keypoints_.size(), -1 if not mapped - Mat kfPoseSE3_; // KF pose 4x4 - - // current frame cached features - std::vector cur_keypoints_; - Mat cur_descriptors_; - int lastPnPInliers_ = 0; - std::vector> curFeat_to_map_inliers_; // (cur_kp_idx, map_idx) - double unmatchedRatio_ = 0.0; // fraction of matched keyframe features without map points (for KF decision) - - // simple history for sliding window - std::vector keyframePoses_; // poses of historical KFs - std::vector> keyframeKeypoints_; - std::vector keyframeDescriptors_; - std::vector> keyframeKpToMap_; // per-KF mapping from kp idx to map point idx + cv::Ptr feature_; + cv::Ptr matcher_; }; -} // namespace vo -} // namespace cv \ No newline at end of file +} +} // namespace cv::vo \ No newline at end of file diff --git a/modules/slam/src/optimizer.cpp b/modules/slam/src/optimizer.cpp index fe138c718cc..51b438357d6 100644 --- a/modules/slam/src/optimizer.cpp +++ b/modules/slam/src/optimizer.cpp @@ -1,6 +1,8 @@ #include "opencv2/slam/optimizer.hpp" #include #include +#include +#include #include // If g2o is enabled (CMake defines USE_G2O), compile and use the g2o-based @@ -151,7 +153,117 @@ void Optimizer::localBundleAdjustmentSFM( const std::vector &fixedKfIndices, double fx, double fy, double cx, double cy, int iterations) { - //TODO: Implement SFM-based local BA + // Simple SFM-based local bundle adjustment using OpenCV routines. + // This implementation alternates: + // - point-only Gauss-Newton updates (poses kept fixed) + // - pose-only optimization using PnP (points kept fixed) + // It uses available jacobians w.r.t. point coordinates from + // computeReprojectionError and the existing optimizePose() helper + // to avoid depending on g2o / ceres. This provides an effective + // local refinement for SLAM windows when full BA backends are + // not available. + + if (localKfIndices.empty() || mappoints.empty()) return; + + // Fast lookup for whether a keyframe id is in the local window + std::unordered_set localSet(localKfIndices.begin(), localKfIndices.end()); + std::unordered_set fixedSet(fixedKfIndices.begin(), fixedKfIndices.end()); + + const double HUBER_DELTA = 3.0; // pixels + const double MAX_POINT_STEP = 1.0; // meters (safeguard per-iter) + + // Outer iterations: alternate point updates and pose updates + for (int iter = 0; iter < iterations; ++iter) { + // --- Point-only Gauss-Newton update (poses fixed) --- + for (size_t pid = 0; pid < mappoints.size(); ++pid) { + MapPoint &mp = mappoints[pid]; + if (mp.isBad) continue; + + Mat JtJ = Mat::zeros(3, 3, CV_64F); + Mat Jtr = Mat::zeros(3, 1, CV_64F); + int obsCount = 0; + + for (const auto &obs : mp.observations) { + int kfIdx = obs.first; + int kpIdx = obs.second; + if (localSet.find(kfIdx) == localSet.end()) continue; + if (kfIdx < 0 || kfIdx >= static_cast(keyframes.size())) continue; + KeyFrame &kf = keyframes[kfIdx]; + if (kpIdx < 0 || kpIdx >= static_cast(kf.kps.size())) continue; + + Point2f observed = kf.kps[kpIdx].pt; + // Project current point into this keyframe + Point2f proj = project(mp.p, kf.R_w, kf.t_w, fx, fy, cx, cy); + if (proj.x < 0 || proj.y < 0) continue; + // Compute jacobians (fills jacobianPoint) + Mat jacPose, jacPoint; + computeReprojectionError(mp.p, kf.R_w, kf.t_w, observed, fx, fy, cx, cy, jacPose, jacPoint); + // jacPoint is 2x3, residual r = [u - u_obs; v - v_obs] + Mat r = (Mat_(2,1) << proj.x - observed.x, proj.y - observed.y); + + // robust weight (Huber) + double err = std::sqrt(r.at(0,0)*r.at(0,0) + r.at(1,0)*r.at(1,0)); + double w = 1.0; + if (err > HUBER_DELTA) w = HUBER_DELTA / err; + + // accumulate JtJ and Jtr + Mat Jt = jacPoint.t(); // 3x2 + Mat W = Mat::eye(2,2,CV_64F) * w; + Mat JtW = Jt * W; // 3x2 + JtJ += JtW * jacPoint; // 3x3 + Jtr += JtW * r; // 3x1 + obsCount++; + } + + if (obsCount < 2) continue; // not enough constraints + + // Solve for delta using SVD for stability + Mat delta; // 3x1 + bool solved = false; + if (cv::determinant(JtJ) != 0) { + solved = cv::solve(JtJ, -Jtr, delta, cv::DECOMP_SVD); + } else { + solved = cv::solve(JtJ, -Jtr, delta, cv::DECOMP_SVD); + } + if (!solved) continue; + + double step = std::sqrt(delta.at(0,0)*delta.at(0,0) + + delta.at(1,0)*delta.at(1,0) + + delta.at(2,0)*delta.at(2,0)); + if (step > MAX_POINT_STEP) { + double scale = MAX_POINT_STEP / step; + delta *= scale; + } + + mp.p.x += delta.at(0,0); + mp.p.y += delta.at(1,0); + mp.p.z += delta.at(2,0); + } + + // --- Pose-only optimization (points fixed) --- + // For each local keyframe that is not fixed, call optimizePose + for (int kfId : localKfIndices) { + if (fixedSet.find(kfId) != fixedSet.end()) continue; + if (kfId < 0 || kfId >= static_cast(keyframes.size())) continue; + KeyFrame &kf = keyframes[kfId]; + // Build matchedMpIndices: for each keypoint in this KF, which mappoint index it corresponds to + std::vector matchedMpIndices(kf.kps.size(), -1); + for (size_t mpIdx = 0; mpIdx < mappoints.size(); ++mpIdx) { + const MapPoint &mp = mappoints[mpIdx]; + if (mp.isBad) continue; + for (const auto &obs : mp.observations) { + if (obs.first == kfId) { + int kpIdx = obs.second; + if (kpIdx >= 0 && kpIdx < static_cast(matchedMpIndices.size())) + matchedMpIndices[kpIdx] = static_cast(mpIdx); + } + } + } + std::vector inliers; + // Use the same number of iterations as outer loop for RANSAC attempts + optimizePose(kf, mappoints, matchedMpIndices, fx, fy, cx, cy, inliers, std::max(20, iterations)); + } + } } diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index 439b8bba144..0bd730ac51f 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -1,388 +1,509 @@ -#include "opencv2/slam/vo.hpp" +#include "opencv2/slam/data_loader.hpp" +#include "opencv2/slam/feature.hpp" +#include "opencv2/slam/initializer.hpp" +#include "opencv2/slam/keyframe.hpp" +#include "opencv2/slam/localizer.hpp" +#include "opencv2/slam/map.hpp" +#include "opencv2/slam/matcher.hpp" #include "opencv2/slam/optimizer.hpp" +#include "opencv2/slam/pose.hpp" +#include "opencv2/slam/visualizer.hpp" +#include "opencv2/slam/vo.hpp" +#include +#include +#include +#include +#include +#include #include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace cv { namespace vo { -static Mat makeSE3(const Mat& R, const Mat& t) { - Mat T = Mat::eye(4, 4, CV_64F); - R.copyTo(T(Rect(0,0,3,3))); - t.copyTo(T(Rect(3,0,1,3))); - return T; +VisualOdometry::VisualOdometry(Ptr feature, Ptr matcher) + : feature_(std::move(feature)), matcher_(std::move(matcher)) { } -VisualOdometry::VisualOdometry(const Ptr& feature, - const Ptr& matcher, - const CameraIntrinsics& intrinsics, - const VOParams& params) - : feature_(feature), matcher_(matcher), K_(intrinsics), params_(params) { - currentPoseSE3_ = Mat::eye(4,4,CV_64F); -} - -bool VisualOdometry::processFrame(const Mat& img, double /*timestamp*/) { - frameCount_++; - if (!initialized_) { - if (lastImg_.empty()) { - lastImg_ = img.clone(); - return false; - } - initialized_ = initializeTwoView(lastImg_, img); - lastImg_ = img.clone(); - if (initialized_) trajectory_.push_back(currentPoseSE3_.clone()); - return initialized_; +int VisualOdometry::run(const std::string &imageDir, double scale_m, const VisualOdometryOptions &options){ + DataLoader loader(imageDir); + std::cout << "VisualOdometry: loaded " << loader.size() << " images from " << imageDir << std::endl; + if(loader.size() == 0){ + std::cerr << "VisualOdometry: no images found in " << imageDir << std::endl; + return -1; } - bool ok = trackWithPnP(img); - lastImg_ = img.clone(); - if (ok) { - trajectory_.push_back(currentPoseSE3_.clone()); - if (shouldInsertKeyframe()) { - // promote current frame to new keyframe using current features and PnP inliers - kfPoseSE3_ = currentPoseSE3_.clone(); - kf_keypoints_ = cur_keypoints_; - kf_descriptors_ = cur_descriptors_.clone(); - kf_kp_to_map_.assign((int)kf_keypoints_.size(), -1); - keyframePoses_.push_back(kfPoseSE3_.clone()); - keyframeKeypoints_.push_back(kf_keypoints_); - keyframeDescriptors_.push_back(kf_descriptors_.clone()); - // seed mapping for tracked correspondences - for (auto &pr : curFeat_to_map_inliers_) { - int curIdx = pr.first; int mpIdx = pr.second; - if (curIdx >=0 && curIdx < (int)kf_kp_to_map_.size()) kf_kp_to_map_[curIdx] = mpIdx; - } - // push kp->map history for BA - keyframeKpToMap_.push_back(kf_kp_to_map_); - triangulateWithLastKeyframe(); - runLocalBAIfEnabled(); - } + if(!feature_){ + feature_ = ORB::create(2000); } - return ok; -} - -Mat VisualOdometry::getCurrentPose() const { return currentPoseSE3_.clone(); } -std::vector VisualOdometry::getTrajectory() const { return trajectory_; } - -void VisualOdometry::setEnableBackend(bool on) { backendEnabled_ = on; } -void VisualOdometry::setWindowSize(int N) { params_.localWindowSize = N; } -void VisualOdometry::setBAParams(double reprojThresh, int maxIters) { - params_.reprojErrThresh = reprojThresh; - params_.baMaxIters = std::max(1, maxIters); -} -void VisualOdometry::setBackendType(int type) { params_.backendType = type; } -void VisualOdometry::setMode(Mode m) { mode_ = m; } -bool VisualOdometry::saveMap(const std::string& /*path*/) const { return false; } -bool VisualOdometry::loadMap(const std::string& /*path*/) { return false; } -void VisualOdometry::enableLoopClosure(bool on) { loopClosureEnabled_ = on; } -void VisualOdometry::setPlaceRecognizer(const Ptr& /*vocabFeature*/) {} - -bool VisualOdometry::initializeTwoView(const Mat& img0, const Mat& img1) { - std::vector k0, k1; Mat d0, d1; - feature_->detectAndCompute(img0, noArray(), k0, d0); - feature_->detectAndCompute(img1, noArray(), k1, d1); - if (k0.size() < 50 || k1.size() < 50) return false; - - std::vector> knn01; matcher_->knnMatch(d0, d1, knn01, 2); - std::vector good; - for (auto& v : knn01) if (v.size()>=2 && v[0].distance < 0.75 * v[1].distance) good.push_back(v[0]); - if (good.size() < 80) return false; - - std::vector p0, p1; - p0.reserve(good.size()); p1.reserve(good.size()); - for (auto& m : good) { p0.push_back(k0[m.queryIdx].pt); p1.push_back(k1[m.trainIdx].pt); } - - Mat mask; - Mat E = findEssentialMat(p0, p1, K_.K, RANSAC, params_.ransacProb, params_.ransacThresh, mask); - if (E.empty()) return false; - - Mat R, t; - int inliers = recoverPose(E, p0, p1, K_.K, R, t, mask); - if (inliers < params_.minInitInliers) return false; - - // Set first pose = Identity, second = (R,t) - currentPoseSE3_ = makeSE3(R, t); - - // Minimal triangulation demo (no storage of per-point observations yet) - Mat P0 = K_.K * Mat::eye(3,4,CV_64F); - Mat P1(3,4,CV_64F); R.copyTo(P1(Rect(0,0,3,3))); t.copyTo(P1(Rect(3,0,1,3))); P1 = K_.K * P1; - - std::vector tp0, tp1; tp0.reserve(inliers); tp1.reserve(inliers); - std::vector inlier_trainIdx; inlier_trainIdx.reserve(inliers); - for (size_t i=0;i(int(i))) { - tp0.push_back(p0[i]); tp1.push_back(p1[i]); - inlier_trainIdx.push_back(good[i].trainIdx); + if(!matcher_){ + matcher_ = DescriptorMatcher::create(DescriptorMatcher::BRUTEFORCE_HAMMING); } - if (tp0.size() >= 20) { - Mat X4; - triangulatePoints(P0, P1, tp0, tp1, X4); - // initialize KF as second frame - kf_keypoints_ = k1; - kf_descriptors_ = d1.clone(); - kf_kp_to_map_.assign((int)kf_keypoints_.size(), -1); - kfPoseSE3_ = currentPoseSE3_.clone(); - for (int i=0;i 0) { - int mpIndex = (int)mapPoints_.size(); - mapPoints_.push_back(X); - int trainIdx = inlier_trainIdx[i]; - if (trainIdx >=0 && trainIdx < (int)kf_kp_to_map_.size()) - kf_kp_to_map_[trainIdx] = mpIndex; - } - } + // Remove internal FeatureExtractor/Matcher in favor of injected OpenCV components + PoseEstimator poseEst; + Visualizer vis; + MapManager map; + // two-view initializer + Initializer initializer; + // configure Localizer with a slightly stricter Lowe ratio (0.7) + Localizer localizer(0.7f); + + // prepare per-run CSV diagnostics + std::string runTimestamp; + { + auto now = std::chrono::system_clock::now(); + std::time_t t = std::chrono::system_clock::to_time_t(now); + std::tm tm = *std::localtime(&t); + std::ostringstream ss; ss << std::put_time(&tm, "%Y%m%d_%H%M%S"); + runTimestamp = ss.str(); + } + std::filesystem::path resultDir("../../result"); + if(!std::filesystem::exists(resultDir)) std::filesystem::create_directories(resultDir); + // create a per-run folder under result/ named by timestamp + std::filesystem::path runDir = resultDir / runTimestamp; + if(!std::filesystem::exists(runDir)) std::filesystem::create_directories(runDir); + std::filesystem::path csvPath = runDir / std::string("run.csv"); + std::ofstream csv(csvPath); + if(csv){ + csv << "frame_id,mean_diff,median_flow,pre_matches,post_matches,inliers,inlier_ratio,integrated\n"; + csv.flush(); + std::cout << "Writing diagnostics to " << csvPath.string() << std::endl; + } else { + std::cerr << "Failed to open diagnostics CSV " << csvPath.string() << std::endl; } - return true; -} -bool VisualOdometry::trackWithPnP(const Mat& img) { - cur_keypoints_.clear(); - feature_->detectAndCompute(img, noArray(), cur_keypoints_, cur_descriptors_); - if (kf_descriptors_.empty() || cur_descriptors_.empty()) return false; - - // Match KF -> current - std::vector> knn; - matcher_->knnMatch(kf_descriptors_, cur_descriptors_, knn, 2); - std::vector good; - good.reserve(knn.size()); - for (auto &v : knn) if (v.size()>=2 && v[0].distance < 0.75*v[1].distance) good.push_back(v[0]); - - std::vector objPts; objPts.reserve(good.size()); - std::vector imgPts; imgPts.reserve(good.size()); - std::vector curIdxOfCorr; curIdxOfCorr.reserve(good.size()); - std::vector mapIdxOfCorr; mapIdxOfCorr.reserve(good.size()); - - int unmatchedCount = 0; - for (auto &m : good) { - int kf_idx = m.queryIdx; - int mp_idx = (kf_idx >=0 && kf_idx < (int)kf_kp_to_map_.size()) ? kf_kp_to_map_[kf_idx] : -1; - if (mp_idx >= 0 && mp_idx < (int)mapPoints_.size()) { - const Point3d &Xd = mapPoints_[mp_idx]; - objPts.emplace_back((float)Xd.x,(float)Xd.y,(float)Xd.z); - imgPts.push_back(cur_keypoints_[m.trainIdx].pt); - curIdxOfCorr.push_back(m.trainIdx); - mapIdxOfCorr.push_back(mp_idx); - } else { - unmatchedCount++; + Mat R_g = Mat::eye(3,3,CV_64F); + Mat t_g = Mat::zeros(3,1,CV_64F); + + // simple map structures + std::vector keyframes; + std::vector mappoints; + std::unordered_map keyframeIdToIndex; + + // Backend (BA) thread primitives + std::mutex mapMutex; // protects map and keyframe modifications and writeback + std::condition_variable backendCv; + std::atomic backendStop(false); + std::atomic backendRequests(0); + const int LOCAL_BA_WINDOW = 5; // window size for local BA (adjustable) + + // Start backend thread: waits for notifications and runs BA on a snapshot + std::thread backendThread([&]() { + while(!backendStop.load()){ + std::unique_lock lk(mapMutex); + backendCv.wait(lk, [&]{ return backendStop.load() || backendRequests.load() > 0; }); + if(backendStop.load()) break; + // snapshot map and keyframes + auto kfs_snapshot = map.keyframes(); + auto mps_snapshot = map.mappoints(); + // reset requests + backendRequests.store(0); + lk.unlock(); + + // determine local window + int K = static_cast(kfs_snapshot.size()); + if(K <= 0) continue; + int start = std::max(0, K - LOCAL_BA_WINDOW); + std::vector localKfIndices; + for(int ii = start; ii < K; ++ii) localKfIndices.push_back(ii); + std::vector fixedKfIndices; + if(start > 0) fixedKfIndices.push_back(0); + + // Run BA on snapshot (may take time) - uses Optimizer which will use g2o if enabled + Optimizer::localBundleAdjustmentSFM(kfs_snapshot, mps_snapshot, localKfIndices, fixedKfIndices, + loader.fx(), loader.fy(), loader.cx(), loader.cy(), 10); + + // write back optimized poses/points into main map under lock using id-based lookup + std::lock_guard lk2(mapMutex); + auto &kfs_ref = const_cast&>(map.keyframes()); + auto &mps_ref = const_cast&>(map.mappoints()); + // copy back poses by id to ensure we update the authoritative containers + for(const auto &kf_opt : kfs_snapshot){ + int idx = map.keyframeIndex(kf_opt.id); + if(idx >= 0 && idx < static_cast(kfs_ref.size())){ + kfs_ref[idx].R_w = kf_opt.R_w.clone(); + kfs_ref[idx].t_w = kf_opt.t_w.clone(); + } + } + // copy back mappoint positions by id + for(const auto &mp_opt : mps_snapshot){ + if(mp_opt.id <= 0) continue; + int idx = map.mapPointIndex(mp_opt.id); + if(idx >= 0 && idx < static_cast(mps_ref.size())){ + mps_ref[idx].p = mp_opt.p; + } + } } - } - unmatchedRatio_ = good.empty() ? 0.0 : (double)unmatchedCount / (double)good.size(); - if (objPts.size() < 4) return false; - - Mat rvec, tvec, R; - std::vector inliers; - bool ok = solvePnPRansac(objPts, imgPts, K_.K, K_.dist, rvec, tvec, false, - 100, params_.pnpReprojErr, 0.99, inliers, SOLVEPNP_ITERATIVE); - if (!ok || (int)inliers.size() < std::max(4, params_.pnpMinInliers)) return false; - - Rodrigues(rvec, R); - currentPoseSE3_ = makeSE3(R, tvec); - lastPnPInliers_ = (int)inliers.size(); - - // keep inlier correspondences for KF mapping when inserting a new KF - curFeat_to_map_inliers_.clear(); - curFeat_to_map_inliers_.reserve(inliers.size()); - for (int idx : inliers) { - int curIdx = curIdxOfCorr[idx]; - int mpIdx = mapIdxOfCorr[idx]; - curFeat_to_map_inliers_.emplace_back(curIdx, mpIdx); - } - return true; -} + }); + + Mat frame; + std::string imgPath; + int frame_id = 0; + + // persistent previous-frame storage (declare outside loop so detectAndCompute can use them) + static std::vector prevKp; + static Mat prevGray, prevDesc; + while(loader.getNextImage(frame, imgPath)){ + Mat gray = frame; + if(gray.channels() > 1) cvtColor(gray, gray, COLOR_BGR2GRAY); + + std::vector kps; + Mat desc; + // Detect and compute descriptors using injected feature_ (e.g., ORB) + feature_->detect(gray, kps); + feature_->compute(gray, kps, desc); + + // (previous-frame storage declared outside loop) + + if(!prevGray.empty() && !prevDesc.empty() && !desc.empty()){ + // KNN match with ratio test and mutual cross-check + std::vector> knn12, knn21; + matcher_->knnMatch(prevDesc, desc, knn12, 2); + matcher_->knnMatch(desc, prevDesc, knn21, 2); + auto ratioKeep = [&](const std::vector>& knn, bool forward) { + std::vector filtered; + for(size_t qi=0; qi= 2){ + if(knn[qi][1].distance > 0) { + if(best.distance / knn[qi][1].distance > ratio) continue; + } + } + // mutual check + int q = forward ? (int)qi : best.trainIdx; + int t = forward ? best.trainIdx : (int)qi; + // find reverse match for t + const auto &rev = forward ? knn21 : knn12; + if(t < 0 || t >= (int)rev.size() || rev[t].empty()) continue; + DMatch rbest = rev[t][0]; + if((forward && rbest.trainIdx == (int)qi) || (!forward && rbest.trainIdx == best.queryIdx)){ + filtered.push_back(best); + } + } + return filtered; + }; + std::vector goodMatches = ratioKeep(knn12, true); + + Mat imgMatches; + drawMatches(prevGray, prevKp, gray, kps, goodMatches, imgMatches, + Scalar::all(-1), Scalar::all(-1), std::vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); + + std::vector pts1, pts2; + pts1.reserve(goodMatches.size()); pts2.reserve(goodMatches.size()); + for(const auto &m: goodMatches){ + pts1.push_back(prevKp[m.queryIdx].pt); + pts2.push_back(kps[m.trainIdx].pt); + } -bool VisualOdometry::shouldInsertKeyframe() const { - // simple: periodic or tracking weakening - bool periodic = (frameCount_ % params_.keyframeInterval) == 0; - bool weakTracking = (lastPnPInliers_ > 0 && lastPnPInliers_ < params_.pnpMinInliers + 10); - - // baseline & rotation criteria - auto extractRt = [](const Mat& T, Mat& R, Mat& t){ R = T(Rect(0,0,3,3)).clone(); t = T(Rect(3,0,1,3)).clone(); }; - Mat R_prev, t_prev, R_cur, t_cur; extractRt(kfPoseSE3_, R_prev, t_prev); extractRt(currentPoseSE3_, R_cur, t_cur); - double baseline = norm(t_cur - t_prev); - Mat R_delta = R_prev.t() * R_cur; - double traceVal = R_delta.at(0,0) + R_delta.at(1,1) + R_delta.at(2,2); - double rotAngle = std::acos(std::min(1.0, std::max(-1.0, (traceVal - 1.0) / 2.0))) * 180.0 / CV_PI; - bool baselineEnough = baseline > params_.keyframeMinBaseline; - bool rotationEnough = rotAngle > params_.keyframeMinRotationDeg; - - // mapping expansion need: many unmatched candidate features - bool needExpansion = unmatchedRatio_ > 0.5; // heuristic - - return periodic || weakTracking || baselineEnough || rotationEnough || needExpansion; -} + // quick frame-diff to detect near-static frames + double meanDiff = 0.0; + if(!prevGray.empty()){ + Mat diff; absdiff(gray, prevGray, diff); + meanDiff = mean(diff)[0]; + } -void VisualOdometry::triangulateWithLastKeyframe() { - if (kf_descriptors_.empty() || cur_descriptors_.empty()) return; - - // Match KF -> current again to find candidates without map points - std::vector> knn; - matcher_->knnMatch(kf_descriptors_, cur_descriptors_, knn, 2); - std::vector kfPts, curPts; - std::vector kfIdxs; - for (auto &v : knn) { - if (v.size()>=2 && v[0].distance < 0.75*v[1].distance) { - int qi = v[0].queryIdx; int ti = v[0].trainIdx; - if (qi >=0 && qi < (int)kf_kp_to_map_.size() && kf_kp_to_map_[qi] == -1) { - kfPts.push_back(kf_keypoints_[qi].pt); - curPts.push_back(cur_keypoints_[ti].pt); - kfIdxs.push_back(qi); + // compute per-match flow magnitudes and median flow + std::vector flows; flows.reserve(pts1.size()); + double median_flow = 0.0; + for(size_t i=0;i tmp = flows; + size_t mid = tmp.size()/2; + std::nth_element(tmp.begin(), tmp.begin()+mid, tmp.end()); + median_flow = tmp[mid]; } - } - } - if (kfPts.size() < 15) return; - - auto extractRt = [](const Mat& T, Mat& R, Mat& t){ - R = T(Rect(0,0,3,3)).clone(); - t = T(Rect(3,0,1,3)).clone(); - }; - Mat R1,t1,R2,t2; extractRt(kfPoseSE3_, R1,t1); extractRt(currentPoseSE3_, R2,t2); - - Mat P1(3,4,CV_64F), P2(3,4,CV_64F); - R1.copyTo(P1(Rect(0,0,3,3))); t1.copyTo(P1(Rect(3,0,1,3))); - R2.copyTo(P2(Rect(0,0,3,3))); t2.copyTo(P2(Rect(3,0,1,3))); - P1 = K_.K * P1; P2 = K_.K * P2; - - Mat X4; triangulatePoints(P1, P2, kfPts, curPts, X4); - - auto reprojErr = [&](const Point3d& X, const Mat& R, const Mat& t, const Point2f& uv){ - Mat Xv = (Mat_(3,1) << X.x,X.y,X.z); - Mat x = K_.K * (R*Xv + t); - double u = x.at(0)/x.at(2); - double v = x.at(1)/x.at(2); - double du = u - uv.x; double dv = v - uv.y; return std::sqrt(du*du+dv*dv); - }; - - for (int i=0;i(0,i); - double hy = X4.at(1,i); - double hz = X4.at(2,i); - double hw = X4.at(3,i); - if (std::abs(hw) < 1e-8) continue; - Point3d X(hx/hw, hy/hw, hz/hw); - // positive depth check (in both views) - Mat R1_,t1_,R2_,t2_; - R1_ = R1; t1_ = t1; R2_ = R2; t2_ = t2; - Mat Xv = (Mat_(3,1) << X.x,X.y,X.z); - Mat Y1 = R1_*Xv + t1_; - Mat Y2 = R2_*Xv + t2_; - double z1 = Y1.ptr(2)[0]; - double z2 = Y2.ptr(2)[0]; - if (z1 <= 0 || z2 <= 0) continue; - double e1 = reprojErr(X, R1, t1, kfPts[i]); - double e2 = reprojErr(X, R2, t2, curPts[i]); - if (e1 > params_.reprojErrThresh || e2 > params_.reprojErrThresh) continue; - int mpIndex = (int)mapPoints_.size(); - mapPoints_.push_back(X); - int kf_kp = kfIdxs[i]; - if (kf_kp >=0 && kf_kp < (int)kf_kp_to_map_.size()) kf_kp_to_map_[kf_kp] = mpIndex; - } -} -void VisualOdometry::runLocalBAIfEnabled() { - if (!backendEnabled_) return; - // Collect recent keyframes and build observations for map points - int N = std::max(2, params_.localWindowSize); - int totalKFs = (int)keyframePoses_.size(); - if (totalKFs < 2) return; - int startIdx = std::max(0, totalKFs - N); - - // Build KeyFrame vector for optimizer - std::vector kfs; - kfs.reserve(totalKFs - startIdx); - for (int i = startIdx; i < totalKFs; ++i) { - KeyFrame kf; - kf.id = i; // use history index as id - kf.kps = keyframeKeypoints_[i]; - kf.desc = keyframeDescriptors_[i]; - // extract R,t from SE3 4x4 - const Mat &T = keyframePoses_[i]; - Mat R = T(Rect(0,0,3,3)).clone(); - Mat t = T(Rect(3,0,1,3)).clone(); - kf.R_w = R; kf.t_w = t; - kfs.push_back(kf); - } + int pre_matches = static_cast(goodMatches.size()); + int post_matches = pre_matches; + + // Two-view initialization: if map is empty and this is the second frame, try to bootstrap + if(map.keyframes().empty() && frame_id == 1){ + std::cout << "Attempting two-view initialization (frame 0 + 1), matches=" << goodMatches.size() << std::endl; + Mat R_init, t_init; + std::vector pts3D; + std::vector isTri; + // call initializer using the matched keypoints between prev and current + if(initializer.initialize(prevKp, kps, goodMatches, loader.fx(), loader.fy(), loader.cx(), loader.cy(), R_init, t_init, pts3D, isTri)){ + std::cout << "Initializer: success, creating initial keyframes and mappoints (" << pts3D.size() << ")" << std::endl; + // build two keyframes: previous (id = frame_id-1) and current (id = frame_id) + KeyFrame kf0, kf1; + kf0.id = frame_id - 1; + // prevGray/prevKp/prevDesc refer to previous frame + if(!prevGray.empty()){ + if(prevGray.channels() == 1){ cvtColor(prevGray, kf0.image, COLOR_GRAY2BGR); } + else kf0.image = prevGray.clone(); + } + kf0.kps = prevKp; + kf0.desc = prevDesc.clone(); + kf0.R_w = Mat::eye(3,3,CV_64F); + kf0.t_w = Mat::zeros(3,1,CV_64F); + + kf1.id = frame_id; + kf1.image = frame.clone(); + kf1.kps = kps; + kf1.desc = desc.clone(); + // initializer returns pose of second camera relative to first (world = first) + kf1.R_w = R_init.clone(); + kf1.t_w = t_init.clone(); + + // convert initializer 3D points (in first camera frame) to MapPoints in world coords (world==first) + std::vector newMps; + newMps.reserve(pts3D.size()); + for(size_t i=0;i lk(mapMutex); + keyframes.push_back(std::move(kf0)); + map.addKeyFrame(keyframes.back()); + keyframes.push_back(std::move(kf1)); + map.addKeyFrame(keyframes.back()); + if(!newMps.empty()) map.addMapPoints(newMps); + } + + // set global pose to second keyframe (apply scale) + Mat t_d; kf1.t_w.convertTo(t_d, CV_64F); + t_g = t_d * scale_m; + R_g = kf1.R_w.clone(); + double x = t_g.at(0); + double z = t_g.at(2); + vis.addPose(x,-z); + + // write CSV entry for initialization frame + if(csv){ + csv << frame_id << ",init,0,0,0,0,0,1\n"; + csv.flush(); + } + + // notify backend to run BA on initial map + backendRequests.fetch_add(1); + backendCv.notify_one(); + // skip the usual PnP / poseEst path for this frame since we've initialized + prevGray = gray.clone(); prevKp = kps; prevDesc = desc.clone(); + frame_id++; + continue; + } else { + std::cout << "Initializer: failed to initialize from first two frames" << std::endl; + } + } - // Build MapPoint vector with observations from selected window - std::vector mps; - mps.reserve(mapPoints_.size()); - // Track which points are observed within the window - std::vector pointObserved(mapPoints_.size(), 0); - for (int i = startIdx; i < totalKFs; ++i) { - const auto &kp2mp = keyframeKpToMap_[i]; - for (int kpIdx = 0; kpIdx < (int)kp2mp.size(); ++kpIdx) { - int mpIdx = kp2mp[kpIdx]; - if (mpIdx >= 0 && mpIdx < (int)mapPoints_.size()) { - pointObserved[mpIdx] = 1; + // Try PnP against map points first (via Localizer) + bool solvedByPnP = false; + Mat R_pnp, t_pnp; int inliers_pnp = 0; + int preMatches_pnp = 0, postMatches_pnp = 0; double meanReproj_pnp = 0.0; + if(localizer.tryPnP(map, desc, kps, loader.fx(), loader.fy(), loader.cx(), loader.cy(), gray.cols, gray.rows, + options.min_inliers, R_pnp, t_pnp, inliers_pnp, frame_id, &frame, runDir.string(), + &preMatches_pnp, &postMatches_pnp, &meanReproj_pnp)){ + solvedByPnP = true; + std::cout << "PnP solved: preMatches="< pose estimation failed, skipping integration." << std::endl; + } else if(inliers < MIN_INLIERS || matchCount < MIN_MATCHES){ + // Not enough geometric support -> skip (unless absolute inliers pass) + integrate = false; + // std::cout << " -> insufficient matches/inliers (by both absolute and relative metrics), skipping integration." << std::endl; + } else { + // We have sufficient geometric support. Only skip if motion is truly negligible + // (both translation and rotation tiny) AND the image/flow indicate near-identical frames. + const double MIN_TRANSLATION_NORM = 1e-4; + const double MIN_ROTATION_RAD = (0.5 * CV_PI / 180.0); // 0.5 degree + const double DIFF_ZERO_THRESH = 2.0; // nearly identical image + const double FLOW_ZERO_THRESH = 0.3; // nearly zero flow in pixels + + if(t_norm < MIN_TRANSLATION_NORM && std::abs(rot_angle) < MIN_ROTATION_RAD + && meanDiff < DIFF_ZERO_THRESH && median_flow < FLOW_ZERO_THRESH){ + integrate = false; // truly static + // std::cout << " -> negligible motion and near-identical frames, skipping integration." << std::endl; + } + } + if (inliers >= options.min_inliers || (inliers >= 2 && matchCount > 50 && median_flow > 2.0)) { + integrate = true; + } + + // integrate transform if allowed + if(integrate){ + Mat t_d; t.convertTo(t_d, CV_64F); + Mat t_scaled = t_d * scale_m; + Mat R_d; R.convertTo(R_d, CV_64F); + t_g = t_g + R_g * t_scaled; + R_g = R_g * R_d; + double x = t_g.at(0); + double z = t_g.at(2); + vis.addPose(x,-z); } + + // if we integrated, create a keyframe and optionally triangulate new map points + if(integrate){ + KeyFrame kf; + kf.id = frame_id; + kf.image = frame.clone(); + kf.kps = kps; + kf.desc = desc.clone(); + kf.R_w = R_g.clone(); kf.t_w = t_g.clone(); + + bool didTriangulate = false; + if(!map.keyframes().empty() && map.keyframes().back().id == frame_id - 1){ + // triangulate between last keyframe and this frame using normalized coordinates + const KeyFrame &last = map.keyframes().back(); + std::vector pts1n, pts2n; pts1n.reserve(pts1.size()); pts2n.reserve(pts2.size()); + double fx = loader.fx(), fy = loader.fy(), cx = loader.cx(), cy = loader.cy(); + for(size_t i=0;i pts1_kp_idx; pts1_kp_idx.reserve(goodMatches.size()); + std::vector pts2_kp_idx; pts2_kp_idx.reserve(goodMatches.size()); + for(const auto &m: goodMatches){ pts1_kp_idx.push_back(m.queryIdx); pts2_kp_idx.push_back(m.trainIdx); } + auto newPts = map.triangulateBetweenLastTwo(pts1n, pts2n, pts1_kp_idx, pts2_kp_idx, last, keyframes.empty() ? kf : keyframes.back(), fx, fy, cx, cy); + if(!newPts.empty()){ + didTriangulate = true; + // already appended inside MapManager + } + } + + { + // insert keyframe and map points under lock to keep consistent state + std::lock_guard lk(mapMutex); + keyframes.push_back(std::move(kf)); + map.addKeyFrame(keyframes.back()); + } + if(didTriangulate){ + std::cout << "Created keyframe " << frame_id << " and triangulated new map points (total=" << map.mappoints().size() << ")" << std::endl; + } else { + std::cout << "Created keyframe " << frame_id << " (no triangulation)" << std::endl; + } + // Notify backend thread to run local BA asynchronously + backendRequests.fetch_add(1); + backendCv.notify_one(); + } + + // write CSV line + if(csv){ + csv << frame_id << "," << meanDiff << "," << median_flow << "," << pre_matches << "," << post_matches << "," << inliers << "," << inlierRatio << "," << (integrate?1:0) << "\n"; + csv.flush(); + } + + // Always show a single image; if we have matches, draw small boxes around matched keypoints + Mat visImg; + if(frame.channels() > 1) visImg = frame.clone(); + else cvtColor(gray, visImg, COLOR_GRAY2BGR); + std::string info = std::string("Frame ") + std::to_string(frame_id) + " matches=" + std::to_string(matchCount) + " inliers=" + std::to_string(inliers); + if(!goodMatches.empty()){ + for(size_t mi=0; mi(goodMatches.size())){ + isInlier = mask.at(static_cast(mi), 0) != 0; + } else if(mask.cols == static_cast(goodMatches.size())){ + isInlier = mask.at(0, static_cast(mi)) != 0; + } + } + Scalar col = isInlier ? Scalar(0,255,0) : Scalar(0,0,255); + Point ip(cvRound(p2.x), cvRound(p2.y)); + Rect r(ip - Point(4,4), Size(8,8)); + rectangle(visImg, r, col, 2, LINE_AA); + } + } + putText(visImg, info, Point(10,20), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(0,255,0), 2); + vis.showFrame(visImg); + + } else { + vis.showFrame(gray); } + } else { + Mat visFrame; drawKeypoints(gray, kps, visFrame, Scalar::all(-1), DrawMatchesFlags::DRAW_RICH_KEYPOINTS); + vis.showFrame(visFrame); } - if (!mp.observations.empty()) mps.push_back(mp); - } - if (kfs.size() < 2 || mps.empty()) return; - - // Local/fixed KF indices relative to full history ids - std::vector localKfIndices; - localKfIndices.reserve(kfs.size()); - for (const auto &kf : kfs) localKfIndices.push_back(kf.id); - // Fix the oldest KF in the window to anchor optimization - std::vector fixedKfIndices = { startIdx }; - - // Select backend - - if (params_.backendType == 0) { - #ifndef USE_G2O - CV_Error(Error::StsBadArg, "G2O backend is not available (not built with SLAM module)"); - #else - Optimizer::localBundleAdjustment( - kfs, mps, localKfIndices, fixedKfIndices, - K_.K.at(0,0), K_.K.at(1,1), K_.K.at(0,2), K_.K.at(1,2), - params_.baMaxIters); - #endif - } else - { - Optimizer::localBundleAdjustmentSFM( - kfs, mps, localKfIndices, fixedKfIndices, - K_.K.at(0,0), K_.K.at(1,1), K_.K.at(0,2), K_.K.at(1,2), - params_.baMaxIters); + vis.showTopdown(); + // update prev + prevGray = gray.clone(); prevKp = kps; prevDesc = desc.clone(); + frame_id++; + char key = (char)waitKey(1); + if(key == 27) break; } - // Write back optimized poses and points - for (const auto &kf : kfs) { - int i = kf.id; - Mat T = Mat::eye(4,4,CV_64F); - kf.R_w.copyTo(T(Rect(0,0,3,3))); - kf.t_w.copyTo(T(Rect(3,0,1,3))); - if (i >= 0 && i < totalKFs) keyframePoses_[i] = T.clone(); - if (i == totalKFs - 1) { // also update current KF pose cache - kfPoseSE3_ = T.clone(); - currentPoseSE3_ = T.clone(); - } - } - for (const auto &mp : mps) { - if (mp.id >= 0 && mp.id < (int)mapPoints_.size()) { - mapPoints_[mp.id] = mp.p; + // save trajectory with timestamp into result/ folder + try{ + // save trajectory into the per-run folder using a simple filename (no timestamp) + std::filesystem::path outDir = resultDir / runTimestamp; + if(!std::filesystem::exists(outDir)) std::filesystem::create_directories(outDir); + std::filesystem::path outPath = outDir / std::string("trajectory.png"); + if(vis.saveTrajectory(outPath.string())){ + std::cout << "Saved trajectory to " << outPath.string() << std::endl; + } else { + std::cerr << "Failed to save trajectory to " << outPath.string() << std::endl; } + } catch(const std::exception &e){ + std::cerr << "Error saving trajectory: " << e.what() << std::endl; } + + // Shutdown backend thread gracefully + backendStop.store(true); + backendCv.notify_one(); + if(backendThread.joinable()) backendThread.join(); + + return 0; } } // namespace vo From 654366d34a2c5d3ba371d7fc71177ca574b9123d Mon Sep 17 00:00:00 2001 From: Xuanrui Zhu <634210954@qq.com> Date: Thu, 4 Dec 2025 07:08:25 +0000 Subject: [PATCH 04/29] Add CMakeLists.txt --- modules/slam/CMakeLists.txt | 30 ++++++++++++++++++++++++++++++ modules/slam/src/data_loader.cpp | 6 +++--- modules/slam/src/keyframe.cpp | 0 3 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 modules/slam/CMakeLists.txt delete mode 100644 modules/slam/src/keyframe.cpp diff --git a/modules/slam/CMakeLists.txt b/modules/slam/CMakeLists.txt new file mode 100644 index 00000000000..747dcb37cfa --- /dev/null +++ b/modules/slam/CMakeLists.txt @@ -0,0 +1,30 @@ +set(the_description "SLAM module: visual odometry / mapping / localization utilities") + +# Core dependencies used by the sources (see includes in src/ and include/) +ocv_define_module(slam + opencv_core + opencv_imgproc + opencv_calib3d + opencv_features2d + OPTIONAL opencv_sfm + opencv_imgcodecs + opencv_highgui + opencv_video + WRAP python +) + +# Ensure C++17 is enabled for this module (required for ) +if(TARGET ${the_module}) + set_target_properties(${the_module} PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON) + # For older GCC/libstdc++ (<9) we may need to link stdc++fs + if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.0") + target_link_libraries(${the_module} PRIVATE stdc++fs) + endif() +endif() + +# If you need to add special include directories, libraries or compile flags, +# add them below using the OpenCV contrib module helper macros, for example: +# +# ocv_include_directories(${CMAKE_CURRENT_LIST_DIR}/include) +# ocv_module_compile_options(-Wall) +# diff --git a/modules/slam/src/data_loader.cpp b/modules/slam/src/data_loader.cpp index 1488e9a02a0..8deb8e9c26b 100644 --- a/modules/slam/src/data_loader.cpp +++ b/modules/slam/src/data_loader.cpp @@ -11,7 +11,7 @@ namespace vo { DataLoader::DataLoader(const std::string &imageDir) : currentIndex(0), fx_(700.0), fy_(700.0), cx_(0.5), cy_(0.5) { - // 使用 std::filesystem 先检查目录是否存在 + // Use std::filesystem to check whether the directory exists first try { std::filesystem::path p(imageDir); if(!std::filesystem::exists(p) || !std::filesystem::is_directory(p)){ @@ -23,7 +23,7 @@ DataLoader::DataLoader(const std::string &imageDir) // fallthrough to try glob } - // 使用 glob 列出文件,捕获可能的 OpenCV 异常 + // Use glob to list files, catching any possible OpenCV exceptions try { glob(imageDir + "/*", imageFiles, false); } catch(const Exception &e){ @@ -37,7 +37,7 @@ DataLoader::DataLoader(const std::string &imageDir) return; } - // 尝试在 imageDir 或其上一级目录寻找 sensor.yaml + // Try to find sensor.yaml in imageDir or its parent directory std::filesystem::path p(imageDir); std::string yaml1 = (p / "sensor.yaml").string(); std::string yaml2 = (p.parent_path() / "sensor.yaml").string(); diff --git a/modules/slam/src/keyframe.cpp b/modules/slam/src/keyframe.cpp deleted file mode 100644 index e69de29bb2d..00000000000 From 9b7309ab5a1fa358dae818fcb8311d2e4268887e Mon Sep 17 00:00:00 2001 From: Xuanrui Zhu <634210954@qq.com> Date: Thu, 4 Dec 2025 18:35:57 +0000 Subject: [PATCH 05/29] Address link issue --- modules/slam/include/opencv2/slam/vo.hpp | 26 ++++++++++++------------ modules/slam/src/vo.cpp | 3 ++- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/modules/slam/include/opencv2/slam/vo.hpp b/modules/slam/include/opencv2/slam/vo.hpp index 31681ed1a69..5172edb74df 100644 --- a/modules/slam/include/opencv2/slam/vo.hpp +++ b/modules/slam/include/opencv2/slam/vo.hpp @@ -7,22 +7,22 @@ namespace cv { namespace vo { -struct VisualOdometryOptions { - int min_matches = 15; - int min_inliers = 4; - double min_inlier_ratio = 0.1; - double diff_zero_thresh = 2.0; - double flow_zero_thresh = 0.3; - double min_translation_norm = 1e-4; - double min_rotation_rad = 0.5 * CV_PI / 180.0; - int max_matches_keep = 500; - double flow_weight_lambda = 5.0; +struct CV_EXPORTS_W_SIMPLE VisualOdometryOptions { + CV_PROP_RW int min_matches = 15; + CV_PROP_RW int min_inliers = 4; + CV_PROP_RW double min_inlier_ratio = 0.1; + CV_PROP_RW double diff_zero_thresh = 2.0; + CV_PROP_RW double flow_zero_thresh = 0.3; + CV_PROP_RW double min_translation_norm = 1e-4; + CV_PROP_RW double min_rotation_rad = 0.5 * CV_PI / 180.0; + CV_PROP_RW int max_matches_keep = 500; + CV_PROP_RW double flow_weight_lambda = 5.0; }; -class VisualOdometry { +class CV_EXPORTS_W VisualOdometry { public: - VisualOdometry(cv::Ptr feature, cv::Ptr matcher); - int run(const std::string &imageDir, double scale_m = 1.0, const VisualOdometryOptions &options = VisualOdometryOptions()); + CV_WRAP VisualOdometry(cv::Ptr feature, cv::Ptr matcher); + CV_WRAP int run(const std::string &imageDir, double scale_m = 1.0, const VisualOdometryOptions &options = VisualOdometryOptions()); private: cv::Ptr feature_; cv::Ptr matcher_; diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index 0bd730ac51f..7532ece6283 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -65,7 +65,8 @@ int VisualOdometry::run(const std::string &imageDir, double scale_m, const Visua std::ostringstream ss; ss << std::put_time(&tm, "%Y%m%d_%H%M%S"); runTimestamp = ss.str(); } - std::filesystem::path resultDir("../../result"); + // create a 'result' folder inside the provided imageDir (use filesystem join to be robust) + std::filesystem::path resultDir = std::filesystem::path(imageDir) / "result"; if(!std::filesystem::exists(resultDir)) std::filesystem::create_directories(resultDir); // create a per-run folder under result/ named by timestamp std::filesystem::path runDir = resultDir / runTimestamp; From d53fa3cbe7739b22683ca62b61ee0eb675701c52 Mon Sep 17 00:00:00 2001 From: Xuanrui Zhu <634210954@qq.com> Date: Fri, 5 Dec 2025 06:12:43 +0000 Subject: [PATCH 06/29] Fix cxx standard build error --- modules/slam/src/data_loader.cpp | 52 ++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/modules/slam/src/data_loader.cpp b/modules/slam/src/data_loader.cpp index 8deb8e9c26b..aedfc79e90a 100644 --- a/modules/slam/src/data_loader.cpp +++ b/modules/slam/src/data_loader.cpp @@ -1,6 +1,21 @@ #include "opencv2/slam/data_loader.hpp" #include -#include +#if defined(__has_include) +# if __has_include() +# include +# define HAVE_STD_FILESYSTEM 1 +# namespace fs = std::filesystem; +# elif __has_include() +# include +# define HAVE_STD_FILESYSTEM 1 +# namespace fs = std::experimental::filesystem; +# else +# define HAVE_STD_FILESYSTEM 0 +# endif +#else +# define HAVE_STD_FILESYSTEM 0 +#endif +#include #include #include #include @@ -11,10 +26,12 @@ namespace vo { DataLoader::DataLoader(const std::string &imageDir) : currentIndex(0), fx_(700.0), fy_(700.0), cx_(0.5), cy_(0.5) { - // Use std::filesystem to check whether the directory exists first + // Check whether the directory exists. Prefer std::filesystem when available, + // otherwise fall back to POSIX stat. +#if HAVE_STD_FILESYSTEM try { - std::filesystem::path p(imageDir); - if(!std::filesystem::exists(p) || !std::filesystem::is_directory(p)){ + fs::path p(imageDir); + if(!fs::exists(p) || !fs::is_directory(p)){ std::cerr << "DataLoader: imageDir does not exist or is not a directory: " << imageDir << std::endl; return; } @@ -22,6 +39,13 @@ DataLoader::DataLoader(const std::string &imageDir) std::cerr << "DataLoader: filesystem error checking imageDir: " << e.what() << std::endl; // fallthrough to try glob } +#else + struct stat sb; + if(stat(imageDir.c_str(), &sb) != 0 || !S_ISDIR(sb.st_mode)){ + std::cerr << "DataLoader: imageDir does not exist or is not a directory: " << imageDir << std::endl; + return; + } +#endif // Use glob to list files, catching any possible OpenCV exceptions try { @@ -37,13 +61,29 @@ DataLoader::DataLoader(const std::string &imageDir) return; } - // Try to find sensor.yaml in imageDir or its parent directory - std::filesystem::path p(imageDir); + // Try to find sensor.yaml in imageDir or its parent directory. + // Use filesystem paths when available; otherwise build paths with simple string ops. +#if HAVE_STD_FILESYSTEM + fs::path p(imageDir); std::string yaml1 = (p / "sensor.yaml").string(); std::string yaml2 = (p.parent_path() / "sensor.yaml").string(); if(!loadIntrinsics(yaml1)){ loadIntrinsics(yaml2); // best-effort } +#else + auto make_parent_yaml = [](const std::string &dir)->std::string{ + std::string tmp = dir; + if(!tmp.empty() && tmp.back() == '/') tmp.pop_back(); + auto pos = tmp.find_last_of('/'); + if(pos == std::string::npos) return std::string("sensor.yaml"); + return tmp.substr(0, pos) + "/sensor.yaml"; + }; + std::string yaml1 = imageDir + "/sensor.yaml"; + std::string yaml2 = make_parent_yaml(imageDir); + if(!loadIntrinsics(yaml1)){ + loadIntrinsics(yaml2); // best-effort + } +#endif } bool DataLoader::loadIntrinsics(const std::string &yamlPath){ From b96a45bc9901088424dd9cb184ae0b9e1139a40b Mon Sep 17 00:00:00 2001 From: Xuanrui Zhu <634210954@qq.com> Date: Fri, 5 Dec 2025 07:20:21 +0000 Subject: [PATCH 07/29] Fix filesystem:: namespace issue --- modules/slam/src/data_loader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/slam/src/data_loader.cpp b/modules/slam/src/data_loader.cpp index aedfc79e90a..446c1485e51 100644 --- a/modules/slam/src/data_loader.cpp +++ b/modules/slam/src/data_loader.cpp @@ -4,11 +4,11 @@ # if __has_include() # include # define HAVE_STD_FILESYSTEM 1 -# namespace fs = std::filesystem; + namespace fs = std::filesystem; # elif __has_include() # include # define HAVE_STD_FILESYSTEM 1 -# namespace fs = std::experimental::filesystem; + namespace fs = std::experimental::filesystem; # else # define HAVE_STD_FILESYSTEM 0 # endif From 61470086d567fe2d69546257f00e175d5e2e9a08 Mon Sep 17 00:00:00 2001 From: Xuanrui Zhu <634210954@qq.com> Date: Fri, 5 Dec 2025 08:16:29 +0000 Subject: [PATCH 08/29] Add function ensureDirectoryExists() --- .../slam/include/opencv2/slam/data_loader.hpp | 2 + modules/slam/src/data_loader.cpp | 41 +++++++++++++++++++ modules/slam/src/localizer.cpp | 4 +- modules/slam/src/vo.cpp | 34 +++++++-------- 4 files changed, 63 insertions(+), 18 deletions(-) diff --git a/modules/slam/include/opencv2/slam/data_loader.hpp b/modules/slam/include/opencv2/slam/data_loader.hpp index acc7795a1f8..109a83639f0 100644 --- a/modules/slam/include/opencv2/slam/data_loader.hpp +++ b/modules/slam/include/opencv2/slam/data_loader.hpp @@ -6,6 +6,8 @@ namespace cv{ namespace vo{ +bool ensureDirectoryExists(const std::string &dir); + class DataLoader { public: // 构造:传入图像目录(可以是相对或绝对路径) diff --git a/modules/slam/src/data_loader.cpp b/modules/slam/src/data_loader.cpp index 446c1485e51..4ad81c17509 100644 --- a/modules/slam/src/data_loader.cpp +++ b/modules/slam/src/data_loader.cpp @@ -16,13 +16,54 @@ # define HAVE_STD_FILESYSTEM 0 #endif #include +#include +#include +#include #include #include #include + namespace cv { namespace vo { +bool ensureDirectoryExists(const std::string &dir){ + if(dir.empty()) return false; +#if HAVE_STD_FILESYSTEM + try{ + fs::path p(dir); + return fs::create_directories(p) || fs::exists(p); + }catch(...){ return false; } +#else + std::string tmp = dir; + if(tmp.empty()) return false; + while(tmp.size() > 1 && tmp.back() == '/') tmp.pop_back(); + std::string cur; + if(!tmp.empty() && tmp[0] == '/') cur = "/"; + size_t pos = 0; + while(pos < tmp.size()){ + size_t next = tmp.find('/', pos); + std::string part = (next == std::string::npos) ? tmp.substr(pos) : tmp.substr(pos, next-pos); + if(!part.empty()){ + if(cur.size() > 1 && cur.back() != '/') cur += '/'; + cur += part; + if(mkdir(cur.c_str(), 0755) != 0){ + if(errno == EEXIST){ /* ok */ } + else { + struct stat st; + if(stat(cur.c_str(), &st) == 0 && S_ISDIR(st.st_mode)){ + // ok + } else return false; + } + } + } + if(next == std::string::npos) break; + pos = next + 1; + } + return true; +#endif +} + DataLoader::DataLoader(const std::string &imageDir) : currentIndex(0), fx_(700.0), fy_(700.0), cx_(0.5), cy_(0.5) { diff --git a/modules/slam/src/localizer.cpp b/modules/slam/src/localizer.cpp index 40647f17518..8e7c3849888 100644 --- a/modules/slam/src/localizer.cpp +++ b/modules/slam/src/localizer.cpp @@ -1,7 +1,7 @@ #include "opencv2/slam/localizer.hpp" #include "opencv2/slam/matcher.hpp" +#include "opencv2/slam/data_loader.hpp" #include -#include #include #include #include @@ -126,7 +126,7 @@ bool Localizer::tryPnP(const MapManager &map, const Mat &desc, const std::vector // Diagnostics: draw matches and inliers if requested if(!outDir.empty() && image){ try{ - std::filesystem::create_directories(outDir); + ensureDirectoryExists(outDir); Mat vis; if(image->channels() == 1) cvtColor(*image, vis, COLOR_GRAY2BGR); else vis = image->clone(); diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index 7532ece6283..24876f4a33b 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -65,20 +64,23 @@ int VisualOdometry::run(const std::string &imageDir, double scale_m, const Visua std::ostringstream ss; ss << std::put_time(&tm, "%Y%m%d_%H%M%S"); runTimestamp = ss.str(); } - // create a 'result' folder inside the provided imageDir (use filesystem join to be robust) - std::filesystem::path resultDir = std::filesystem::path(imageDir) / "result"; - if(!std::filesystem::exists(resultDir)) std::filesystem::create_directories(resultDir); + // create a 'result' folder inside the provided imageDir (portable, avoids requiring ) + std::string resultDirStr = imageDir; + if(resultDirStr.empty()) resultDirStr = std::string("."); + if(resultDirStr.back() == '/') resultDirStr.pop_back(); + resultDirStr += "/result"; + ensureDirectoryExists(resultDirStr); // create a per-run folder under result/ named by timestamp - std::filesystem::path runDir = resultDir / runTimestamp; - if(!std::filesystem::exists(runDir)) std::filesystem::create_directories(runDir); - std::filesystem::path csvPath = runDir / std::string("run.csv"); + std::string runDirStr = resultDirStr + "/" + runTimestamp; + ensureDirectoryExists(runDirStr); + std::string csvPath = runDirStr + "/run.csv"; std::ofstream csv(csvPath); if(csv){ csv << "frame_id,mean_diff,median_flow,pre_matches,post_matches,inliers,inlier_ratio,integrated\n"; csv.flush(); - std::cout << "Writing diagnostics to " << csvPath.string() << std::endl; + std::cout << "Writing diagnostics to " << csvPath << std::endl; } else { - std::cerr << "Failed to open diagnostics CSV " << csvPath.string() << std::endl; + std::cerr << "Failed to open diagnostics CSV " << csvPath << std::endl; } Mat R_g = Mat::eye(3,3,CV_64F); @@ -317,7 +319,7 @@ int VisualOdometry::run(const std::string &imageDir, double scale_m, const Visua Mat R_pnp, t_pnp; int inliers_pnp = 0; int preMatches_pnp = 0, postMatches_pnp = 0; double meanReproj_pnp = 0.0; if(localizer.tryPnP(map, desc, kps, loader.fx(), loader.fy(), loader.cx(), loader.cy(), gray.cols, gray.rows, - options.min_inliers, R_pnp, t_pnp, inliers_pnp, frame_id, &frame, runDir.string(), + options.min_inliers, R_pnp, t_pnp, inliers_pnp, frame_id, &frame, runDirStr, &preMatches_pnp, &postMatches_pnp, &meanReproj_pnp)){ solvedByPnP = true; std::cout << "PnP solved: preMatches="< Date: Fri, 5 Dec 2025 08:41:15 +0000 Subject: [PATCH 09/29] define cross-platform macro --- modules/slam/src/data_loader.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/modules/slam/src/data_loader.cpp b/modules/slam/src/data_loader.cpp index 4ad81c17509..3210ebc9cf6 100644 --- a/modules/slam/src/data_loader.cpp +++ b/modules/slam/src/data_loader.cpp @@ -19,6 +19,15 @@ #include #include #include +#ifdef _WIN32 +# include +# ifndef S_ISDIR +# define S_ISDIR(mode) (((mode) & S_IFDIR) != 0) +# endif +# ifndef mkdir +# define mkdir(path, mode) _mkdir(path) +# endif +#endif #include #include #include From 1d94a0b011255c5a67de26616043599369030fc5 Mon Sep 17 00:00:00 2001 From: Xuanrui Zhu <634210954@qq.com> Date: Fri, 5 Dec 2025 12:28:53 +0000 Subject: [PATCH 10/29] Add sfm/g2o dependency check --- modules/slam/CMakeLists.txt | 32 +++++++++++++++++++ .../slam/include/opencv2/slam/optimizer.hpp | 11 +++++-- modules/slam/src/optimizer.cpp | 15 +++++---- modules/slam/src/vo.cpp | 4 +-- 4 files changed, 51 insertions(+), 11 deletions(-) diff --git a/modules/slam/CMakeLists.txt b/modules/slam/CMakeLists.txt index 747dcb37cfa..3b86f319862 100644 --- a/modules/slam/CMakeLists.txt +++ b/modules/slam/CMakeLists.txt @@ -13,6 +13,38 @@ ocv_define_module(slam WRAP python ) +# Detect optional dependencies and expose preprocessor macros so sources +# can use #if defined(HAVE_SFM) / #if defined(HAVE_G2O) +if(TARGET ${the_module}) + # opencv_sfm is declared OPTIONAL above; if its target exists enable HAVE_SFM + if(TARGET opencv_sfm) + message(STATUS "slam: opencv_sfm target found — enabling HAVE_SFM") + target_compile_definitions(${the_module} PRIVATE HAVE_SFM=1) + # also add a global definition so header-parsers (python generator) + # running during build/config time can evaluate #if HAVE_SFM + add_definitions(-DHAVE_SFM=1) + target_link_libraries(${the_module} PRIVATE opencv_sfm) + else() + message(STATUS "slam: opencv_sfm not found — HAVE_SFM disabled") + endif() + + # Try to detect g2o (header + library). This is a best-effort detection: we + # look for a common g2o header and try to find a library name. Users on + # Windows should install/build g2o and make it discoverable to CMake. + find_path(G2O_INCLUDE_DIR NAMES g2o/core/sparse_optimizer.h) + find_library(G2O_LIBRARY NAMES g2o_core g2o) + if(G2O_INCLUDE_DIR AND G2O_LIBRARY) + message(STATUS "slam: g2o found (headers and library) — enabling HAVE_G2O") + target_include_directories(${the_module} PRIVATE ${G2O_INCLUDE_DIR}) + target_link_libraries(${the_module} PRIVATE ${G2O_LIBRARY}) + target_compile_definitions(${the_module} PRIVATE HAVE_G2O=1) + # also expose globally so external header parsers can evaluate #if HAVE_G2O + add_definitions(-DHAVE_G2O=1) + else() + message(STATUS "slam: g2o not found — HAVE_G2O disabled") + endif() +endif() + # Ensure C++17 is enabled for this module (required for ) if(TARGET ${the_module}) set_target_properties(${the_module} PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON) diff --git a/modules/slam/include/opencv2/slam/optimizer.hpp b/modules/slam/include/opencv2/slam/optimizer.hpp index a46263d7b15..b99b980815c 100644 --- a/modules/slam/include/opencv2/slam/optimizer.hpp +++ b/modules/slam/include/opencv2/slam/optimizer.hpp @@ -16,7 +16,7 @@ class Optimizer { // Local Bundle Adjustment // Optimizes a window of recent keyframes and all observed map points // fixedKFs: indices of keyframes to keep fixed during optimization -#ifdef USE_G2O +#if defined(HAVE_G2O) static void localBundleAdjustmentG2O( std::vector &keyframes, std::vector &mappoints, @@ -25,6 +25,8 @@ class Optimizer { double fx, double fy, double cx, double cy, int iterations = 10); #endif + +#if defined(HAVE_SFM) static void localBundleAdjustmentSFM( std::vector &keyframes, std::vector &mappoints, @@ -32,6 +34,7 @@ class Optimizer { const std::vector &fixedKfIndices, double fx, double fy, double cx, double cy, int iterations = 10); +#endif // Pose-only optimization (optimize camera pose given fixed 3D points) static bool optimizePose( KeyFrame &kf, @@ -40,14 +43,16 @@ class Optimizer { double fx, double fy, double cx, double cy, std::vector &inliers, int iterations = 10); - + +#if defined(HAVE_SFM) // Global Bundle Adjustment (expensive, use after loop closure) static void globalBundleAdjustmentSFM( std::vector &keyframes, std::vector &mappoints, double fx, double fy, double cx, double cy, int iterations = 20); - +#endif + private: // Compute reprojection error and Jacobian static double computeReprojectionError( diff --git a/modules/slam/src/optimizer.cpp b/modules/slam/src/optimizer.cpp index 51b438357d6..b1e037e142a 100644 --- a/modules/slam/src/optimizer.cpp +++ b/modules/slam/src/optimizer.cpp @@ -5,10 +5,10 @@ #include #include -// If g2o is enabled (CMake defines USE_G2O), compile and use the g2o-based +// If g2o is enabled (CMake defines HAVE_G2O), compile and use the g2o-based // implementation. Otherwise fall back to a simplified OpenCV-based implementation. -#ifdef USE_G2O +#if defined(HAVE_G2O) #include #include #include @@ -19,7 +19,7 @@ #include #include #include -#else +#elif defined(HAVE_SFM) #include "opencv2/sfm.hpp" #endif @@ -30,8 +30,8 @@ namespace vo { Optimizer::Optimizer() {} -#ifdef USE_G2O -void Optimizer::localBundleAdjustment( +#if defined(HAVE_G2O) +void Optimizer::localBundleAdjustmentG2O( std::vector &keyframes, std::vector &mappoints, const std::vector &localKfIndices, @@ -146,6 +146,7 @@ void Optimizer::localBundleAdjustment( } } #endif +#if defined(HAVE_SFM) void Optimizer::localBundleAdjustmentSFM( std::vector &keyframes, std::vector &mappoints, @@ -265,7 +266,7 @@ void Optimizer::localBundleAdjustmentSFM( } } } - +#endif bool Optimizer::optimizePose( KeyFrame &kf, @@ -307,6 +308,7 @@ bool Optimizer::optimizePose( return true; } +#if defined(HAVE_SFM) void Optimizer::globalBundleAdjustmentSFM( std::vector &keyframes, std::vector &mappoints, @@ -321,6 +323,7 @@ void Optimizer::globalBundleAdjustmentSFM( localBundleAdjustmentSFM(keyframes, mappoints, localKfIndices, fixedKfIndices, fx, fy, cx, cy, iterations); std::cout << "Optimizer: Global BA completed" << std::endl; } +#endif double Optimizer::computeReprojectionError( const Point3d &point3D, diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index 24876f4a33b..c1ac1e09a9e 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -119,11 +119,11 @@ int VisualOdometry::run(const std::string &imageDir, double scale_m, const Visua for(int ii = start; ii < K; ++ii) localKfIndices.push_back(ii); std::vector fixedKfIndices; if(start > 0) fixedKfIndices.push_back(0); - + #if defined(HAVE_SFM) // Run BA on snapshot (may take time) - uses Optimizer which will use g2o if enabled Optimizer::localBundleAdjustmentSFM(kfs_snapshot, mps_snapshot, localKfIndices, fixedKfIndices, loader.fx(), loader.fy(), loader.cx(), loader.cy(), 10); - + #endif // write back optimized poses/points into main map under lock using id-based lookup std::lock_guard lk2(mapMutex); auto &kfs_ref = const_cast&>(map.keyframes()); From d9b4a44176e9b9d88e13c64a6f7dc1aa3a360b20 Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Wed, 10 Dec 2025 18:46:22 +0800 Subject: [PATCH 11/29] fix trailing whitespace --- .../slam/include/opencv2/slam/data_loader.hpp | 2 +- .../slam/include/opencv2/slam/initializer.hpp | 18 +- modules/slam/include/opencv2/slam/map.hpp | 12 +- .../slam/include/opencv2/slam/optimizer.hpp | 6 +- modules/slam/src/initializer.cpp | 162 +++++++++--------- modules/slam/src/map.cpp | 44 ++--- 6 files changed, 122 insertions(+), 122 deletions(-) diff --git a/modules/slam/include/opencv2/slam/data_loader.hpp b/modules/slam/include/opencv2/slam/data_loader.hpp index 109a83639f0..b143547fae6 100644 --- a/modules/slam/include/opencv2/slam/data_loader.hpp +++ b/modules/slam/include/opencv2/slam/data_loader.hpp @@ -41,6 +41,6 @@ class DataLoader { // 相机内参(若未加载则为回退值) double fx_, fy_, cx_, cy_; }; - + } // namespace vo } // namespace cv diff --git a/modules/slam/include/opencv2/slam/initializer.hpp b/modules/slam/include/opencv2/slam/initializer.hpp index b0e8676a0f9..62816b0b3c0 100644 --- a/modules/slam/include/opencv2/slam/initializer.hpp +++ b/modules/slam/include/opencv2/slam/initializer.hpp @@ -11,7 +11,7 @@ namespace vo { class Initializer { public: Initializer(); - + // Attempt initialization with two frames // Returns true if initialization successful bool initialize(const std::vector &kps1, @@ -21,13 +21,13 @@ class Initializer { Mat &R, Mat &t, std::vector &points3D, std::vector &isTriangulated); - + // Check if frames have sufficient parallax for initialization static bool checkParallax(const std::vector &kps1, const std::vector &kps2, const std::vector &matches, double minMedianParallax = 15.0); - + private: // Reconstruct from Homography bool reconstructH(const std::vector &pts1, @@ -38,7 +38,7 @@ class Initializer { std::vector &points3D, std::vector &isTriangulated, float ¶llax); - + // Reconstruct from Fundamental/Essential bool reconstructF(const std::vector &pts1, const std::vector &pts2, @@ -48,7 +48,7 @@ class Initializer { std::vector &points3D, std::vector &isTriangulated, float ¶llax); - + // Check reconstructed points quality int checkRT(const Mat &R, const Mat &t, const std::vector &pts1, @@ -57,24 +57,24 @@ class Initializer { std::vector &isGood, double fx, double fy, double cx, double cy, float ¶llax); - + // Triangulate points void triangulate(const Mat &P1, const Mat &P2, const std::vector &pts1, const std::vector &pts2, std::vector &points3D); - + // Decompose Homography void decomposeH(const Mat &H, std::vector &Rs, std::vector &ts, std::vector &normals); - + // Compute homography score float computeScore(const Mat &H21, const Mat &H12, const std::vector &pts1, const std::vector &pts2, std::vector &inliersH, float sigma = 1.0); - + // Compute fundamental score float computeScoreF(const Mat &F21, const std::vector &pts1, diff --git a/modules/slam/include/opencv2/slam/map.hpp b/modules/slam/include/opencv2/slam/map.hpp index 38d8d7097b3..66e4f6c7ae9 100644 --- a/modules/slam/include/opencv2/slam/map.hpp +++ b/modules/slam/include/opencv2/slam/map.hpp @@ -11,22 +11,22 @@ struct MapPoint { int id = -1; // unique id for id-based lookups Point3d p; // 3D position in world frame std::vector> observations; // pairs of (keyframe id, keypoint idx) - + // Quality management fields Mat descriptor; // Representative descriptor for matching int nObs = 0; // Number of observations bool isBad = false; // Flag for bad points to be culled double minDistance = 0.0; // Min viewing distance double maxDistance = 0.0; // Max viewing distance - + // Statistics int nFound = 0; // Number of times found in tracking int nVisible = 0; // Number of times visible - + // Constructor MapPoint() = default; MapPoint(const Point3d& pos) : p(pos) {} - + // Helper: compute found ratio float getFoundRatio() const { return nVisible > 0 ? static_cast(nFound) / nVisible : 0.0f; @@ -72,10 +72,10 @@ class MapManager { // Quality management void cullBadMapPoints(); - double computeReprojError(const MapPoint &mp, const KeyFrame &kf, + double computeReprojError(const MapPoint &mp, const KeyFrame &kf, double fx, double fy, double cx, double cy) const; void updateMapPointDescriptor(MapPoint &mp); - + // Statistics int countGoodMapPoints() const; diff --git a/modules/slam/include/opencv2/slam/optimizer.hpp b/modules/slam/include/opencv2/slam/optimizer.hpp index b99b980815c..1f3d5425e70 100644 --- a/modules/slam/include/opencv2/slam/optimizer.hpp +++ b/modules/slam/include/opencv2/slam/optimizer.hpp @@ -12,7 +12,7 @@ namespace vo { class Optimizer { public: Optimizer(); - + // Local Bundle Adjustment // Optimizes a window of recent keyframes and all observed map points // fixedKFs: indices of keyframes to keep fixed during optimization @@ -51,7 +51,7 @@ class Optimizer { std::vector &mappoints, double fx, double fy, double cx, double cy, int iterations = 20); -#endif +#endif private: // Compute reprojection error and Jacobian @@ -62,7 +62,7 @@ class Optimizer { double fx, double fy, double cx, double cy, Mat &jacobianPose, Mat &jacobianPoint); - + // Project 3D point to image static Point2f project( const Point3d &point3D, diff --git a/modules/slam/src/initializer.cpp b/modules/slam/src/initializer.cpp index 2bdd5a99241..6bbdea263a5 100644 --- a/modules/slam/src/initializer.cpp +++ b/modules/slam/src/initializer.cpp @@ -15,52 +15,52 @@ bool Initializer::initialize(const std::vector &kps1, Mat &R, Mat &t, std::vector &points3D, std::vector &isTriangulated) { - + if(matches.size() < 50) { std::cout << "Initializer: too few matches (" << matches.size() << ")" << std::endl; return false; } - + // Extract matched points std::vector pts1, pts2; pts1.reserve(matches.size()); pts2.reserve(matches.size()); - + for(const auto &m : matches) { pts1.push_back(kps1[m.queryIdx].pt); pts2.push_back(kps2[m.trainIdx].pt); } - + // Estimate both Homography and Fundamental std::vector inliersH, inliersF; Mat H = findHomography(pts1, pts2, RANSAC, 3.0, inliersH, 2000, 0.999); Mat F = findFundamentalMat(pts1, pts2, FM_RANSAC, 3.0, 0.999, inliersF); - + if(H.empty() || F.empty()) { std::cout << "Initializer: failed to compute H or F" << std::endl; return false; } - + // Compute scores std::vector inlH, inlF; float scoreH = computeScore(H, H.inv(), pts1, pts2, inlH, 1.0); float scoreF = computeScoreF(F, pts1, pts2, inlF, 1.0); - + float ratio = scoreH / (scoreH + scoreF); - - std::cout << "Initializer: H score=" << scoreH << " F score=" << scoreF + + std::cout << "Initializer: H score=" << scoreH << " F score=" << scoreF << " ratio=" << ratio << std::endl; - + // Decide between H and F // If ratio > 0.45, scene is likely planar, use H // Otherwise use F for general scenes bool useH = (ratio > 0.45); - + Mat R_out, t_out; std::vector pts3D; std::vector isTri; float parallax = 0.0f; - + bool success = false; if(useH) { std::cout << "Initializer: using Homography" << std::endl; @@ -69,37 +69,37 @@ bool Initializer::initialize(const std::vector &kps1, std::cout << "Initializer: using Fundamental" << std::endl; success = reconstructF(pts1, pts2, F, fx, fy, cx, cy, R_out, t_out, pts3D, isTri, parallax); } - + if(!success) { std::cout << "Initializer: reconstruction failed" << std::endl; return false; } - + // Count good triangulated points int goodCount = 0; for(bool b : isTri) if(b) goodCount++; - - std::cout << "Initializer: triangulated " << goodCount << "/" << pts3D.size() + + std::cout << "Initializer: triangulated " << goodCount << "/" << pts3D.size() << " points, parallax=" << parallax << std::endl; - + // Check quality: need enough good points if(goodCount < 50) { std::cout << "Initializer: too few good points (" << goodCount << ")" << std::endl; return false; } - + // Check parallax if(parallax < 1.0f) { std::cout << "Initializer: insufficient parallax (" << parallax << ")" << std::endl; return false; } - + // Success R = R_out; t = t_out; points3D = pts3D; isTriangulated = isTri; - + std::cout << "Initializer: SUCCESS!" << std::endl; return true; } @@ -109,10 +109,10 @@ bool Initializer::checkParallax(const std::vector &kps1, const std::vector &matches, double minMedianParallax) { if(matches.empty()) return false; - + std::vector parallaxes; parallaxes.reserve(matches.size()); - + for(const auto &m : matches) { Point2f p1 = kps1[m.queryIdx].pt; Point2f p2 = kps2[m.trainIdx].pt; @@ -120,10 +120,10 @@ bool Initializer::checkParallax(const std::vector &kps1, double dy = p2.y - p1.y; parallaxes.push_back(std::sqrt(dx*dx + dy*dy)); } - + std::sort(parallaxes.begin(), parallaxes.end()); double median = parallaxes[parallaxes.size()/2]; - + return median >= minMedianParallax; } @@ -135,34 +135,34 @@ bool Initializer::reconstructF(const std::vector &pts1, std::vector &points3D, std::vector &isTriangulated, float ¶llax) { - + // Compute Essential matrix from F Mat K = (Mat_(3,3) << fx, 0, cx, 0, fy, cy, 0, 0, 1); Mat E = K.t() * F * K; - + // Recover pose from E Mat mask; int inliers = recoverPose(E, pts1, pts2, K, R, t, mask); - + if(inliers < 30) return false; - + // Triangulate points3D.resize(pts1.size()); isTriangulated.resize(pts1.size(), false); - + Mat P1 = Mat::eye(3, 4, CV_64F); Mat P2(3, 4, CV_64F); for(int i = 0; i < 3; ++i) { for(int j = 0; j < 3; ++j) P2.at(i,j) = R.at(i,j); P2.at(i, 3) = t.at(i, 0); } - + triangulate(P1, P2, pts1, pts2, points3D); - + // Check quality std::vector isGood; int nGood = checkRT(R, t, pts1, pts2, points3D, isGood, fx, fy, cx, cy, parallax); - + isTriangulated = isGood; return nGood >= 30; } @@ -175,51 +175,51 @@ bool Initializer::reconstructH(const std::vector &pts1, std::vector &points3D, std::vector &isTriangulated, float ¶llax) { - + // Decompose H to get multiple solutions std::vector Rs, ts, normals; decomposeH(H, Rs, ts, normals); - + // Try all solutions and pick best int bestGood = 0; int bestIdx = -1; std::vector> allPoints(Rs.size()); std::vector> allGood(Rs.size()); std::vector allParallax(Rs.size()); - + for(size_t i = 0; i < Rs.size(); ++i) { std::vector pts3D; std::vector isGood; float par = 0.0f; - + Mat P1 = Mat::eye(3, 4, CV_64F); Mat P2(3, 4, CV_64F); for(int r = 0; r < 3; ++r) { for(int c = 0; c < 3; ++c) P2.at(r,c) = Rs[i].at(r,c); P2.at(r, 3) = ts[i].at(r, 0); } - + triangulate(P1, P2, pts1, pts2, pts3D); int nGood = checkRT(Rs[i], ts[i], pts1, pts2, pts3D, isGood, fx, fy, cx, cy, par); - + allPoints[i] = pts3D; allGood[i] = isGood; allParallax[i] = par; - + if(nGood > bestGood) { bestGood = nGood; bestIdx = i; } } - + if(bestIdx < 0 || bestGood < 30) return false; - + R = Rs[bestIdx]; t = ts[bestIdx]; points3D = allPoints[bestIdx]; isTriangulated = allGood[bestIdx]; parallax = allParallax[bestIdx]; - + return true; } @@ -230,65 +230,65 @@ int Initializer::checkRT(const Mat &R, const Mat &t, std::vector &isGood, double fx, double fy, double cx, double cy, float ¶llax) { - + isGood.resize(points3D.size(), false); - + Mat R1 = Mat::eye(3, 3, CV_64F); Mat t1 = Mat::zeros(3, 1, CV_64F); Mat R2 = R; Mat t2 = t; - + int nGood = 0; std::vector cosParallaxes; cosParallaxes.reserve(points3D.size()); - + for(size_t i = 0; i < points3D.size(); ++i) { const Point3d &p3d = points3D[i]; - + // Check depth in first camera Mat p3dMat = (Mat_(3,1) << p3d.x, p3d.y, p3d.z); Mat p1 = R1 * p3dMat + t1; double z1 = p1.at(2, 0); - + if(z1 <= 0) continue; - + // Check depth in second camera Mat p2 = R2 * p3dMat + t2; double z2 = p2.at(2, 0); - + if(z2 <= 0) continue; - + // Check reprojection error in first camera double u1 = fx * p1.at(0,0)/z1 + cx; double v1 = fy * p1.at(1,0)/z1 + cy; double err1 = std::sqrt(std::pow(u1 - pts1[i].x, 2) + std::pow(v1 - pts1[i].y, 2)); - + if(err1 > 4.0) continue; - + // Check reprojection error in second camera double u2 = fx * p2.at(0,0)/z2 + cx; double v2 = fy * p2.at(1,0)/z2 + cy; double err2 = std::sqrt(std::pow(u2 - pts2[i].x, 2) + std::pow(v2 - pts2[i].y, 2)); - + if(err2 > 4.0) continue; - + // Check parallax Mat normal1 = p3dMat / norm(p3dMat); Mat normal2 = (p3dMat - t2) / norm(p3dMat - t2); double cosParallax = normal1.dot(normal2); - + cosParallaxes.push_back(cosParallax); - + isGood[i] = true; nGood++; } - + if(!cosParallaxes.empty()) { std::sort(cosParallaxes.begin(), cosParallaxes.end()); float medianCos = cosParallaxes[cosParallaxes.size()/2]; parallax = std::acos(medianCos) * 180.0 / CV_PI; } - + return nGood; } @@ -296,12 +296,12 @@ void Initializer::triangulate(const Mat &P1, const Mat &P2, const std::vector &pts1, const std::vector &pts2, std::vector &points3D) { - + points3D.resize(pts1.size()); - + Mat pts4D; triangulatePoints(P1, P2, pts1, pts2, pts4D); - + for(int i = 0; i < pts4D.cols; ++i) { Mat x = pts4D.col(i); x /= x.at(3, 0); @@ -311,13 +311,13 @@ void Initializer::triangulate(const Mat &P1, const Mat &P2, void Initializer::decomposeH(const Mat &H, std::vector &Rs, std::vector &ts, std::vector &normals) { - + Mat H_normalized = H / H.at(2,2); - + std::vector rotations, translations, normalsOut; int solutions = decomposeHomographyMat(H_normalized, Mat::eye(3,3,CV_64F), rotations, translations, normalsOut); - + Rs = rotations; ts = translations; normals = normalsOut; @@ -328,42 +328,42 @@ float Initializer::computeScore(const Mat &H21, const Mat &H12, const std::vector &pts2, std::vector &inliersH, float sigma) { - + const float th = 5.991 * sigma; inliersH.resize(pts1.size(), false); - + float score = 0.0f; const float thSq = th * th; - + for(size_t i = 0; i < pts1.size(); ++i) { // Forward error Mat p1 = (Mat_(3,1) << pts1[i].x, pts1[i].y, 1.0); Mat p2pred = H21 * p1; p2pred /= p2pred.at(2,0); - + float dx = pts2[i].x - p2pred.at(0,0); float dy = pts2[i].y - p2pred.at(1,0); float errSq = dx*dx + dy*dy; - + if(errSq < thSq) { score += thSq - errSq; } - + // Backward error Mat p2 = (Mat_(3,1) << pts2[i].x, pts2[i].y, 1.0); Mat p1pred = H12 * p2; p1pred /= p1pred.at(2,0); - + dx = pts1[i].x - p1pred.at(0,0); dy = pts1[i].y - p1pred.at(1,0); errSq = dx*dx + dy*dy; - + if(errSq < thSq) { score += thSq - errSq; inliersH[i] = true; } } - + return score; } @@ -372,33 +372,33 @@ float Initializer::computeScoreF(const Mat &F21, const std::vector &pts2, std::vector &inliersF, float sigma) { - + const float th = 3.841 * sigma; const float thSq = th * th; - + inliersF.resize(pts1.size(), false); float score = 0.0f; - + for(size_t i = 0; i < pts1.size(); ++i) { Mat p1 = (Mat_(3,1) << pts1[i].x, pts1[i].y, 1.0); Mat p2 = (Mat_(3,1) << pts2[i].x, pts2[i].y, 1.0); - + // Epipolar line in second image Mat l2 = F21 * p1; float a2 = l2.at(0,0); float b2 = l2.at(1,0); float c2 = l2.at(2,0); - + float num2 = a2*pts2[i].x + b2*pts2[i].y + c2; float den2 = a2*a2 + b2*b2; float distSq2 = (num2*num2) / den2; - + if(distSq2 < thSq) { score += thSq - distSq2; inliersF[i] = true; } } - + return score; } diff --git a/modules/slam/src/map.cpp b/modules/slam/src/map.cpp index 670eaff63ca..6c998b955fd 100644 --- a/modules/slam/src/map.cpp +++ b/modules/slam/src/map.cpp @@ -163,23 +163,23 @@ void MapManager::cullBadMapPoints() { const double MAX_REPROJ_ERROR = 3.0; // pixels const int MIN_OBSERVATIONS = 2; const float MIN_FOUND_RATIO = 0.25f; - + for(auto &mp : mappoints_) { if(mp.isBad) continue; - + // 1. Check observation count mp.nObs = static_cast(mp.observations.size()); if(mp.nObs < MIN_OBSERVATIONS) { mp.isBad = true; continue; } - + // 2. Check found ratio (avoid points rarely tracked) if(mp.nVisible > 10 && mp.getFoundRatio() < MIN_FOUND_RATIO) { mp.isBad = true; continue; } - + // 3. Check reprojection error across observations // Sample a few keyframes to check reprojection int errorCount = 0; @@ -188,27 +188,27 @@ void MapManager::cullBadMapPoints() { int kfId = obs.first; int kfIdx = keyframeIndex(kfId); if(kfIdx < 0 || kfIdx >= static_cast(keyframes_.size())) continue; - + const KeyFrame &kf = keyframes_[kfIdx]; // Use default camera params (should be passed in production code) double fx = 500.0, fy = 500.0, cx = 320.0, cy = 240.0; double error = computeReprojError(mp, kf, fx, fy, cx, cy); - + checkCount++; if(error > MAX_REPROJ_ERROR) { errorCount++; } - + // Sample up to 3 observations for efficiency if(checkCount >= 3) break; } - + // If majority of samples have high error, mark as bad if(checkCount > 0 && errorCount > checkCount / 2) { mp.isBad = true; } } - + // Remove bad points size_t before = mappoints_.size(); mappoints_.erase( @@ -217,7 +217,7 @@ void MapManager::cullBadMapPoints() { mappoints_.end() ); size_t after = mappoints_.size(); - + if(before - after > 0) { std::cout << "MapManager: culled " << (before - after) << " bad map points (" << after << " remain)" << std::endl; @@ -229,14 +229,14 @@ double MapManager::computeReprojError(const MapPoint &mp, const KeyFrame &kf, // Transform world point to camera frame Mat Xw = (Mat_(3,1) << mp.p.x, mp.p.y, mp.p.z); Mat Xc = kf.R_w.t() * (Xw - kf.t_w); - + double z = Xc.at(2, 0); if(z <= 0) return std::numeric_limits::max(); - + // Project to image double u = fx * (Xc.at(0, 0) / z) + cx; double v = fy * (Xc.at(1, 0) / z) + cy; - + // Find corresponding observation in this keyframe Point2f observed(-1, -1); for(const auto &obs : mp.observations) { @@ -248,9 +248,9 @@ double MapManager::computeReprojError(const MapPoint &mp, const KeyFrame &kf, } } } - + if(observed.x < 0) return std::numeric_limits::max(); - + // Compute reprojection error double dx = u - observed.x; double dy = v - observed.y; @@ -259,28 +259,28 @@ double MapManager::computeReprojError(const MapPoint &mp, const KeyFrame &kf, void MapManager::updateMapPointDescriptor(MapPoint &mp) { if(mp.observations.empty()) return; - + // Collect all descriptors from observations std::vector descriptors; for(const auto &obs : mp.observations) { int kfIdx = keyframeIndex(obs.first); if(kfIdx < 0) continue; - + const KeyFrame &kf = keyframes_[kfIdx]; int kpIdx = obs.second; if(kpIdx >= 0 && kpIdx < kf.desc.rows) { descriptors.push_back(kf.desc.row(kpIdx)); } } - + if(descriptors.empty()) return; - + // Compute median descriptor (for binary descriptors, use majority voting per bit) if(descriptors[0].type() == CV_8U) { // Binary descriptor (ORB) int bytes = descriptors[0].cols; Mat median = Mat::zeros(1, bytes, CV_8U); - + for(int b = 0; b < bytes; ++b) { int bitCount[8] = {0}; for(const auto &desc : descriptors) { @@ -289,7 +289,7 @@ void MapManager::updateMapPointDescriptor(MapPoint &mp) { if(byte & (1 << bit)) bitCount[bit]++; } } - + uchar medianByte = 0; int threshold = descriptors.size() / 2; for(int bit = 0; bit < 8; ++bit) { @@ -299,7 +299,7 @@ void MapManager::updateMapPointDescriptor(MapPoint &mp) { } median.at(0, b) = medianByte; } - + mp.descriptor = median; } else { // Fallback: use first descriptor From b0bcf13b41b8668745548490fcd908062a37d389 Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Wed, 10 Dec 2025 19:46:12 +0800 Subject: [PATCH 12/29] Fix compiler warnings: shadow variables, unused variables/parameters --- modules/slam/src/feature.cpp | 10 +++++----- modules/slam/src/initializer.cpp | 4 ++-- modules/slam/src/matcher.cpp | 2 +- modules/slam/src/visualizer.cpp | 2 +- modules/slam/src/vo.cpp | 1 - 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/modules/slam/src/feature.cpp b/modules/slam/src/feature.cpp index e2eb7dde507..db33bf0527a 100644 --- a/modules/slam/src/feature.cpp +++ b/modules/slam/src/feature.cpp @@ -137,12 +137,12 @@ void FeatureExtractor::detectAndCompute(const Mat &image, std::vector } std::vector selected; selected.reserve(nfeatures_); - for(auto &b: buckets){ - if(b.empty()) continue; - std::sort(b.begin(), b.end(), [](const CandScore &a, const CandScore &b){ return a.score > b.score; }); - int take = std::min((int)b.size(), cellCap); + for(auto &bucket: buckets){ + if(bucket.empty()) continue; + std::sort(bucket.begin(), bucket.end(), [](const CandScore &a, const CandScore &b){ return a.score > b.score; }); + int take = std::min((int)bucket.size(), cellCap); for(int i=0;i &Rs, Mat H_normalized = H / H.at(2,2); std::vector rotations, translations, normalsOut; - int solutions = decomposeHomographyMat(H_normalized, Mat::eye(3,3,CV_64F), - rotations, translations, normalsOut); + decomposeHomographyMat(H_normalized, Mat::eye(3,3,CV_64F), + rotations, translations, normalsOut); Rs = rotations; ts = translations; diff --git a/modules/slam/src/matcher.cpp b/modules/slam/src/matcher.cpp index e0579793d31..225cc50c7f0 100644 --- a/modules/slam/src/matcher.cpp +++ b/modules/slam/src/matcher.cpp @@ -23,7 +23,7 @@ void Matcher::knnMatch(const Mat &desc1, const Mat &desc2, std::vector & } void Matcher::match(const Mat &desc1, const Mat &desc2, - const std::vector &kps1, const std::vector &kps2, + const std::vector & /*kps1*/, const std::vector &kps2, std::vector &goodMatches, int imgCols, int imgRows, int bucketRows, int bucketCols, int topKPerBucket, diff --git a/modules/slam/src/visualizer.cpp b/modules/slam/src/visualizer.cpp index 59ee2a4186f..ab905bcdbed 100644 --- a/modules/slam/src/visualizer.cpp +++ b/modules/slam/src/visualizer.cpp @@ -11,7 +11,7 @@ Tracker::Tracker() { } -bool Tracker::processFrame(const Mat &gray, const std::string &imagePath, Mat &imgOut, Mat &R_out, Mat &t_out, std::string &info) +bool Tracker::processFrame(const Mat &gray, const std::string & /*imagePath*/, Mat &imgOut, Mat & /*R_out*/, Mat & /*t_out*/, std::string &info) { if(gray.empty()) return false; // detect diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index c1ac1e09a9e..8959bae9af4 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -183,7 +183,6 @@ int VisualOdometry::run(const std::string &imageDir, double scale_m, const Visua } } // mutual check - int q = forward ? (int)qi : best.trainIdx; int t = forward ? best.trainIdx : (int)qi; // find reverse match for t const auto &rev = forward ? knn21 : knn12; From 8c9328d50a473cb46eca0466ebb7f7cf15a02d19 Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Wed, 10 Dec 2025 20:15:38 +0800 Subject: [PATCH 13/29] Fix undef warning: use defined() for CERES_FOUND macro check --- modules/sfm/include/opencv2/sfm.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sfm/include/opencv2/sfm.hpp b/modules/sfm/include/opencv2/sfm.hpp index 52c1af07e8e..3765a1d2bfa 100644 --- a/modules/sfm/include/opencv2/sfm.hpp +++ b/modules/sfm/include/opencv2/sfm.hpp @@ -42,7 +42,7 @@ #include #include #include -#if CERES_FOUND +#if defined(CERES_FOUND) && CERES_FOUND #include #include #endif From f555b94674d94458abd68a76bac06770b36913de Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Wed, 10 Dec 2025 20:59:47 +0800 Subject: [PATCH 14/29] fix Type Conversion/Range Warning --- modules/slam/src/initializer.cpp | 46 ++++++++++++++++---------------- modules/slam/src/map.cpp | 2 +- modules/slam/src/optimizer.cpp | 2 +- modules/slam/src/visualizer.cpp | 4 +-- 4 files changed, 27 insertions(+), 27 deletions(-) diff --git a/modules/slam/src/initializer.cpp b/modules/slam/src/initializer.cpp index 6daed2eca7f..f54351a021c 100644 --- a/modules/slam/src/initializer.cpp +++ b/modules/slam/src/initializer.cpp @@ -208,7 +208,7 @@ bool Initializer::reconstructH(const std::vector &pts1, if(nGood > bestGood) { bestGood = nGood; - bestIdx = i; + bestIdx = static_cast(i); } } @@ -239,7 +239,7 @@ int Initializer::checkRT(const Mat &R, const Mat &t, Mat t2 = t; int nGood = 0; - std::vector cosParallaxes; + std::vector cosParallaxes; cosParallaxes.reserve(points3D.size()); for(size_t i = 0; i < points3D.size(); ++i) { @@ -285,8 +285,8 @@ int Initializer::checkRT(const Mat &R, const Mat &t, if(!cosParallaxes.empty()) { std::sort(cosParallaxes.begin(), cosParallaxes.end()); - float medianCos = cosParallaxes[cosParallaxes.size()/2]; - parallax = std::acos(medianCos) * 180.0 / CV_PI; + double medianCos = cosParallaxes[cosParallaxes.size()/2]; + parallax = static_cast(std::acos(medianCos) * 180.0 / CV_PI); } return nGood; @@ -329,11 +329,11 @@ float Initializer::computeScore(const Mat &H21, const Mat &H12, std::vector &inliersH, float sigma) { - const float th = 5.991 * sigma; + const double th = 5.991 * static_cast(sigma); inliersH.resize(pts1.size(), false); - float score = 0.0f; - const float thSq = th * th; + double score = 0.0; + const double thSq = th * th; for(size_t i = 0; i < pts1.size(); ++i) { // Forward error @@ -341,9 +341,9 @@ float Initializer::computeScore(const Mat &H21, const Mat &H12, Mat p2pred = H21 * p1; p2pred /= p2pred.at(2,0); - float dx = pts2[i].x - p2pred.at(0,0); - float dy = pts2[i].y - p2pred.at(1,0); - float errSq = dx*dx + dy*dy; + double dx = static_cast(pts2[i].x) - p2pred.at(0,0); + double dy = static_cast(pts2[i].y) - p2pred.at(1,0); + double errSq = dx*dx + dy*dy; if(errSq < thSq) { score += thSq - errSq; @@ -354,8 +354,8 @@ float Initializer::computeScore(const Mat &H21, const Mat &H12, Mat p1pred = H12 * p2; p1pred /= p1pred.at(2,0); - dx = pts1[i].x - p1pred.at(0,0); - dy = pts1[i].y - p1pred.at(1,0); + dx = static_cast(pts1[i].x) - p1pred.at(0,0); + dy = static_cast(pts1[i].y) - p1pred.at(1,0); errSq = dx*dx + dy*dy; if(errSq < thSq) { @@ -364,7 +364,7 @@ float Initializer::computeScore(const Mat &H21, const Mat &H12, } } - return score; + return static_cast(score); } float Initializer::computeScoreF(const Mat &F21, @@ -373,11 +373,11 @@ float Initializer::computeScoreF(const Mat &F21, std::vector &inliersF, float sigma) { - const float th = 3.841 * sigma; - const float thSq = th * th; + const double th = 3.841 * static_cast(sigma); + const double thSq = th * th; inliersF.resize(pts1.size(), false); - float score = 0.0f; + double score = 0.0; for(size_t i = 0; i < pts1.size(); ++i) { Mat p1 = (Mat_(3,1) << pts1[i].x, pts1[i].y, 1.0); @@ -385,13 +385,13 @@ float Initializer::computeScoreF(const Mat &F21, // Epipolar line in second image Mat l2 = F21 * p1; - float a2 = l2.at(0,0); - float b2 = l2.at(1,0); - float c2 = l2.at(2,0); + double a2 = l2.at(0,0); + double b2 = l2.at(1,0); + double c2 = l2.at(2,0); - float num2 = a2*pts2[i].x + b2*pts2[i].y + c2; - float den2 = a2*a2 + b2*b2; - float distSq2 = (num2*num2) / den2; + double num2 = a2*pts2[i].x + b2*pts2[i].y + c2; + double den2 = a2*a2 + b2*b2; + double distSq2 = (num2*num2) / den2; if(distSq2 < thSq) { score += thSq - distSq2; @@ -399,7 +399,7 @@ float Initializer::computeScoreF(const Mat &F21, } } - return score; + return static_cast(score); } } // namespace vo diff --git a/modules/slam/src/map.cpp b/modules/slam/src/map.cpp index 6c998b955fd..9b9ca037453 100644 --- a/modules/slam/src/map.cpp +++ b/modules/slam/src/map.cpp @@ -291,7 +291,7 @@ void MapManager::updateMapPointDescriptor(MapPoint &mp) { } uchar medianByte = 0; - int threshold = descriptors.size() / 2; + int threshold = static_cast(descriptors.size()) / 2; for(int bit = 0; bit < 8; ++bit) { if(bitCount[bit] > threshold) { medianByte |= (1 << bit); diff --git a/modules/slam/src/optimizer.cpp b/modules/slam/src/optimizer.cpp index b1e037e142a..6d5c5e66586 100644 --- a/modules/slam/src/optimizer.cpp +++ b/modules/slam/src/optimizer.cpp @@ -279,7 +279,7 @@ bool Optimizer::optimizePose( if(matchedMpIndices.empty()) return false; inliers.assign(matchedMpIndices.size(), false); - std::vector objectPoints; + std::vector objectPoints; std::vector imagePoints; for(size_t i = 0; i < matchedMpIndices.size(); ++i) { int mpIdx = matchedMpIndices[i]; diff --git a/modules/slam/src/visualizer.cpp b/modules/slam/src/visualizer.cpp index ab905bcdbed..30b65cdbb98 100644 --- a/modules/slam/src/visualizer.cpp +++ b/modules/slam/src/visualizer.cpp @@ -96,9 +96,9 @@ void Visualizer::showTopdown(){ Point p = worldToPixel(traj_.back()); // draw heading arrow on topdown map based on recent trajectory if(traj_.size() >= 2){ - int K = std::min(5, traj_.size()-1); + size_t K = std::min(5, traj_.size()-1); double dx = 0.0, dz = 0.0; - for(int i=0;i Date: Thu, 11 Dec 2025 09:34:23 +0800 Subject: [PATCH 15/29] Docs: raise Doxygen DOT_GRAPH_MAX_NODES for contrib build --- doc/doxygen_extra.cfg | 2 ++ modules/slam/CMakeLists.txt | 3 +++ 2 files changed, 5 insertions(+) create mode 100644 doc/doxygen_extra.cfg diff --git a/doc/doxygen_extra.cfg b/doc/doxygen_extra.cfg new file mode 100644 index 00000000000..565efc296c6 --- /dev/null +++ b/doc/doxygen_extra.cfg @@ -0,0 +1,2 @@ +# Extra Doxygen settings to relax graph limits for contrib builds +DOT_GRAPH_MAX_NODES = 512 diff --git a/modules/slam/CMakeLists.txt b/modules/slam/CMakeLists.txt index 3b86f319862..ea761954fed 100644 --- a/modules/slam/CMakeLists.txt +++ b/modules/slam/CMakeLists.txt @@ -54,6 +54,9 @@ if(TARGET ${the_module}) endif() endif() +# Raise Doxygen graph node limit to avoid include-graph generation failures +set(DOXYGEN_DOT_GRAPH_MAX_NODES 512 CACHE STRING "Max nodes for Doxygen dot graphs" FORCE) + # If you need to add special include directories, libraries or compile flags, # add them below using the OpenCV contrib module helper macros, for example: # From 5f0a98070b42ad5f099a738d496436b9d8e48af5 Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Thu, 11 Dec 2025 11:57:34 +0800 Subject: [PATCH 16/29] rollback --- doc/doxygen_extra.cfg | 2 -- modules/slam/CMakeLists.txt | 3 --- 2 files changed, 5 deletions(-) delete mode 100644 doc/doxygen_extra.cfg diff --git a/doc/doxygen_extra.cfg b/doc/doxygen_extra.cfg deleted file mode 100644 index 565efc296c6..00000000000 --- a/doc/doxygen_extra.cfg +++ /dev/null @@ -1,2 +0,0 @@ -# Extra Doxygen settings to relax graph limits for contrib builds -DOT_GRAPH_MAX_NODES = 512 diff --git a/modules/slam/CMakeLists.txt b/modules/slam/CMakeLists.txt index ea761954fed..3b86f319862 100644 --- a/modules/slam/CMakeLists.txt +++ b/modules/slam/CMakeLists.txt @@ -54,9 +54,6 @@ if(TARGET ${the_module}) endif() endif() -# Raise Doxygen graph node limit to avoid include-graph generation failures -set(DOXYGEN_DOT_GRAPH_MAX_NODES 512 CACHE STRING "Max nodes for Doxygen dot graphs" FORCE) - # If you need to add special include directories, libraries or compile flags, # add them below using the OpenCV contrib module helper macros, for example: # From 96cb5495cb0f94634bf35a17d290bae1375e9fbf Mon Sep 17 00:00:00 2001 From: Xuanrui Zhu <634210954@qq.com> Date: Thu, 11 Dec 2025 07:01:21 +0000 Subject: [PATCH 17/29] fix keyframe selection during triangulation --- modules/slam/src/vo.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index 8959bae9af4..75fe960acc0 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -413,7 +413,12 @@ int VisualOdometry::run(const std::string &imageDir, double scale_m, const Visua std::vector pts1_kp_idx; pts1_kp_idx.reserve(goodMatches.size()); std::vector pts2_kp_idx; pts2_kp_idx.reserve(goodMatches.size()); for(const auto &m: goodMatches){ pts1_kp_idx.push_back(m.queryIdx); pts2_kp_idx.push_back(m.trainIdx); } - auto newPts = map.triangulateBetweenLastTwo(pts1n, pts2n, pts1_kp_idx, pts2_kp_idx, last, keyframes.empty() ? kf : keyframes.back(), fx, fy, cx, cy); + // triangulate between the last keyframe in the map and the CURRENT keyframe `kf`. + // previously the code passed `keyframes.back()` which refers to the previous local + // keyframe (or the same as `last`), resulting in triangulation between the same + // frame and thus no new points. Pass `kf` to triangulate between `last` and + // the current frame. + auto newPts = map.triangulateBetweenLastTwo(pts1n, pts2n, pts1_kp_idx, pts2_kp_idx, last, kf, fx, fy, cx, cy); if(!newPts.empty()){ didTriangulate = true; // already appended inside MapManager From 51f2198a8a2f7aea777bf1bc773ed555908c1a9f Mon Sep 17 00:00:00 2001 From: Xuanrui Zhu <634210954@qq.com> Date: Fri, 19 Dec 2025 12:38:46 +0000 Subject: [PATCH 18/29] Fix camel casing issue in vo.hpp --- modules/slam/include/opencv2/slam/vo.hpp | 18 +++++++++--------- modules/slam/src/vo.cpp | 10 +++++----- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/modules/slam/include/opencv2/slam/vo.hpp b/modules/slam/include/opencv2/slam/vo.hpp index 5172edb74df..515b7b21b0f 100644 --- a/modules/slam/include/opencv2/slam/vo.hpp +++ b/modules/slam/include/opencv2/slam/vo.hpp @@ -8,15 +8,15 @@ namespace cv { namespace vo { struct CV_EXPORTS_W_SIMPLE VisualOdometryOptions { - CV_PROP_RW int min_matches = 15; - CV_PROP_RW int min_inliers = 4; - CV_PROP_RW double min_inlier_ratio = 0.1; - CV_PROP_RW double diff_zero_thresh = 2.0; - CV_PROP_RW double flow_zero_thresh = 0.3; - CV_PROP_RW double min_translation_norm = 1e-4; - CV_PROP_RW double min_rotation_rad = 0.5 * CV_PI / 180.0; - CV_PROP_RW int max_matches_keep = 500; - CV_PROP_RW double flow_weight_lambda = 5.0; + CV_PROP_RW int minMatches = 15; + CV_PROP_RW int minInliers = 4; + CV_PROP_RW double minInlierRatio = 0.1; + CV_PROP_RW double diffZeroThresh = 2.0; + CV_PROP_RW double flowZeroThresh = 0.3; + CV_PROP_RW double minTranslationNorm = 1e-4; + CV_PROP_RW double minRotationRad = 0.5 * CV_PI / 180.0; + CV_PROP_RW int maxMatchesKeep = 500; + CV_PROP_RW double flowWeightLambda = 5.0; }; class CV_EXPORTS_W VisualOdometry { diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index 75fe960acc0..5314aba71c3 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -32,7 +32,7 @@ VisualOdometry::VisualOdometry(Ptr feature, Ptr ma : feature_(std::move(feature)), matcher_(std::move(matcher)) { } -int VisualOdometry::run(const std::string &imageDir, double scale_m, const VisualOdometryOptions &options){ +int VisualOdometry::run(const std::string &imageDir, double scaleM, const VisualOdometryOptions &options){ DataLoader loader(imageDir); std::cout << "VisualOdometry: loaded " << loader.size() << " images from " << imageDir << std::endl; if(loader.size() == 0){ @@ -289,7 +289,7 @@ int VisualOdometry::run(const std::string &imageDir, double scale_m, const Visua // set global pose to second keyframe (apply scale) Mat t_d; kf1.t_w.convertTo(t_d, CV_64F); - t_g = t_d * scale_m; + t_g = t_d * scaleM; R_g = kf1.R_w.clone(); double x = t_g.at(0); double z = t_g.at(2); @@ -318,7 +318,7 @@ int VisualOdometry::run(const std::string &imageDir, double scale_m, const Visua Mat R_pnp, t_pnp; int inliers_pnp = 0; int preMatches_pnp = 0, postMatches_pnp = 0; double meanReproj_pnp = 0.0; if(localizer.tryPnP(map, desc, kps, loader.fx(), loader.fy(), loader.cx(), loader.cy(), gray.cols, gray.rows, - options.min_inliers, R_pnp, t_pnp, inliers_pnp, frame_id, &frame, runDirStr, + options.minInliers, R_pnp, t_pnp, inliers_pnp, frame_id, &frame, runDirStr, &preMatches_pnp, &postMatches_pnp, &meanReproj_pnp)){ solvedByPnP = true; std::cout << "PnP solved: preMatches="< negligible motion and near-identical frames, skipping integration." << std::endl; } } - if (inliers >= options.min_inliers || (inliers >= 2 && matchCount > 50 && median_flow > 2.0)) { + if (inliers >= options.minInliers || (inliers >= 2 && matchCount > 50 && median_flow > 2.0)) { integrate = true; } // integrate transform if allowed if(integrate){ Mat t_d; t.convertTo(t_d, CV_64F); - Mat t_scaled = t_d * scale_m; + Mat t_scaled = t_d * scaleM; Mat R_d; R.convertTo(R_d, CV_64F); t_g = t_g + R_g * t_scaled; R_g = R_g * R_d; From 16a1ab49682dd8d43a8689ce26f7f47f8b234bd9 Mon Sep 17 00:00:00 2001 From: Xuanrui Zhu <634210954@qq.com> Date: Fri, 19 Dec 2025 18:06:59 +0000 Subject: [PATCH 19/29] fix function name issue and add parallel for --- modules/slam/src/feature.cpp | 24 +++++++++++++----------- modules/slam/src/map.cpp | 36 +++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/modules/slam/src/feature.cpp b/modules/slam/src/feature.cpp index db33bf0527a..c4c73171366 100644 --- a/modules/slam/src/feature.cpp +++ b/modules/slam/src/feature.cpp @@ -14,7 +14,7 @@ FeatureExtractor::FeatureExtractor(int nfeatures) } // Adaptive Non-Maximal Suppression (ANMS) -static void anms(const std::vector &in, std::vector &out, int maxFeatures) +static void adaptiveNonMaximalSuppression(const std::vector &in, std::vector &out, int maxFeatures) { out.clear(); if(in.empty()) return; @@ -23,17 +23,19 @@ static void anms(const std::vector &in, std::vector &out, in // For each keypoint, find distance to the nearest keypoint with strictly higher response std::vector radius(N, std::numeric_limits::infinity()); - for(int i=0;i in[i].response){ - float dx = in[i].pt.x - in[j].pt.x; - float dy = in[i].pt.y - in[j].pt.y; - float d2 = dx*dx + dy*dy; - if(d2 < radius[i]) radius[i] = d2; + cv::parallel_for_(Range(0, N), [&](const Range &r){ + for(int i=r.start;i in[i].response){ + float dx = in[i].pt.x - in[j].pt.x; + float dy = in[i].pt.y - in[j].pt.y; + float d2 = dx*dx + dy*dy; + if(d2 < radius[i]) radius[i] = d2; + } } + // if no stronger keypoint exists, radius[i] stays INF } - // if no stronger keypoint exists, radius[i] stays INF - } + }); // Now pick top maxFeatures by radius (larger radius preferred). If radius==INF, treat as large. std::vector idx(N); @@ -69,7 +71,7 @@ void FeatureExtractor::detectAndCompute(const Mat &image, std::vector // If no previous-frame info is provided, use simple ANMS + descriptor computation if(prevGray.empty() || prevKp.empty()){ std::vector selected; - anms(candidates, selected, nfeatures_); + adaptiveNonMaximalSuppression(candidates, selected, nfeatures_); if(selected.empty()) return; orb_->compute(image, selected, desc); kps = std::move(selected); diff --git a/modules/slam/src/map.cpp b/modules/slam/src/map.cpp index 9b9ca037453..a575c6f54d0 100644 --- a/modules/slam/src/map.cpp +++ b/modules/slam/src/map.cpp @@ -75,7 +75,8 @@ std::vector MapManager::triangulateBetweenLastTwo(const std::vector(3,1) << Xx, Xy, Xz); Mat Xw = lastKf.R_w * Xc + lastKf.t_w; - MapPoint mp; mp.p = Point3d(Xw.at(0,0), Xw.at(1,0), Xw.at(2,0)); + MapPoint mp; + mp.p = Point3d(Xw.at(0,0), Xw.at(1,0), Xw.at(2,0)); // compute reprojection error in both views (pixel coords) // project into last Mat Xc_last = Xc; @@ -280,25 +281,26 @@ void MapManager::updateMapPointDescriptor(MapPoint &mp) { // Binary descriptor (ORB) int bytes = descriptors[0].cols; Mat median = Mat::zeros(1, bytes, CV_8U); - - for(int b = 0; b < bytes; ++b) { - int bitCount[8] = {0}; - for(const auto &desc : descriptors) { - uchar byte = desc.at(0, b); - for(int bit = 0; bit < 8; ++bit) { - if(byte & (1 << bit)) bitCount[bit]++; + int numDesc = static_cast(descriptors.size()); + int threshold = numDesc / 2; + + cv::parallel_for_(cv::Range(0, bytes), [&](const cv::Range &r){ + for(int b = r.start; b < r.end; ++b){ + int bitCount[8] = {0}; + for(int i = 0; i < numDesc; ++i){ + const uchar *data = descriptors[i].ptr(0); + uchar byte = data[b]; + for(int bit = 0; bit < 8; ++bit){ + if(byte & (1 << bit)) bitCount[bit]++; + } } - } - - uchar medianByte = 0; - int threshold = static_cast(descriptors.size()) / 2; - for(int bit = 0; bit < 8; ++bit) { - if(bitCount[bit] > threshold) { - medianByte |= (1 << bit); + uchar medianByte = 0; + for(int bit = 0; bit < 8; ++bit){ + if(bitCount[bit] > threshold) medianByte |= (1 << bit); } + median.at(0, b) = medianByte; } - median.at(0, b) = medianByte; - } + }); mp.descriptor = median; } else { From 78b4972e9def2e5c90f25df797bf50a11a506865 Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Sun, 21 Dec 2025 18:50:47 +0800 Subject: [PATCH 20/29] solve 3 review problems --- modules/slam/src/data_loader.cpp | 109 +++++-------------------------- modules/slam/src/feature.cpp | 8 ++- modules/slam/src/initializer.cpp | 7 +- 3 files changed, 24 insertions(+), 100 deletions(-) diff --git a/modules/slam/src/data_loader.cpp b/modules/slam/src/data_loader.cpp index 3210ebc9cf6..6c5814cc9d2 100644 --- a/modules/slam/src/data_loader.cpp +++ b/modules/slam/src/data_loader.cpp @@ -1,33 +1,6 @@ #include "opencv2/slam/data_loader.hpp" #include -#if defined(__has_include) -# if __has_include() -# include -# define HAVE_STD_FILESYSTEM 1 - namespace fs = std::filesystem; -# elif __has_include() -# include -# define HAVE_STD_FILESYSTEM 1 - namespace fs = std::experimental::filesystem; -# else -# define HAVE_STD_FILESYSTEM 0 -# endif -#else -# define HAVE_STD_FILESYSTEM 0 -#endif -#include -#include -#include -#include -#ifdef _WIN32 -# include -# ifndef S_ISDIR -# define S_ISDIR(mode) (((mode) & S_IFDIR) != 0) -# endif -# ifndef mkdir -# define mkdir(path, mode) _mkdir(path) -# endif -#endif +#include #include #include #include @@ -38,64 +11,27 @@ namespace vo { bool ensureDirectoryExists(const std::string &dir){ if(dir.empty()) return false; -#if HAVE_STD_FILESYSTEM - try{ - fs::path p(dir); - return fs::create_directories(p) || fs::exists(p); - }catch(...){ return false; } -#else - std::string tmp = dir; - if(tmp.empty()) return false; - while(tmp.size() > 1 && tmp.back() == '/') tmp.pop_back(); - std::string cur; - if(!tmp.empty() && tmp[0] == '/') cur = "/"; - size_t pos = 0; - while(pos < tmp.size()){ - size_t next = tmp.find('/', pos); - std::string part = (next == std::string::npos) ? tmp.substr(pos) : tmp.substr(pos, next-pos); - if(!part.empty()){ - if(cur.size() > 1 && cur.back() != '/') cur += '/'; - cur += part; - if(mkdir(cur.c_str(), 0755) != 0){ - if(errno == EEXIST){ /* ok */ } - else { - struct stat st; - if(stat(cur.c_str(), &st) == 0 && S_ISDIR(st.st_mode)){ - // ok - } else return false; - } - } - } - if(next == std::string::npos) break; - pos = next + 1; + try { + if(cv::utils::fs::exists(dir)) return cv::utils::fs::isDirectory(dir); + return cv::utils::fs::createDirectories(dir); + } catch(const std::exception &e) { + std::cerr << "ensureDirectoryExists: " << e.what() << std::endl; + return false; } - return true; -#endif } DataLoader::DataLoader(const std::string &imageDir) : currentIndex(0), fx_(700.0), fy_(700.0), cx_(0.5), cy_(0.5) { - // Check whether the directory exists. Prefer std::filesystem when available, - // otherwise fall back to POSIX stat. -#if HAVE_STD_FILESYSTEM try { - fs::path p(imageDir); - if(!fs::exists(p) || !fs::is_directory(p)){ + if(!cv::utils::fs::exists(imageDir) || !cv::utils::fs::isDirectory(imageDir)){ std::cerr << "DataLoader: imageDir does not exist or is not a directory: " << imageDir << std::endl; return; } } catch(const std::exception &e){ std::cerr << "DataLoader: filesystem error checking imageDir: " << e.what() << std::endl; - // fallthrough to try glob - } -#else - struct stat sb; - if(stat(imageDir.c_str(), &sb) != 0 || !S_ISDIR(sb.st_mode)){ - std::cerr << "DataLoader: imageDir does not exist or is not a directory: " << imageDir << std::endl; return; } -#endif // Use glob to list files, catching any possible OpenCV exceptions try { @@ -111,29 +47,14 @@ DataLoader::DataLoader(const std::string &imageDir) return; } - // Try to find sensor.yaml in imageDir or its parent directory. - // Use filesystem paths when available; otherwise build paths with simple string ops. -#if HAVE_STD_FILESYSTEM - fs::path p(imageDir); - std::string yaml1 = (p / "sensor.yaml").string(); - std::string yaml2 = (p.parent_path() / "sensor.yaml").string(); - if(!loadIntrinsics(yaml1)){ - loadIntrinsics(yaml2); // best-effort - } -#else - auto make_parent_yaml = [](const std::string &dir)->std::string{ - std::string tmp = dir; - if(!tmp.empty() && tmp.back() == '/') tmp.pop_back(); - auto pos = tmp.find_last_of('/'); - if(pos == std::string::npos) return std::string("sensor.yaml"); - return tmp.substr(0, pos) + "/sensor.yaml"; - }; - std::string yaml1 = imageDir + "/sensor.yaml"; - std::string yaml2 = make_parent_yaml(imageDir); - if(!loadIntrinsics(yaml1)){ - loadIntrinsics(yaml2); // best-effort + try { + std::string yaml1 = cv::utils::fs::join(imageDir, "sensor.yaml"); + std::string yaml2 = cv::utils::fs::join(cv::utils::fs::getParent(imageDir), "sensor.yaml"); + if(!loadIntrinsics(yaml1)){ + loadIntrinsics(yaml2); // best-effort + } + } catch(const std::exception &){ } -#endif } bool DataLoader::loadIntrinsics(const std::string &yamlPath){ diff --git a/modules/slam/src/feature.cpp b/modules/slam/src/feature.cpp index c4c73171366..35f5453eb76 100644 --- a/modules/slam/src/feature.cpp +++ b/modules/slam/src/feature.cpp @@ -90,7 +90,13 @@ void FeatureExtractor::detectAndCompute(const Mat &image, std::vector try{ calcOpticalFlowPyrLK(prevGray, image, prevPts, trackedPts, status, err, Size(21,21), 3, TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01), 0, 1e-4); - } catch(...) { status.clear(); } + } catch(const cv::Exception &e) { + CV_Error(cv::Error::StsError, std::string("calcOpticalFlowPyrLK failed: ") + e.what()); + } catch(const std::exception &e) { + CV_Error(cv::Error::StsError, std::string("calcOpticalFlowPyrLK failed: ") + e.what()); + } catch(...) { + CV_Error(cv::Error::StsError, "calcOpticalFlowPyrLK failed with unknown error"); + } trackedFlows.resize(prevPts.size()); for(size_t i=0;i &pts1, isTriangulated.resize(pts1.size(), false); Mat P1 = Mat::eye(3, 4, CV_64F); - Mat P2(3, 4, CV_64F); - for(int i = 0; i < 3; ++i) { - for(int j = 0; j < 3; ++j) P2.at(i,j) = R.at(i,j); - P2.at(i, 3) = t.at(i, 0); - } + Mat P2; + hconcat(R, t, P2); triangulate(P1, P2, pts1, pts2, points3D); From b458ce1155b50a7072759007ead78fc221b31437 Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Sun, 21 Dec 2025 20:59:41 +0800 Subject: [PATCH 21/29] Refine VO matching and BA prep; improve triangulation error logging --- modules/slam/src/map.cpp | 14 ++++++++++-- modules/slam/src/optimizer.cpp | 39 +++++++++++++++++++++------------- modules/slam/src/vo.cpp | 38 +++++++++++++-------------------- 3 files changed, 51 insertions(+), 40 deletions(-) diff --git a/modules/slam/src/map.cpp b/modules/slam/src/map.cpp index a575c6f54d0..a7a89938fe3 100644 --- a/modules/slam/src/map.cpp +++ b/modules/slam/src/map.cpp @@ -61,8 +61,18 @@ std::vector MapManager::triangulateBetweenLastTwo(const std::vector(r,3) = t_rel.at(r,0); } Mat points4D; - try{ triangulatePoints(P1, P2, pts1n, pts2n, points4D); } - catch(...) { points4D.release(); } + try { + triangulatePoints(P1, P2, pts1n, pts2n, points4D); + } catch(const cv::Exception &e) { + std::cerr << "triangulatePoints failed: " << e.what() << std::endl; + points4D.release(); + } catch(const std::exception &e) { + std::cerr << "triangulatePoints failed: " << e.what() << std::endl; + points4D.release(); + } catch(...) { + std::cerr << "triangulatePoints failed: unknown error" << std::endl; + points4D.release(); + } if(points4D.empty()) return newPoints; Mat p4d64; if(points4D.type() != CV_64F) points4D.convertTo(p4d64, CV_64F); else p4d64 = points4D; diff --git a/modules/slam/src/optimizer.cpp b/modules/slam/src/optimizer.cpp index 6d5c5e66586..85999618ec3 100644 --- a/modules/slam/src/optimizer.cpp +++ b/modules/slam/src/optimizer.cpp @@ -242,26 +242,35 @@ void Optimizer::localBundleAdjustmentSFM( } // --- Pose-only optimization (points fixed) --- - // For each local keyframe that is not fixed, call optimizePose + // Precompute keyframe->mappoint matches once to avoid nested loops per keyframe + std::vector> matchedPerKf(keyframes.size()); + std::vector kfNeeded(keyframes.size(), 0); + for (int kfId : localKfIndices) { + if (kfId < 0 || kfId >= static_cast(keyframes.size())) continue; + kfNeeded[kfId] = 1; + matchedPerKf[kfId].assign(keyframes[kfId].kps.size(), -1); + } + for (size_t mpIdx = 0; mpIdx < mappoints.size(); ++mpIdx) { + const MapPoint &mp = mappoints[mpIdx]; + if (mp.isBad) continue; + for (const auto &obs : mp.observations) { + int kfId = obs.first; + int kpIdx = obs.second; + if (kfId < 0 || kfId >= static_cast(keyframes.size())) continue; + if (!kfNeeded[kfId]) continue; + auto &matched = matchedPerKf[kfId]; + if (kpIdx >= 0 && kpIdx < static_cast(matched.size())) + matched[kpIdx] = static_cast(mpIdx); + } + } + + // For each local keyframe that is not fixed, call optimizePose using precomputed matches for (int kfId : localKfIndices) { if (fixedSet.find(kfId) != fixedSet.end()) continue; if (kfId < 0 || kfId >= static_cast(keyframes.size())) continue; KeyFrame &kf = keyframes[kfId]; - // Build matchedMpIndices: for each keypoint in this KF, which mappoint index it corresponds to - std::vector matchedMpIndices(kf.kps.size(), -1); - for (size_t mpIdx = 0; mpIdx < mappoints.size(); ++mpIdx) { - const MapPoint &mp = mappoints[mpIdx]; - if (mp.isBad) continue; - for (const auto &obs : mp.observations) { - if (obs.first == kfId) { - int kpIdx = obs.second; - if (kpIdx >= 0 && kpIdx < static_cast(matchedMpIndices.size())) - matchedMpIndices[kpIdx] = static_cast(mpIdx); - } - } - } + std::vector &matchedMpIndices = matchedPerKf[kfId]; std::vector inliers; - // Use the same number of iterations as outer loop for RANSAC attempts optimizePose(kf, mappoints, matchedMpIndices, fx, fy, cx, cy, inliers, std::max(20, iterations)); } } diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index 5314aba71c3..60fd54e1507 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -171,30 +171,22 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual std::vector> knn12, knn21; matcher_->knnMatch(prevDesc, desc, knn12, 2); matcher_->knnMatch(desc, prevDesc, knn21, 2); - auto ratioKeep = [&](const std::vector>& knn, bool forward) { - std::vector filtered; - for(size_t qi=0; qi= 2){ - if(knn[qi][1].distance > 0) { - if(best.distance / knn[qi][1].distance > ratio) continue; - } - } - // mutual check - int t = forward ? best.trainIdx : (int)qi; - // find reverse match for t - const auto &rev = forward ? knn21 : knn12; - if(t < 0 || t >= (int)rev.size() || rev[t].empty()) continue; - DMatch rbest = rev[t][0]; - if((forward && rbest.trainIdx == (int)qi) || (!forward && rbest.trainIdx == best.queryIdx)){ - filtered.push_back(best); - } + // ratio test + mutual check (prev->curr) + std::vector goodMatches; + goodMatches.reserve(knn12.size()); + const float ratio = 0.75f; + for(size_t qi = 0; qi < knn12.size(); ++qi){ + if(knn12[qi].empty()) continue; + const DMatch &best = knn12[qi][0]; + if(knn12[qi].size() >= 2){ + const DMatch &second = knn12[qi][1]; + if(second.distance > 0.0f && best.distance / second.distance > ratio) continue; } - return filtered; - }; - std::vector goodMatches = ratioKeep(knn12, true); + int t = best.trainIdx; + if(t < 0 || t >= (int)knn21.size() || knn21[t].empty()) continue; + const DMatch &rbest = knn21[t][0]; + if(rbest.trainIdx == (int)qi) goodMatches.push_back(best); + } Mat imgMatches; drawMatches(prevGray, prevKp, gray, kps, goodMatches, imgMatches, From 0be874ef90555f61122cc62970ddfcce74cb03a3 Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Sun, 21 Dec 2025 23:37:44 +0800 Subject: [PATCH 22/29] Made backend optional and configurable while wiring VO thresholds to user options. --- modules/slam/include/opencv2/slam/vo.hpp | 4 + modules/slam/src/vo.cpp | 123 ++++++++++++----------- 2 files changed, 69 insertions(+), 58 deletions(-) diff --git a/modules/slam/include/opencv2/slam/vo.hpp b/modules/slam/include/opencv2/slam/vo.hpp index 515b7b21b0f..b72b4416034 100644 --- a/modules/slam/include/opencv2/slam/vo.hpp +++ b/modules/slam/include/opencv2/slam/vo.hpp @@ -17,6 +17,10 @@ struct CV_EXPORTS_W_SIMPLE VisualOdometryOptions { CV_PROP_RW double minRotationRad = 0.5 * CV_PI / 180.0; CV_PROP_RW int maxMatchesKeep = 500; CV_PROP_RW double flowWeightLambda = 5.0; + // Backend/BA controls + CV_PROP_RW bool enableBackend = true; + CV_PROP_RW int backendWindow = 5; // number of latest keyframes for local BA + CV_PROP_RW int backendIterations = 10; // BA iterations (if backend enabled) }; class CV_EXPORTS_W VisualOdometry { diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index 60fd54e1507..afa8252a194 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -96,56 +96,59 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual std::condition_variable backendCv; std::atomic backendStop(false); std::atomic backendRequests(0); - const int LOCAL_BA_WINDOW = 5; // window size for local BA (adjustable) - - // Start backend thread: waits for notifications and runs BA on a snapshot - std::thread backendThread([&]() { - while(!backendStop.load()){ - std::unique_lock lk(mapMutex); - backendCv.wait(lk, [&]{ return backendStop.load() || backendRequests.load() > 0; }); - if(backendStop.load()) break; - // snapshot map and keyframes - auto kfs_snapshot = map.keyframes(); - auto mps_snapshot = map.mappoints(); - // reset requests - backendRequests.store(0); - lk.unlock(); - - // determine local window - int K = static_cast(kfs_snapshot.size()); - if(K <= 0) continue; - int start = std::max(0, K - LOCAL_BA_WINDOW); - std::vector localKfIndices; - for(int ii = start; ii < K; ++ii) localKfIndices.push_back(ii); - std::vector fixedKfIndices; - if(start > 0) fixedKfIndices.push_back(0); - #if defined(HAVE_SFM) - // Run BA on snapshot (may take time) - uses Optimizer which will use g2o if enabled - Optimizer::localBundleAdjustmentSFM(kfs_snapshot, mps_snapshot, localKfIndices, fixedKfIndices, - loader.fx(), loader.fy(), loader.cx(), loader.cy(), 10); - #endif - // write back optimized poses/points into main map under lock using id-based lookup - std::lock_guard lk2(mapMutex); - auto &kfs_ref = const_cast&>(map.keyframes()); - auto &mps_ref = const_cast&>(map.mappoints()); - // copy back poses by id to ensure we update the authoritative containers - for(const auto &kf_opt : kfs_snapshot){ - int idx = map.keyframeIndex(kf_opt.id); - if(idx >= 0 && idx < static_cast(kfs_ref.size())){ - kfs_ref[idx].R_w = kf_opt.R_w.clone(); - kfs_ref[idx].t_w = kf_opt.t_w.clone(); + const int LOCAL_BA_WINDOW = std::max(1, options.backendWindow); + + // Start backend thread only if enabled + std::thread backendThread; + if(options.enableBackend){ + backendThread = std::thread([&]() { + while(!backendStop.load()){ + std::unique_lock lk(mapMutex); + backendCv.wait(lk, [&]{ return backendStop.load() || backendRequests.load() > 0; }); + if(backendStop.load()) break; + // snapshot map and keyframes + auto kfs_snapshot = map.keyframes(); + auto mps_snapshot = map.mappoints(); + // reset requests + backendRequests.store(0); + lk.unlock(); + + // determine local window + int K = static_cast(kfs_snapshot.size()); + if(K <= 0) continue; + int start = std::max(0, K - LOCAL_BA_WINDOW); + std::vector localKfIndices; + for(int ii = start; ii < K; ++ii) localKfIndices.push_back(ii); + std::vector fixedKfIndices; + if(start > 0) fixedKfIndices.push_back(0); + #if defined(HAVE_SFM) + // Run BA on snapshot (may take time) - uses Optimizer which will use g2o if enabled + Optimizer::localBundleAdjustmentSFM(kfs_snapshot, mps_snapshot, localKfIndices, fixedKfIndices, + loader.fx(), loader.fy(), loader.cx(), loader.cy(), options.backendIterations); + #endif + // write back optimized poses/points into main map under lock using id-based lookup + std::lock_guard lk2(mapMutex); + auto &kfs_ref = const_cast&>(map.keyframes()); + auto &mps_ref = const_cast&>(map.mappoints()); + // copy back poses by id to ensure we update the authoritative containers + for(const auto &kf_opt : kfs_snapshot){ + int idx = map.keyframeIndex(kf_opt.id); + if(idx >= 0 && idx < static_cast(kfs_ref.size())){ + kfs_ref[idx].R_w = kf_opt.R_w.clone(); + kfs_ref[idx].t_w = kf_opt.t_w.clone(); + } } - } - // copy back mappoint positions by id - for(const auto &mp_opt : mps_snapshot){ - if(mp_opt.id <= 0) continue; - int idx = map.mapPointIndex(mp_opt.id); - if(idx >= 0 && idx < static_cast(mps_ref.size())){ - mps_ref[idx].p = mp_opt.p; + // copy back mappoint positions by id + for(const auto &mp_opt : mps_snapshot){ + if(mp_opt.id <= 0) continue; + int idx = map.mapPointIndex(mp_opt.id); + if(idx >= 0 && idx < static_cast(mps_ref.size())){ + mps_ref[idx].p = mp_opt.p; + } } } - } - }); + }); + } Mat frame; std::string imgPath; @@ -324,8 +327,8 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual double inlierRatio = matchCount > 0 ? double(inliers) / double(matchCount) : 0.0; // thresholds (tunable) -- relaxed and add absolute inlier guard - const int MIN_MATCHES = 15; // require at least this many matches (relative) - const int MIN_INLIERS = 4; // OR accept if at least this many absolute inliers + const int MIN_MATCHES = options.minMatches; // require at least this many matches (relative) + const int MIN_INLIERS = options.minInliers; // OR accept if at least this many absolute inliers double t_norm = 0.0, rot_angle = 0.0; if(ok){ Mat t_d; t.convertTo(t_d, CV_64F); @@ -355,10 +358,10 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual } else { // We have sufficient geometric support. Only skip if motion is truly negligible // (both translation and rotation tiny) AND the image/flow indicate near-identical frames. - const double MIN_TRANSLATION_NORM = 1e-4; - const double MIN_ROTATION_RAD = (0.5 * CV_PI / 180.0); // 0.5 degree - const double DIFF_ZERO_THRESH = 2.0; // nearly identical image - const double FLOW_ZERO_THRESH = 0.3; // nearly zero flow in pixels + const double MIN_TRANSLATION_NORM = options.minTranslationNorm; + const double MIN_ROTATION_RAD = options.minRotationRad; // 0.5 degree by default + const double DIFF_ZERO_THRESH = options.diffZeroThresh; // nearly identical image + const double FLOW_ZERO_THRESH = options.flowZeroThresh; // nearly zero flow in pixels if(t_norm < MIN_TRANSLATION_NORM && std::abs(rot_angle) < MIN_ROTATION_RAD && meanDiff < DIFF_ZERO_THRESH && median_flow < FLOW_ZERO_THRESH){ @@ -429,8 +432,10 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual std::cout << "Created keyframe " << frame_id << " (no triangulation)" << std::endl; } // Notify backend thread to run local BA asynchronously - backendRequests.fetch_add(1); - backendCv.notify_one(); + if(options.enableBackend){ + backendRequests.fetch_add(1); + backendCv.notify_one(); + } } // write CSV line @@ -498,9 +503,11 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual } // Shutdown backend thread gracefully - backendStop.store(true); - backendCv.notify_one(); - if(backendThread.joinable()) backendThread.join(); + if(options.enableBackend){ + backendStop.store(true); + backendCv.notify_one(); + if(backendThread.joinable()) backendThread.join(); + } return 0; } From ad9d27ce823c6381f461159be0f69f5d13496df1 Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Mon, 22 Dec 2025 13:34:20 +0800 Subject: [PATCH 23/29] Add backend setters, map maintenance, and guarded Ceres SFM BA fallback. --- modules/slam/include/opencv2/slam/vo.hpp | 14 ++- modules/slam/samples/run_vo_sample.cpp | 30 ++++++ modules/slam/src/optimizer.cpp | 125 +++++++++++++++++++++-- modules/slam/src/vo.cpp | 27 +++++ 4 files changed, 186 insertions(+), 10 deletions(-) create mode 100644 modules/slam/samples/run_vo_sample.cpp diff --git a/modules/slam/include/opencv2/slam/vo.hpp b/modules/slam/include/opencv2/slam/vo.hpp index b72b4416034..19d799f1190 100644 --- a/modules/slam/include/opencv2/slam/vo.hpp +++ b/modules/slam/include/opencv2/slam/vo.hpp @@ -21,15 +21,27 @@ struct CV_EXPORTS_W_SIMPLE VisualOdometryOptions { CV_PROP_RW bool enableBackend = true; CV_PROP_RW int backendWindow = 5; // number of latest keyframes for local BA CV_PROP_RW int backendIterations = 10; // BA iterations (if backend enabled) + // Map maintenance controls + CV_PROP_RW bool enableMapMaintenance = true; + CV_PROP_RW int maintenanceInterval = 5; // run cull/descriptor refresh every N keyframes }; class CV_EXPORTS_W VisualOdometry { public: CV_WRAP VisualOdometry(cv::Ptr feature, cv::Ptr matcher); - CV_WRAP int run(const std::string &imageDir, double scale_m = 1.0, const VisualOdometryOptions &options = VisualOdometryOptions()); + // Run with explicit options (keeps compatibility with callers who pass a full options struct) + CV_WRAP int run(const std::string &imageDir, double scale_m, const VisualOdometryOptions &options); + // Run using the internally stored options (affected by setters such as setEnableBackend) + CV_WRAP int run(const std::string &imageDir, double scale_m = 1.0); + + // Convenience setters for backend controls + CV_WRAP void setEnableBackend(bool enable); + CV_WRAP void setBackendWindow(int window); + CV_WRAP void setBackendIterations(int iterations); private: cv::Ptr feature_; cv::Ptr matcher_; + VisualOdometryOptions options_{}; // stored defaults for setter-based configuration }; } diff --git a/modules/slam/samples/run_vo_sample.cpp b/modules/slam/samples/run_vo_sample.cpp new file mode 100644 index 00000000000..f6df4e64279 --- /dev/null +++ b/modules/slam/samples/run_vo_sample.cpp @@ -0,0 +1,30 @@ +// Minimal wrapper to exercise cv::vo::VisualOdometry on a folder of images (e.g., EuRoC MH01). +#include +#include +#include +#include +#include + +int main(int argc, char** argv) { + if(argc < 2){ + std::cout << "Usage: " << argv[0] << " [image_dir] [scale_m=0.02]\n" + << "Example (EuRoC): " << argv[0] << " /datasets/EuRoC/MH01/mav0/cam0/data 0.02" << std::endl; + return 0; + } + + std::string imgDir = argv[1]; + double scale_m = (argc >= 3) ? std::atof(argv[2]) : 0.02; + + cv::Ptr feature = cv::ORB::create(2000); + cv::Ptr matcher = cv::DescriptorMatcher::create(cv::DescriptorMatcher::BRUTEFORCE_HAMMING); + + cv::vo::VisualOdometry vo(feature, matcher); + cv::vo::VisualOdometryOptions options; + // Configure options if desired, e.g. disable backend or tweak thresholds + // options.enableBackend = false; // purely front-end VO + + std::cout << "Running OpenCV VisualOdometry on " << imgDir << std::endl; + int ret = vo.run(imgDir, scale_m, options); + std::cout << "VisualOdometry finished with code " << ret << std::endl; + return ret; +} diff --git a/modules/slam/src/optimizer.cpp b/modules/slam/src/optimizer.cpp index 85999618ec3..671d2a98ba1 100644 --- a/modules/slam/src/optimizer.cpp +++ b/modules/slam/src/optimizer.cpp @@ -21,6 +21,7 @@ #include #elif defined(HAVE_SFM) #include "opencv2/sfm.hpp" +#include #endif #include @@ -154,15 +155,121 @@ void Optimizer::localBundleAdjustmentSFM( const std::vector &fixedKfIndices, double fx, double fy, double cx, double cy, int iterations) { - // Simple SFM-based local bundle adjustment using OpenCV routines. - // This implementation alternates: - // - point-only Gauss-Newton updates (poses kept fixed) - // - pose-only optimization using PnP (points kept fixed) - // It uses available jacobians w.r.t. point coordinates from - // computeReprojectionError and the existing optimizePose() helper - // to avoid depending on g2o / ceres. This provides an effective - // local refinement for SLAM windows when full BA backends are - // not available. + // First attempt a Ceres-based BA through cv::sfm::bundleAdjust (if available). + // If it throws or cannot form a valid problem, fall back to the local + // point/pose alternating optimizer below. + +#if defined(__has_include) +#if __has_include() + if (localKfIndices.size() >= 2) { + try { + // Build mapping from keyframe id to vector index for obs lookup + std::unordered_map id2idx; + for (size_t i = 0; i < keyframes.size(); ++i) { + id2idx[keyframes[i].id] = static_cast(i); + } + + // Collect valid local views (indices into keyframes vector) + std::vector views; + views.reserve(localKfIndices.size()); + for (int idx : localKfIndices) { + if (idx >= 0 && idx < static_cast(keyframes.size())) views.push_back(idx); + } + + const size_t V = views.size(); + if (V >= 2) { + // Per-view columns storage (filled with NaN if missing) + std::vector> cols(V); + std::vector mpKeepIndices; + std::vector pts3d; + + const double NaN = std::numeric_limits::quiet_NaN(); + + for (size_t mpi = 0; mpi < mappoints.size(); ++mpi) { + const MapPoint &mp = mappoints[mpi]; + if (mp.isBad) continue; + + // Collect obs in local views + std::vector perView(V, Point2d(NaN, NaN)); + int obsInWindow = 0; + for (const auto &obs : mp.observations) { + auto it = id2idx.find(obs.first); + if (it == id2idx.end()) continue; + int kfVecIdx = it->second; + // only consider if this keyframe is in our local view list + for (size_t vi = 0; vi < V; ++vi) { + if (views[vi] == kfVecIdx) { + const KeyFrame &kf = keyframes[kfVecIdx]; + int kpIdx = obs.second; + if (kpIdx >=0 && kpIdx < static_cast(kf.kps.size())) { + perView[vi] = kf.kps[kpIdx].pt; + obsInWindow++; + } + break; + } + } + } + if (obsInWindow < 2) continue; // need at least two obs in window + + mpKeepIndices.push_back(static_cast(mpi)); + pts3d.push_back(mp.p); + for (size_t vi = 0; vi < V; ++vi) { + cols[vi].push_back(perView[vi]); + } + } + + const int N = static_cast(pts3d.size()); + if (N > 0) { + std::vector points2dMats(V); + for (size_t vi = 0; vi < V; ++vi) { + points2dMats[vi] = Mat(2, N, CV_64F); + for (int j = 0; j < N; ++j) { + points2dMats[vi].at(0, j) = cols[vi][j].x; + points2dMats[vi].at(1, j) = cols[vi][j].y; + } + } + + Mat points3dMat(3, N, CV_64F); + for (int j = 0; j < N; ++j) { + points3dMat.at(0, j) = pts3d[j].x; + points3dMat.at(1, j) = pts3d[j].y; + points3dMat.at(2, j) = pts3d[j].z; + } + + Mat K = (Mat_(3,3) << fx, 0, cx, 0, fy, cy, 0, 0, 1); + + std::vector Rs, Ts; + Rs.reserve(V); Ts.reserve(V); + for (size_t vi = 0; vi < V; ++vi) { + Rs.push_back(keyframes[views[vi]].R_w.clone()); + Ts.push_back(keyframes[views[vi]].t_w.clone()); + } + + cv::sfm::bundleAdjust(points2dMats, K, Rs, Ts, points3dMat); + + // Write back optimized poses (only for the views we optimized) + for (size_t vi = 0; vi < V; ++vi) { + keyframes[views[vi]].R_w = Rs[vi].clone(); + keyframes[views[vi]].t_w = Ts[vi].clone(); + } + // Write back optimized points + for (int j = 0; j < N; ++j) { + int idx = mpKeepIndices[j]; + mappoints[idx].p.x = points3dMat.at(0, j); + mappoints[idx].p.y = points3dMat.at(1, j); + mappoints[idx].p.z = points3dMat.at(2, j); + } + return; // success, skip fallback optimizer + } + } + } catch (const std::exception &){ + // fall through to fallback optimizer below + } + } +#endif +#endif + + // Fallback: simple alternating point/pose optimizer (non-Ceres) if (localKfIndices.empty() || mappoints.empty()) return; diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index afa8252a194..a2032002140 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -32,6 +32,22 @@ VisualOdometry::VisualOdometry(Ptr feature, Ptr ma : feature_(std::move(feature)), matcher_(std::move(matcher)) { } +int VisualOdometry::run(const std::string &imageDir, double scaleM){ + return run(imageDir, scaleM, options_); +} + +void VisualOdometry::setEnableBackend(bool enable){ + options_.enableBackend = enable; +} + +void VisualOdometry::setBackendWindow(int window){ + options_.backendWindow = std::max(1, window); +} + +void VisualOdometry::setBackendIterations(int iterations){ + options_.backendIterations = std::max(1, iterations); +} + int VisualOdometry::run(const std::string &imageDir, double scaleM, const VisualOdometryOptions &options){ DataLoader loader(imageDir); std::cout << "VisualOdometry: loaded " << loader.size() << " images from " << imageDir << std::endl; @@ -425,6 +441,17 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual std::lock_guard lk(mapMutex); keyframes.push_back(std::move(kf)); map.addKeyFrame(keyframes.back()); + if(options.enableMapMaintenance){ + const int interval = std::max(1, options.maintenanceInterval); + if(static_cast(map.keyframes().size()) % interval == 0){ + map.cullBadMapPoints(); + auto &mps = map.mappointsMutable(); + for(auto &mp : mps){ + if(mp.isBad) continue; + map.updateMapPointDescriptor(mp); + } + } + } } if(didTriangulate){ std::cout << "Created keyframe " << frame_id << " and triangulated new map points (total=" << map.mappoints().size() << ")" << std::endl; From 22c26d72bbf3b8b315e718165c60f89f31c9a346 Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Tue, 23 Dec 2025 19:34:08 +0800 Subject: [PATCH 24/29] clean unused members --- modules/slam/include/opencv2/slam/visualizer.hpp | 3 --- modules/slam/src/visualizer.cpp | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/modules/slam/include/opencv2/slam/visualizer.hpp b/modules/slam/include/opencv2/slam/visualizer.hpp index 22ffe94fd36..4938505deeb 100644 --- a/modules/slam/include/opencv2/slam/visualizer.hpp +++ b/modules/slam/include/opencv2/slam/visualizer.hpp @@ -1,7 +1,6 @@ #pragma once #include #include -#include "opencv2/slam/pose.hpp" #include "opencv2/slam/feature.hpp" #include "opencv2/slam/matcher.hpp" @@ -16,7 +15,6 @@ class Tracker { private: FeatureExtractor feat_; Matcher matcher_; - PoseEstimator poseEst_; Mat prevGray_, prevDesc_; std::vector prevKp_; @@ -35,7 +33,6 @@ class Visualizer { // 保存最终轨迹图像到文件 bool saveTrajectory(const std::string &path); private: - int W_, H_; double meters_per_pixel_; Size mapSize_; Mat map_; diff --git a/modules/slam/src/visualizer.cpp b/modules/slam/src/visualizer.cpp index 30b65cdbb98..5da15843824 100644 --- a/modules/slam/src/visualizer.cpp +++ b/modules/slam/src/visualizer.cpp @@ -7,7 +7,7 @@ namespace cv { namespace vo { Tracker::Tracker() - : feat_(), matcher_(), poseEst_(), frame_id_(0) + : feat_(), matcher_(), frame_id_(0) { } @@ -61,7 +61,7 @@ bool Tracker::processFrame(const Mat &gray, const std::string & /*imagePath*/, M } Visualizer::Visualizer(int W, int H, double meters_per_pixel) - : W_(W), H_(H), meters_per_pixel_(meters_per_pixel), mapSize_(W,H) + : meters_per_pixel_(meters_per_pixel), mapSize_(W,H) { map_ = Mat::zeros(mapSize_, CV_8UC3); } From 70bbdc61c1a14ebf2b8196ad3f64a8819b1432a2 Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Wed, 31 Dec 2025 14:45:19 +0800 Subject: [PATCH 25/29] Update trajectory/keyframes with either geometric or PnP pose, remove CSV and unused variables, and fix warnings. --- .../slam/include/opencv2/slam/keyframe.hpp | 3 + .../slam/include/opencv2/slam/optimizer.hpp | 2 +- modules/slam/src/map.cpp | 11 +- modules/slam/src/optimizer.cpp | 2 +- modules/slam/src/vo.cpp | 271 +++++++----------- 5 files changed, 113 insertions(+), 176 deletions(-) diff --git a/modules/slam/include/opencv2/slam/keyframe.hpp b/modules/slam/include/opencv2/slam/keyframe.hpp index ce5d3a6b069..eb91bfd705f 100644 --- a/modules/slam/include/opencv2/slam/keyframe.hpp +++ b/modules/slam/include/opencv2/slam/keyframe.hpp @@ -7,6 +7,9 @@ namespace cv { namespace vo { struct KeyFrame { + // Custom constructor for convenient initialization + KeyFrame(int id_, const Mat& image_, const std::vector& kps_, const Mat& desc_, const Mat& R_w_, const Mat& t_w_) + : id(id_), image(image_.clone()), kps(kps_), desc(desc_.clone()), R_w(R_w_.clone()), t_w(t_w_.clone()) {} int id = -1; Mat image; // optional std::vector kps; diff --git a/modules/slam/include/opencv2/slam/optimizer.hpp b/modules/slam/include/opencv2/slam/optimizer.hpp index 1f3d5425e70..0576c7b59ac 100644 --- a/modules/slam/include/opencv2/slam/optimizer.hpp +++ b/modules/slam/include/opencv2/slam/optimizer.hpp @@ -22,7 +22,7 @@ class Optimizer { std::vector &mappoints, const std::vector &localKfIndices, const std::vector &fixedKfIndices, - double fx, double fy, double cx, double cy, + double fx, double cx, double cy, int iterations = 10); #endif diff --git a/modules/slam/src/map.cpp b/modules/slam/src/map.cpp index a7a89938fe3..83e538fcd5b 100644 --- a/modules/slam/src/map.cpp +++ b/modules/slam/src/map.cpp @@ -135,13 +135,14 @@ std::vector MapManager::triangulateBetweenLastTwo(const std::vector(pts2_kp_idx.size())) ? pts2_kp_idx[c] : -1; if(kp1idx >= 0) mp.observations.emplace_back(lastKf.id, kp1idx); if(kp2idx >= 0) mp.observations.emplace_back(curKf.id, kp2idx); - // assign id - mp.id = next_mappoint_id_++; - mpid2idx_[mp.id] = static_cast(mappoints_.size()) + static_cast(newPoints.size()); + // DO NOT assign global id or insert into internal containers here. + // Return newly triangulated points to caller and let the caller add them + // into the MapManager via `addMapPoints` so keyframes are present first. newPoints.push_back(mp); } - // append to internal map - for(const auto &p: newPoints) mappoints_.push_back(p); + // Note: do NOT append to internal map here. Caller should add returned points + // via MapManager::addMapPoints after ensuring the corresponding keyframe + // has been inserted into the map (avoids transient observations to missing KFs). return newPoints; } diff --git a/modules/slam/src/optimizer.cpp b/modules/slam/src/optimizer.cpp index 671d2a98ba1..b1fe82bd1b5 100644 --- a/modules/slam/src/optimizer.cpp +++ b/modules/slam/src/optimizer.cpp @@ -37,7 +37,7 @@ void Optimizer::localBundleAdjustmentG2O( std::vector &mappoints, const std::vector &localKfIndices, const std::vector &fixedKfIndices, - double fx, double fy, double cx, double cy, + double fx, double cx, double cy, int iterations) { if(localKfIndices.empty()) return; diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index a2032002140..890348f381f 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -71,33 +71,20 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual // configure Localizer with a slightly stricter Lowe ratio (0.7) Localizer localizer(0.7f); - // prepare per-run CSV diagnostics + // prepare per-run diagnostics folder std::string runTimestamp; - { - auto now = std::chrono::system_clock::now(); - std::time_t t = std::chrono::system_clock::to_time_t(now); - std::tm tm = *std::localtime(&t); - std::ostringstream ss; ss << std::put_time(&tm, "%Y%m%d_%H%M%S"); - runTimestamp = ss.str(); - } - // create a 'result' folder inside the provided imageDir (portable, avoids requiring ) + auto now = std::chrono::system_clock::now(); + std::time_t t = std::chrono::system_clock::to_time_t(now); + std::tm tm = *std::localtime(&t); + std::ostringstream ss; ss << std::put_time(&tm, "%Y%m%d_%H%M%S"); + runTimestamp = ss.str(); std::string resultDirStr = imageDir; if(resultDirStr.empty()) resultDirStr = std::string("."); if(resultDirStr.back() == '/') resultDirStr.pop_back(); resultDirStr += "/result"; ensureDirectoryExists(resultDirStr); - // create a per-run folder under result/ named by timestamp std::string runDirStr = resultDirStr + "/" + runTimestamp; ensureDirectoryExists(runDirStr); - std::string csvPath = runDirStr + "/run.csv"; - std::ofstream csv(csvPath); - if(csv){ - csv << "frame_id,mean_diff,median_flow,pre_matches,post_matches,inliers,inlier_ratio,integrated\n"; - csv.flush(); - std::cout << "Writing diagnostics to " << csvPath << std::endl; - } else { - std::cerr << "Failed to open diagnostics CSV " << csvPath << std::endl; - } Mat R_g = Mat::eye(3,3,CV_64F); Mat t_g = Mat::zeros(3,1,CV_64F); @@ -201,9 +188,9 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual const DMatch &second = knn12[qi][1]; if(second.distance > 0.0f && best.distance / second.distance > ratio) continue; } - int t = best.trainIdx; - if(t < 0 || t >= (int)knn21.size() || knn21[t].empty()) continue; - const DMatch &rbest = knn21[t][0]; + int trainIdx = best.trainIdx; + if(trainIdx < 0 || trainIdx >= (int)knn21.size() || knn21[trainIdx].empty()) continue; + const DMatch &rbest = knn21[trainIdx][0]; if(rbest.trainIdx == (int)qi) goodMatches.push_back(best); } @@ -253,25 +240,13 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual if(initializer.initialize(prevKp, kps, goodMatches, loader.fx(), loader.fy(), loader.cx(), loader.cy(), R_init, t_init, pts3D, isTri)){ std::cout << "Initializer: success, creating initial keyframes and mappoints (" << pts3D.size() << ")" << std::endl; // build two keyframes: previous (id = frame_id-1) and current (id = frame_id) - KeyFrame kf0, kf1; - kf0.id = frame_id - 1; - // prevGray/prevKp/prevDesc refer to previous frame + Mat prevImg; if(!prevGray.empty()){ - if(prevGray.channels() == 1){ cvtColor(prevGray, kf0.image, COLOR_GRAY2BGR); } - else kf0.image = prevGray.clone(); + if(prevGray.channels() == 1){ cvtColor(prevGray, prevImg, COLOR_GRAY2BGR); } + else prevImg = prevGray.clone(); } - kf0.kps = prevKp; - kf0.desc = prevDesc.clone(); - kf0.R_w = Mat::eye(3,3,CV_64F); - kf0.t_w = Mat::zeros(3,1,CV_64F); - - kf1.id = frame_id; - kf1.image = frame.clone(); - kf1.kps = kps; - kf1.desc = desc.clone(); - // initializer returns pose of second camera relative to first (world = first) - kf1.R_w = R_init.clone(); - kf1.t_w = t_init.clone(); + KeyFrame kf0(frame_id - 1, prevImg, prevKp, prevDesc, Mat::eye(3,3,CV_64F), Mat::zeros(3,1,CV_64F)); + KeyFrame kf1(frame_id, frame, kps, desc, R_init, t_init); // convert initializer 3D points (in first camera frame) to MapPoints in world coords (world==first) std::vector newMps; @@ -306,12 +281,6 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual double z = t_g.at(2); vis.addPose(x,-z); - // write CSV entry for initialization frame - if(csv){ - csv << frame_id << ",init,0,0,0,0,0,1\n"; - csv.flush(); - } - // notify backend to run BA on initial map backendRequests.fetch_add(1); backendCv.notify_one(); @@ -324,95 +293,78 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual } } - // Try PnP against map points first (via Localizer) - bool solvedByPnP = false; + // 变量提前声明,保证后续可用 + Mat R_est, t_est, mask_est; + int inliers_est = 0; + bool ok_est = false; + bool ok_pnp = false; Mat R_pnp, t_pnp; int inliers_pnp = 0; int preMatches_pnp = 0, postMatches_pnp = 0; double meanReproj_pnp = 0.0; - if(localizer.tryPnP(map, desc, kps, loader.fx(), loader.fy(), loader.cx(), loader.cy(), gray.cols, gray.rows, + // 统一输出变量 + Mat R_use, t_use, mask_use; + int inliers = 0; + int matchCount = post_matches; + bool integrate = false; + + if(pts1.size() >= 8){ + ok_est = poseEst.estimate(pts1, pts2, loader.fx(), loader.fy(), loader.cx(), loader.cy(), R_est, t_est, mask_est, inliers_est); + } + if(!ok_est){ + ok_pnp = localizer.tryPnP(map, desc, kps, loader.fx(), loader.fy(), loader.cx(), loader.cy(), gray.cols, gray.rows, options.minInliers, R_pnp, t_pnp, inliers_pnp, frame_id, &frame, runDirStr, - &preMatches_pnp, &postMatches_pnp, &meanReproj_pnp)){ - solvedByPnP = true; - std::cout << "PnP solved: preMatches="< pose estimation failed, skipping integration." << std::endl; - } else if(inliers < MIN_INLIERS || matchCount < MIN_MATCHES){ - // Not enough geometric support -> skip (unless absolute inliers pass) - integrate = false; - // std::cout << " -> insufficient matches/inliers (by both absolute and relative metrics), skipping integration." << std::endl; - } else { - // We have sufficient geometric support. Only skip if motion is truly negligible - // (both translation and rotation tiny) AND the image/flow indicate near-identical frames. - const double MIN_TRANSLATION_NORM = options.minTranslationNorm; - const double MIN_ROTATION_RAD = options.minRotationRad; // 0.5 degree by default - const double DIFF_ZERO_THRESH = options.diffZeroThresh; // nearly identical image - const double FLOW_ZERO_THRESH = options.flowZeroThresh; // nearly zero flow in pixels - - if(t_norm < MIN_TRANSLATION_NORM && std::abs(rot_angle) < MIN_ROTATION_RAD - && meanDiff < DIFF_ZERO_THRESH && median_flow < FLOW_ZERO_THRESH){ - integrate = false; // truly static - // std::cout << " -> negligible motion and near-identical frames, skipping integration." << std::endl; + if(t_norm < options.minTranslationNorm && std::abs(rot_angle) < options.minRotationRad + && meanDiff < options.diffZeroThresh && median_flow < options.flowZeroThresh){ + integrate = false; } } if (inliers >= options.minInliers || (inliers >= 2 && matchCount > 50 && median_flow > 2.0)) { integrate = true; } - // integrate transform if allowed if(integrate){ - Mat t_d; t.convertTo(t_d, CV_64F); + // update global pose + Mat t_d; t_use.convertTo(t_d, CV_64F); Mat t_scaled = t_d * scaleM; - Mat R_d; R.convertTo(R_d, CV_64F); + Mat R_d; R_use.convertTo(R_d, CV_64F); t_g = t_g + R_g * t_scaled; R_g = R_g * R_d; double x = t_g.at(0); double z = t_g.at(2); vis.addPose(x,-z); - } - // if we integrated, create a keyframe and optionally triangulate new map points - if(integrate){ - KeyFrame kf; - kf.id = frame_id; - kf.image = frame.clone(); - kf.kps = kps; - kf.desc = desc.clone(); - kf.R_w = R_g.clone(); kf.t_w = t_g.clone(); + // create keyframe (use new constructor) + KeyFrame kf(frame_id, frame, kps, desc, R_g, t_g); + // triangulate new map points bool didTriangulate = false; + std::vector newPts; if(!map.keyframes().empty() && map.keyframes().back().id == frame_id - 1){ - // triangulate between last keyframe and this frame using normalized coordinates const KeyFrame &last = map.keyframes().back(); std::vector pts1n, pts2n; pts1n.reserve(pts1.size()); pts2n.reserve(pts2.size()); double fx = loader.fx(), fy = loader.fy(), cx = loader.cx(), cy = loader.cy(); @@ -420,87 +372,68 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual pts1n.emplace_back(float((pts1[i].x - cx)/fx), float((pts1[i].y - cy)/fy)); pts2n.emplace_back(float((pts2[i].x - cx)/fx), float((pts2[i].y - cy)/fy)); } - // build kp index lists (matching goodMatches order) std::vector pts1_kp_idx; pts1_kp_idx.reserve(goodMatches.size()); std::vector pts2_kp_idx; pts2_kp_idx.reserve(goodMatches.size()); for(const auto &m: goodMatches){ pts1_kp_idx.push_back(m.queryIdx); pts2_kp_idx.push_back(m.trainIdx); } - // triangulate between the last keyframe in the map and the CURRENT keyframe `kf`. - // previously the code passed `keyframes.back()` which refers to the previous local - // keyframe (or the same as `last`), resulting in triangulation between the same - // frame and thus no new points. Pass `kf` to triangulate between `last` and - // the current frame. - auto newPts = map.triangulateBetweenLastTwo(pts1n, pts2n, pts1_kp_idx, pts2_kp_idx, last, kf, fx, fy, cx, cy); - if(!newPts.empty()){ - didTriangulate = true; - // already appended inside MapManager - } + newPts = map.triangulateBetweenLastTwo(pts1n, pts2n, pts1_kp_idx, pts2_kp_idx, last, kf, fx, fy, cx, cy); + if(!newPts.empty()) didTriangulate = true; } - { - // insert keyframe and map points under lock to keep consistent state - std::lock_guard lk(mapMutex); - keyframes.push_back(std::move(kf)); - map.addKeyFrame(keyframes.back()); - if(options.enableMapMaintenance){ - const int interval = std::max(1, options.maintenanceInterval); - if(static_cast(map.keyframes().size()) % interval == 0){ - map.cullBadMapPoints(); - auto &mps = map.mappointsMutable(); - for(auto &mp : mps){ - if(mp.isBad) continue; - map.updateMapPointDescriptor(mp); - } - } - } - } + // insert keyframe and new points under lock + std::lock_guard lk(mapMutex); + keyframes.push_back(std::move(kf)); + map.addKeyFrame(keyframes.back()); if(didTriangulate){ + map.addMapPoints(newPts); std::cout << "Created keyframe " << frame_id << " and triangulated new map points (total=" << map.mappoints().size() << ")" << std::endl; } else { std::cout << "Created keyframe " << frame_id << " (no triangulation)" << std::endl; } + + if(options.enableMapMaintenance){ + const int interval = std::max(1, options.maintenanceInterval); + if(static_cast(map.keyframes().size()) % interval == 0){ + map.cullBadMapPoints(); + auto &mps = map.mappointsMutable(); + for(auto &mp : mps){ + if(mp.isBad) continue; + map.updateMapPointDescriptor(mp); + } + } + } // Notify backend thread to run local BA asynchronously if(options.enableBackend){ backendRequests.fetch_add(1); backendCv.notify_one(); } } + } - // write CSV line - if(csv){ - csv << frame_id << "," << meanDiff << "," << median_flow << "," << pre_matches << "," << post_matches << "," << inliers << "," << inlierRatio << "," << (integrate?1:0) << "\n"; - csv.flush(); - } - - // Always show a single image; if we have matches, draw small boxes around matched keypoints - Mat visImg; - if(frame.channels() > 1) visImg = frame.clone(); - else cvtColor(gray, visImg, COLOR_GRAY2BGR); - std::string info = std::string("Frame ") + std::to_string(frame_id) + " matches=" + std::to_string(matchCount) + " inliers=" + std::to_string(inliers); - if(!goodMatches.empty()){ - for(size_t mi=0; mi(goodMatches.size())){ - isInlier = mask.at(static_cast(mi), 0) != 0; - } else if(mask.cols == static_cast(goodMatches.size())){ - isInlier = mask.at(0, static_cast(mi)) != 0; - } + // Always show a single image; if we have matches, draw small boxes around matched keypoints + Mat visImg; + if(frame.channels() > 1) visImg = frame.clone(); + else cvtColor(gray, visImg, COLOR_GRAY2BGR); + std::string info = std::string("Frame ") + std::to_string(frame_id) + " matches=" + std::to_string(matchCount) + " inliers=" + std::to_string(inliers); + if(!goodMatches.empty()){ + for(size_t mi=0; mi(goodMatches.size())){ + isInlier = mask_use.at(static_cast(mi), 0) != 0; + } else if(mask_use.cols == static_cast(goodMatches.size())){ + isInlier = mask_use.at(0, static_cast(mi)) != 0; } - Scalar col = isInlier ? Scalar(0,255,0) : Scalar(0,0,255); - Point ip(cvRound(p2.x), cvRound(p2.y)); - Rect r(ip - Point(4,4), Size(8,8)); - rectangle(visImg, r, col, 2, LINE_AA); } + Scalar col = isInlier ? Scalar(0,255,0) : Scalar(0,0,255); + Point ip(cvRound(p2.x), cvRound(p2.y)); + Rect r(ip - Point(4,4), Size(8,8)); + rectangle(visImg, r, col, 2, LINE_AA); } - putText(visImg, info, Point(10,20), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(0,255,0), 2); - vis.showFrame(visImg); - - } else { - vis.showFrame(gray); } + putText(visImg, info, Point(10,20), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(0,255,0), 2); + vis.showFrame(visImg); } else { Mat visFrame; drawKeypoints(gray, kps, visFrame, Scalar::all(-1), DrawMatchesFlags::DRAW_RICH_KEYPOINTS); vis.showFrame(visFrame); From 1c996249870e3e3d434bc0885350ed9234868be2 Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Tue, 6 Jan 2026 18:52:41 +0800 Subject: [PATCH 26/29] add trajectory CSV export and a test --- modules/slam/src/vo.cpp | 93 +++++++++++++++ modules/slam/test/test_vo_eval.cpp | 178 +++++++++++++++++++++++++++++ 2 files changed, 271 insertions(+) create mode 100644 modules/slam/test/test_vo_eval.cpp diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index 890348f381f..750848b041f 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -94,6 +94,14 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual std::vector mappoints; std::unordered_map keyframeIdToIndex; + // Trajectory recording: store timestamp + pose for each keyframe + struct TrajectoryEntry { + double timestamp; + Mat R_w; + Mat t_w; + }; + std::vector trajectory; + // Backend (BA) thread primitives std::mutex mapMutex; // protects map and keyframe modifications and writeback std::condition_variable backendCv; @@ -160,6 +168,25 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual // persistent previous-frame storage (declare outside loop so detectAndCompute can use them) static std::vector prevKp; static Mat prevGray, prevDesc; + + // Helper lambda to extract timestamp from EuRoC image filename (nanoseconds) + auto extractTimestamp = [](const std::string &path, int fallbackId) -> double { + try { + std::string fname = path; + auto slash = fname.find_last_of("/\\"); + if(slash != std::string::npos) fname = fname.substr(slash + 1); + auto dot = fname.find_last_of('.'); + if(dot != std::string::npos) fname = fname.substr(0, dot); + if(!fname.empty()){ + double ts = std::stod(fname); + if(ts > 1e12) ts *= 1e-9; // likely nanoseconds -> seconds + else if(ts > 1e9) ts *= 1e-6; // likely microseconds -> seconds + return ts; + } + } catch(const std::exception &){} + return static_cast(fallbackId); + }; + while(loader.getNextImage(frame, imgPath)){ Mat gray = frame; if(gray.channels() > 1) cvtColor(gray, gray, COLOR_BGR2GRAY); @@ -281,6 +308,12 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual double z = t_g.at(2); vis.addPose(x,-z); + // Record initial trajectory entries + double ts0 = extractTimestamp(imgPath, frame_id - 1); + double ts1 = extractTimestamp(imgPath, frame_id); + trajectory.push_back({ts0, Mat::eye(3,3,CV_64F), Mat::zeros(3,1,CV_64F)}); + trajectory.push_back({ts1, R_g.clone(), t_g.clone()}); + // notify backend to run BA on initial map backendRequests.fetch_add(1); backendCv.notify_one(); @@ -358,6 +391,10 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual double z = t_g.at(2); vis.addPose(x,-z); + // Record trajectory entry for this keyframe + double ts = extractTimestamp(imgPath, frame_id); + trajectory.push_back({ts, R_g.clone(), t_g.clone()}); + // create keyframe (use new constructor) KeyFrame kf(frame_id, frame, kps, desc, R_g, t_g); @@ -462,6 +499,62 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual std::cerr << "Error saving trajectory: " << e.what() << std::endl; } + // Write trajectory to TUM-format CSV file + try{ + std::string csvPath = resultDirStr + "/" + runTimestamp + "/trajectory_tum.csv"; + std::ofstream ofs(csvPath); + if(!ofs.is_open()){ + std::cerr << "Failed to open trajectory CSV: " << csvPath << std::endl; + } else { + ofs << "# timestamp,tx,ty,tz,qx,qy,qz,qw" << std::endl; + for(const auto &entry : trajectory){ + if(entry.R_w.empty() || entry.t_w.empty()) continue; + Mat R, t; + entry.R_w.convertTo(R, CV_64F); + entry.t_w.convertTo(t, CV_64F); + // Convert rotation matrix to quaternion + cv::Matx33d Rm; + R.copyTo(Rm); + double tr = Rm(0,0) + Rm(1,1) + Rm(2,2); + double qw, qx, qy, qz; + if(tr > 0){ + double S = std::sqrt(tr + 1.0) * 2.0; + qw = 0.25 * S; + qx = (Rm(2,1) - Rm(1,2)) / S; + qy = (Rm(0,2) - Rm(2,0)) / S; + qz = (Rm(1,0) - Rm(0,1)) / S; + } else if((Rm(0,0) > Rm(1,1)) && (Rm(0,0) > Rm(2,2))){ + double S = std::sqrt(1.0 + Rm(0,0) - Rm(1,1) - Rm(2,2)) * 2.0; + qw = (Rm(2,1) - Rm(1,2)) / S; + qx = 0.25 * S; + qy = (Rm(0,1) + Rm(1,0)) / S; + qz = (Rm(0,2) + Rm(2,0)) / S; + } else if(Rm(1,1) > Rm(2,2)){ + double S = std::sqrt(1.0 + Rm(1,1) - Rm(0,0) - Rm(2,2)) * 2.0; + qw = (Rm(0,2) - Rm(2,0)) / S; + qx = (Rm(0,1) + Rm(1,0)) / S; + qy = 0.25 * S; + qz = (Rm(1,2) + Rm(2,1)) / S; + } else { + double S = std::sqrt(1.0 + Rm(2,2) - Rm(0,0) - Rm(1,1)) * 2.0; + qw = (Rm(1,0) - Rm(0,1)) / S; + qx = (Rm(0,2) + Rm(2,0)) / S; + qy = (Rm(1,2) + Rm(2,1)) / S; + qz = 0.25 * S; + } + // Write in TUM format: timestamp,tx,ty,tz,qx,qy,qz,qw (integer nanoseconds) + long long ts_ns = static_cast(std::llround(entry.timestamp * 1e9)); + ofs << ts_ns << "," + << std::setprecision(9) << t.at(0,0) << "," << t.at(1,0) << "," << t.at(2,0) << "," + << qx << "," << qy << "," << qz << "," << qw << "\n"; + } + ofs.close(); + std::cout << "Saved trajectory CSV (" << trajectory.size() << " poses) to " << csvPath << std::endl; + } + } catch(const std::exception &e){ + std::cerr << "Error writing trajectory CSV: " << e.what() << std::endl; + } + // Shutdown backend thread gracefully if(options.enableBackend){ backendStop.store(true); diff --git a/modules/slam/test/test_vo_eval.cpp b/modules/slam/test/test_vo_eval.cpp new file mode 100644 index 00000000000..3d2ded1f71f --- /dev/null +++ b/modules/slam/test/test_vo_eval.cpp @@ -0,0 +1,178 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +struct PoseRow { + double ts = 0.0; + cv::Mat R; // 3x3 + cv::Mat t; // 3x1 +}; + +static bool readTum(const std::string &path, std::vector &out){ + std::ifstream ifs(path); + if(!ifs.is_open()) return false; + std::string line; + while(std::getline(ifs, line)){ + if(line.empty() || line[0] == '#') continue; + for(char &c : line) if(c == ',') c = ' '; // allow comma-separated CSV + std::stringstream ss(line); + double ts, tx, ty, tz, qx, qy, qz, qw; + if(!(ss >> ts >> tx >> ty >> tz >> qx >> qy >> qz >> qw)) continue; + if(ts > 1e12) ts *= 1e-9; // convert nanoseconds to seconds when needed + cv::Mat R = cv::Mat::eye(3,3,CV_64F); + double xx = qx*qx, yy = qy*qy, zz = qz*qz; + double xy = qx*qy, xz = qx*qz, yz = qy*qz; + double wx = qw*qx, wy = qw*qy, wz = qw*qz; + R.at(0,0) = 1.0 - 2.0*(yy + zz); + R.at(0,1) = 2.0*(xy - wz); + R.at(0,2) = 2.0*(xz + wy); + R.at(1,0) = 2.0*(xy + wz); + R.at(1,1) = 1.0 - 2.0*(xx + zz); + R.at(1,2) = 2.0*(yz - wx); + R.at(2,0) = 2.0*(xz - wy); + R.at(2,1) = 2.0*(yz + wx); + R.at(2,2) = 1.0 - 2.0*(xx + yy); + cv::Mat t = (cv::Mat_(3,1) << tx, ty, tz); + out.push_back({ts, R, t}); + } + return true; +} + +static void buildCorrespondence(const std::vector >, const std::vector &est, + std::vector &p_gt, std::vector &p_est){ + size_t i = 0, j = 0; + while(i < gt.size() && j < est.size()){ + double ts_gt = gt[i].ts; + double ts_est = est[j].ts; + double diff = std::abs(ts_gt - ts_est); + const double tol = 5e-2; // 50ms tolerance (EuRoC frames ~20Hz, ~0.05s step) + if(diff < tol){ + p_gt.emplace_back(gt[i].t.at(0), gt[i].t.at(1), gt[i].t.at(2)); + p_est.emplace_back(est[j].t.at(0), est[j].t.at(1), est[j].t.at(2)); + i++; j++; + } else if(ts_gt < ts_est){ + i++; + } else { + j++; + } + } +} + +static void umeyamaSim3(const std::vector &src, const std::vector &dst, + double &scale, cv::Mat &R, cv::Mat &t){ + const int N = static_cast(src.size()); + CV_Assert(N > 0 && dst.size() == src.size()); + cv::Mat src_mat(N, 3, CV_64F), dst_mat(N, 3, CV_64F); + for(int i=0;i(i,0) = src[i].x; + src_mat.at(i,1) = src[i].y; + src_mat.at(i,2) = src[i].z; + dst_mat.at(i,0) = dst[i].x; + dst_mat.at(i,1) = dst[i].y; + dst_mat.at(i,2) = dst[i].z; + } + cv::Mat mu_src, mu_dst; + cv::reduce(src_mat, mu_src, 0, cv::REDUCE_AVG); + cv::reduce(dst_mat, mu_dst, 0, cv::REDUCE_AVG); + cv::Mat src_centered = src_mat - cv::repeat(mu_src, N, 1); + cv::Mat dst_centered = dst_mat - cv::repeat(mu_dst, N, 1); + cv::Mat Sigma = (dst_centered.t() * src_centered) / static_cast(N); + cv::SVD svd(Sigma); + cv::Mat U = svd.u, Vt = svd.vt; + cv::Mat S = cv::Mat::eye(3,3,CV_64F); + if(cv::determinant(U * Vt) < 0) S.at(2,2) = -1.0; + R = U * S * Vt; + double var_src = 0.0; + for(int i=0;i(i,0), src_centered.at(i,1), src_centered.at(i,2)); + var_src += v.dot(v); + } + var_src /= static_cast(N); + scale = cv::trace(R * Sigma)[0] / var_src; + cv::Mat mu_src_t = mu_src.reshape(1,3); + cv::Mat mu_dst_t = mu_dst.reshape(1,3); + t = mu_dst_t - scale * R * mu_src_t; +} + +static double computeATE(const std::vector >, const std::vector &est, + double &scale_out){ + std::vector p_gt, p_est; + buildCorrespondence(gt, est, p_gt, p_est); + if(p_gt.size() < 2) return -1.0; + cv::Mat R, t; + double s = 1.0; + umeyamaSim3(p_est, p_gt, s, R, t); + scale_out = s; + double se = 0.0; + const size_t N = p_gt.size(); + for(size_t i=0;i(3,1) << p_est[i].x, p_est[i].y, p_est[i].z); + cv::Mat q = (cv::Mat_(3,1) << p_gt[i].x, p_gt[i].y, p_gt[i].z); + cv::Mat pe = s * R * p + t; + cv::Mat d = pe - q; + double dn = cv::norm(d); + se += dn * dn; + } + return std::sqrt(se / static_cast(N)); +} + +static double computeRPE(const std::vector >, const std::vector &est, + const cv::Mat &R_align, const cv::Mat &t_align, double scale, int delta){ + std::vector p_gt, p_est; + buildCorrespondence(gt, est, p_gt, p_est); + if(p_gt.size() <= static_cast(delta)) return -1.0; + double se = 0.0; + size_t count = 0; + for(size_t i=0; i + delta < p_gt.size(); ++i){ + cv::Mat pe1 = (cv::Mat_(3,1) << p_est[i].x, p_est[i].y, p_est[i].z); + cv::Mat pe2 = (cv::Mat_(3,1) << p_est[i+delta].x, p_est[i+delta].y, p_est[i+delta].z); + cv::Mat pa1 = scale * R_align * pe1 + t_align; + cv::Mat pa2 = scale * R_align * pe2 + t_align; + cv::Point3d dg = p_gt[i+delta] - p_gt[i]; + cv::Point3d de(pa2.at(0) - pa1.at(0), + pa2.at(1) - pa1.at(1), + pa2.at(2) - pa1.at(2)); + double diff = cv::norm(dg - de); + se += diff * diff; + count++; + } + if(count == 0) return -1.0; + return std::sqrt(se / static_cast(count)); +} + +int main(int argc, char** argv){ + if(argc < 3){ + std::cout << "Usage: " << argv[0] << " [gt_tum.csv] [est_tum.csv]\n"; + return 1; + } + std::vector gt, est; + if(!readTum(argv[1], gt)){ + std::cerr << "Failed to read GT: " << argv[1] << std::endl; + return 1; + } + if(!readTum(argv[2], est)){ + std::cerr << "Failed to read EST: " << argv[2] << std::endl; + return 1; + } + double scale = 1.0; + double ate = computeATE(gt, est, scale); + cv::Mat R_align, t_align; + // Recompute alignment to reuse for RPE + { + std::vector p_gt, p_est; + buildCorrespondence(gt, est, p_gt, p_est); + if(!p_gt.empty()){ + umeyamaSim3(p_est, p_gt, scale, R_align, t_align); + } + } + double rpe = computeRPE(gt, est, R_align, t_align, scale, 1); + std::cout << "ATE_RMSE " << ate << "\n"; + std::cout << "RPE_RMSE(delta=1) " << rpe << "\n"; + std::cout << "Scale " << scale << "\n"; + return 0; +} From 2999dab05055686cbc484add62af1741605f5556 Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Thu, 8 Jan 2026 15:34:15 +0800 Subject: [PATCH 27/29] =?UTF-8?q?Add=20intrinsics=20+=20configurable=20poi?= =?UTF-8?q?nt-culling,=20redundant-keyframe=20removal,=20triangulation=20f?= =?UTF-8?q?ix=20(camera=E2=86=94world),=20switch=20to=20OpenCV=20logging,?= =?UTF-8?q?=20and=20minor=20robustness=20tweaks.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- modules/slam/include/opencv2/slam/map.hpp | 30 ++ .../slam/include/opencv2/slam/visualizer.hpp | 13 +- modules/slam/include/opencv2/slam/vo.hpp | 32 +- modules/slam/src/data_loader.cpp | 16 +- modules/slam/src/initializer.cpp | 36 +- modules/slam/src/localizer.cpp | 14 +- modules/slam/src/map.cpp | 197 +++++++-- modules/slam/src/optimizer.cpp | 6 +- modules/slam/src/visualizer.cpp | 8 + modules/slam/src/vo.cpp | 408 +++++++++++++----- 10 files changed, 566 insertions(+), 194 deletions(-) diff --git a/modules/slam/include/opencv2/slam/map.hpp b/modules/slam/include/opencv2/slam/map.hpp index 66e4f6c7ae9..a7d6bb329dd 100644 --- a/modules/slam/include/opencv2/slam/map.hpp +++ b/modules/slam/include/opencv2/slam/map.hpp @@ -37,6 +37,15 @@ class MapManager { public: MapManager(); + // Camera intrinsics used for reprojection-based scoring/culling. + void setCameraIntrinsics(double fx, double fy, double cx, double cy); + + // Culling / retention tuning (milestone 3) + void setMapPointCullingParams(int minObservations, double minFoundRatio, double maxReprojErrorPx, int maxPointsKeep); + + // Redundant keyframe culling tuning (milestone 2) + void setRedundantKeyframeCullingParams(int minKeyframeObs, int minPointObs, double redundantRatio); + void addKeyFrame(const KeyFrame &kf); void addMapPoints(const std::vector &pts); @@ -72,6 +81,7 @@ class MapManager { // Quality management void cullBadMapPoints(); + void cullRedundantKeyFrames(int maxCullPerCall = 1); double computeReprojError(const MapPoint &mp, const KeyFrame &kf, double fx, double fy, double cx, double cy) const; void updateMapPointDescriptor(MapPoint &mp); @@ -80,11 +90,31 @@ class MapManager { int countGoodMapPoints() const; private: + void rebuildKeyframeIndex_(); + void rebuildMapPointIndex_(); std::vector keyframes_; std::vector mappoints_; std::unordered_map id2idx_; std::unordered_map mpid2idx_; int next_mappoint_id_ = 1; + + // Camera intrinsics (optional; when set, enables reprojection-based scoring) + bool hasIntrinsics_ = false; + double fx_ = 0.0; + double fy_ = 0.0; + double cx_ = 0.0; + double cy_ = 0.0; + + // Map point culling params + int mpMinObservations_ = 2; + double mpMinFoundRatio_ = 0.10; + double mpMaxReprojErrorPx_ = 5.0; + int mpMaxPointsKeep_ = 8000; + + // Redundant keyframe culling params + int kfRedundantMinObs_ = 80; + int kfRedundantMinPointObs_ = 3; + double kfRedundantRatio_ = 0.90; }; } // namespace vo diff --git a/modules/slam/include/opencv2/slam/visualizer.hpp b/modules/slam/include/opencv2/slam/visualizer.hpp index 4938505deeb..9651b69bb72 100644 --- a/modules/slam/include/opencv2/slam/visualizer.hpp +++ b/modules/slam/include/opencv2/slam/visualizer.hpp @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include "opencv2/slam/feature.hpp" #include "opencv2/slam/matcher.hpp" @@ -24,19 +25,21 @@ class Tracker { class Visualizer { public: Visualizer(int W=1000, int H=800, double meters_per_pixel=0.02); - // 更新轨迹(传入 x,z 坐标) + // Update trajectory (x,z coordinates) void addPose(double x, double z); - // 返回帧绘制(matches 或 keypoints)到窗口 + void clearTrajectory(); + void setTrajectoryXZ(const std::vector &xz); + // Show frame window void showFrame(const Mat &frame); - // 返回并显示俯视图 + // Show top-down trajectory void showTopdown(); - // 保存最终轨迹图像到文件 + // Save trajectory image to file bool saveTrajectory(const std::string &path); private: double meters_per_pixel_; Size mapSize_; Mat map_; - std::vector traj_; // 存储 (x,z) + std::vector traj_; Point worldToPixel(const Point2d &p) const; }; diff --git a/modules/slam/include/opencv2/slam/vo.hpp b/modules/slam/include/opencv2/slam/vo.hpp index 19d799f1190..d612eb29e25 100644 --- a/modules/slam/include/opencv2/slam/vo.hpp +++ b/modules/slam/include/opencv2/slam/vo.hpp @@ -17,13 +17,35 @@ struct CV_EXPORTS_W_SIMPLE VisualOdometryOptions { CV_PROP_RW double minRotationRad = 0.5 * CV_PI / 180.0; CV_PROP_RW int maxMatchesKeep = 500; CV_PROP_RW double flowWeightLambda = 5.0; - // Backend/BA controls + CV_PROP_RW bool verbose = false; + + // Keyframe insertion policy + CV_PROP_RW int keyframeMinGap = 1; // allow close KFs if有视差 + CV_PROP_RW int keyframeMaxGap = 8; // force insertion if间隔过大 + CV_PROP_RW double keyframeMinParallaxPx = 8.0; + + // Backend BA cadence + CV_PROP_RW int backendTriggerInterval = 2; CV_PROP_RW bool enableBackend = true; - CV_PROP_RW int backendWindow = 5; // number of latest keyframes for local BA - CV_PROP_RW int backendIterations = 10; // BA iterations (if backend enabled) - // Map maintenance controls + CV_PROP_RW int backendWindow = 45; + CV_PROP_RW int backendIterations = 15; + + // Redundant keyframe culling + CV_PROP_RW int maxKeyframeCullsPerMaintenance = 2; + CV_PROP_RW int redundantKeyframeMinObs = 80; + CV_PROP_RW int redundantKeyframeMinPointObs = 3; + CV_PROP_RW double redundantKeyframeRatio = 0.90; + + // Map maintenance CV_PROP_RW bool enableMapMaintenance = true; - CV_PROP_RW int maintenanceInterval = 5; // run cull/descriptor refresh every N keyframes + CV_PROP_RW int maintenanceInterval = 5; + + // Map point culling / retention + CV_PROP_RW int mapMaxPointsKeep = 8000; + CV_PROP_RW double mapMaxReprojErrorPx = 5.0; + CV_PROP_RW double mapMinFoundRatio = 0.10; + CV_PROP_RW int mapMinObservations = 2; + // Verbose diagnostics (OpenCV log level is global). }; class CV_EXPORTS_W VisualOdometry { diff --git a/modules/slam/src/data_loader.cpp b/modules/slam/src/data_loader.cpp index 6c5814cc9d2..3f01c20dd70 100644 --- a/modules/slam/src/data_loader.cpp +++ b/modules/slam/src/data_loader.cpp @@ -1,7 +1,7 @@ #include "opencv2/slam/data_loader.hpp" #include #include -#include +#include #include #include @@ -15,7 +15,7 @@ bool ensureDirectoryExists(const std::string &dir){ if(cv::utils::fs::exists(dir)) return cv::utils::fs::isDirectory(dir); return cv::utils::fs::createDirectories(dir); } catch(const std::exception &e) { - std::cerr << "ensureDirectoryExists: " << e.what() << std::endl; + CV_LOG_ERROR(NULL, cv::format("ensureDirectoryExists: %s", e.what())); return false; } } @@ -25,11 +25,11 @@ DataLoader::DataLoader(const std::string &imageDir) { try { if(!cv::utils::fs::exists(imageDir) || !cv::utils::fs::isDirectory(imageDir)){ - std::cerr << "DataLoader: imageDir does not exist or is not a directory: " << imageDir << std::endl; + CV_LOG_ERROR(NULL, cv::format("DataLoader: imageDir does not exist or is not a directory: %s", imageDir.c_str())); return; } } catch(const std::exception &e){ - std::cerr << "DataLoader: filesystem error checking imageDir: " << e.what() << std::endl; + CV_LOG_ERROR(NULL, cv::format("DataLoader: filesystem error checking imageDir: %s", e.what())); return; } @@ -37,13 +37,13 @@ DataLoader::DataLoader(const std::string &imageDir) try { glob(imageDir + "/*", imageFiles, false); } catch(const Exception &e){ - std::cerr << "DataLoader: glob failed for '" << imageDir << "': " << e.what() << std::endl; + CV_LOG_ERROR(NULL, cv::format("DataLoader: glob failed for '%s': %s", imageDir.c_str(), e.what())); imageFiles.clear(); return; } if(imageFiles.empty()){ - std::cerr << "DataLoader: no image files found in " << imageDir << std::endl; + CV_LOG_ERROR(NULL, cv::format("DataLoader: no image files found in %s", imageDir.c_str())); return; } @@ -83,7 +83,7 @@ bool DataLoader::loadIntrinsics(const std::string &yamlPath){ while(ss >> v) vals.push_back(v); if(vals.size() >= 4){ fx_ = vals[0]; fy_ = vals[1]; cx_ = vals[2]; cy_ = vals[3]; - std::cerr << "DataLoader: loaded intrinsics from " << yamlPath << std::endl; + CV_LOG_DEBUG(NULL, cv::format("DataLoader: loaded intrinsics from %s", yamlPath.c_str())); return true; } } @@ -102,7 +102,7 @@ bool DataLoader::getNextImage(Mat &image, std::string &imagePath){ imagePath = imageFiles[currentIndex]; image = imread(imagePath, IMREAD_UNCHANGED); if(image.empty()){ - std::cerr << "DataLoader: couldn't read " << imagePath << ", skipping" << std::endl; + CV_LOG_WARNING(NULL, cv::format("DataLoader: couldn't read %s, skipping", imagePath.c_str())); currentIndex++; return getNextImage(image, imagePath); // try next } diff --git a/modules/slam/src/initializer.cpp b/modules/slam/src/initializer.cpp index 798b3f53f95..dc6aa6d1fea 100644 --- a/modules/slam/src/initializer.cpp +++ b/modules/slam/src/initializer.cpp @@ -1,6 +1,6 @@ #include "opencv2/slam/initializer.hpp" #include -#include +#include #include namespace cv { @@ -16,10 +16,7 @@ bool Initializer::initialize(const std::vector &kps1, std::vector &points3D, std::vector &isTriangulated) { - if(matches.size() < 50) { - std::cout << "Initializer: too few matches (" << matches.size() << ")" << std::endl; - return false; - } + if(matches.size() < 50) return false; // Extract matched points std::vector pts1, pts2; @@ -36,10 +33,7 @@ bool Initializer::initialize(const std::vector &kps1, Mat H = findHomography(pts1, pts2, RANSAC, 3.0, inliersH, 2000, 0.999); Mat F = findFundamentalMat(pts1, pts2, FM_RANSAC, 3.0, 0.999, inliersF); - if(H.empty() || F.empty()) { - std::cout << "Initializer: failed to compute H or F" << std::endl; - return false; - } + if(H.empty() || F.empty()) return false; // Compute scores std::vector inlH, inlF; @@ -48,8 +42,7 @@ bool Initializer::initialize(const std::vector &kps1, float ratio = scoreH / (scoreH + scoreF); - std::cout << "Initializer: H score=" << scoreH << " F score=" << scoreF - << " ratio=" << ratio << std::endl; + CV_LOG_DEBUG(NULL, "Initializer: computed model scores"); // Decide between H and F // If ratio > 0.45, scene is likely planar, use H @@ -63,36 +56,24 @@ bool Initializer::initialize(const std::vector &kps1, bool success = false; if(useH) { - std::cout << "Initializer: using Homography" << std::endl; success = reconstructH(pts1, pts2, H, fx, fy, cx, cy, R_out, t_out, pts3D, isTri, parallax); } else { - std::cout << "Initializer: using Fundamental" << std::endl; success = reconstructF(pts1, pts2, F, fx, fy, cx, cy, R_out, t_out, pts3D, isTri, parallax); } - if(!success) { - std::cout << "Initializer: reconstruction failed" << std::endl; - return false; - } + if(!success) return false; // Count good triangulated points int goodCount = 0; for(bool b : isTri) if(b) goodCount++; - std::cout << "Initializer: triangulated " << goodCount << "/" << pts3D.size() - << " points, parallax=" << parallax << std::endl; + (void)goodCount; // Check quality: need enough good points - if(goodCount < 50) { - std::cout << "Initializer: too few good points (" << goodCount << ")" << std::endl; - return false; - } + if(goodCount < 50) return false; // Check parallax - if(parallax < 1.0f) { - std::cout << "Initializer: insufficient parallax (" << parallax << ")" << std::endl; - return false; - } + if(parallax < 1.0f) return false; // Success R = R_out; @@ -100,7 +81,6 @@ bool Initializer::initialize(const std::vector &kps1, points3D = pts3D; isTriangulated = isTri; - std::cout << "Initializer: SUCCESS!" << std::endl; return true; } diff --git a/modules/slam/src/localizer.cpp b/modules/slam/src/localizer.cpp index 8e7c3849888..a490ed2fb44 100644 --- a/modules/slam/src/localizer.cpp +++ b/modules/slam/src/localizer.cpp @@ -104,7 +104,8 @@ bool Localizer::tryPnP(const MapManager &map, const Mat &desc, const std::vector Mat camera = (Mat_(3,3) << fx,0,cx, 0,fy,cy, 0,0,1); Mat dist = Mat::zeros(4,1,CV_64F); std::vector inliersIdx; - bool ok = solvePnPRansac(obj, imgpts, camera, dist, R_out, t_out, false, + Mat rvec, tvec; + bool ok = solvePnPRansac(obj, imgpts, camera, dist, rvec, tvec, false, 100, 8.0, 0.99, inliersIdx, SOLVEPNP_ITERATIVE); if(!ok) return false; inliers_out = static_cast(inliersIdx.size()); @@ -113,7 +114,7 @@ bool Localizer::tryPnP(const MapManager &map, const Mat &desc, const std::vector double meanReproj = 0.0; if(!inliersIdx.empty()){ std::vector proj; - projectPoints(obj, R_out, t_out, camera, dist, proj); + projectPoints(obj, rvec, tvec, camera, dist, proj); double sum = 0.0; for(int idx: inliersIdx){ double e = std::hypot(proj[idx].x - imgpts[idx].x, proj[idx].y - imgpts[idx].y); @@ -150,6 +151,15 @@ bool Localizer::tryPnP(const MapManager &map, const Mat &desc, const std::vector } catch(...) { /* don't crash on diagnostics */ } } + // Convert OpenCV PnP result (world->camera: Xc = Rcw*Xw + tcw) + // into our pose convention: camera->world rotation (R_w) and camera center in world (C_w). + Mat Rcw; + Rodrigues(rvec, Rcw); + Mat Rwc = Rcw.t(); + Mat Cw = -Rwc * tvec; + R_out = Rwc; + t_out = Cw; + return inliers_out >= min_inliers; } diff --git a/modules/slam/src/map.cpp b/modules/slam/src/map.cpp index 83e538fcd5b..dc025b8a9e3 100644 --- a/modules/slam/src/map.cpp +++ b/modules/slam/src/map.cpp @@ -1,6 +1,6 @@ #include "opencv2/slam/map.hpp" #include -#include +#include #include #include @@ -9,6 +9,27 @@ namespace vo { MapManager::MapManager() {} +void MapManager::setCameraIntrinsics(double fx, double fy, double cx, double cy){ + fx_ = fx; + fy_ = fy; + cx_ = cx; + cy_ = cy; + hasIntrinsics_ = (fx_ > 1e-9 && fy_ > 1e-9); +} + +void MapManager::setMapPointCullingParams(int minObservations, double minFoundRatio, double maxReprojErrorPx, int maxPointsKeep){ + mpMinObservations_ = std::max(2, minObservations); + mpMinFoundRatio_ = std::max(0.0, std::min(1.0, minFoundRatio)); + mpMaxReprojErrorPx_ = std::max(0.1, maxReprojErrorPx); + mpMaxPointsKeep_ = std::max(100, maxPointsKeep); +} + +void MapManager::setRedundantKeyframeCullingParams(int minKeyframeObs, int minPointObs, double redundantRatio){ + kfRedundantMinObs_ = std::max(10, minKeyframeObs); + kfRedundantMinPointObs_ = std::max(2, minPointObs); + kfRedundantRatio_ = std::max(0.0, std::min(1.0, redundantRatio)); +} + void MapManager::addKeyFrame(const KeyFrame &kf){ keyframes_.push_back(kf); id2idx_[kf.id] = static_cast(keyframes_.size()) - 1; @@ -22,6 +43,22 @@ void MapManager::addMapPoints(const std::vector &pts){ } } +void MapManager::rebuildKeyframeIndex_(){ + id2idx_.clear(); + id2idx_.reserve(keyframes_.size()); + for(size_t i = 0; i < keyframes_.size(); ++i){ + id2idx_[keyframes_[i].id] = static_cast(i); + } +} + +void MapManager::rebuildMapPointIndex_(){ + mpid2idx_.clear(); + mpid2idx_.reserve(mappoints_.size()); + for(size_t i = 0; i < mappoints_.size(); ++i){ + mpid2idx_[mappoints_[i].id] = static_cast(i); + } +} + std::vector MapManager::findVisibleCandidates(const Mat &lastR, const Mat &lastT, double fx, double fy, double cx, double cy, int imgW, int imgH) const { @@ -47,13 +84,16 @@ std::vector MapManager::triangulateBetweenLastTwo(const std::vector newPoints; if(pts1n.empty() || pts2n.empty()) return newPoints; - // Relative pose from last -> current (we expect lastKf and curKf poses to be in world) - // compute relative transformation - // P1 = [I|0], P2 = [R_rel|t_rel] where R_rel = R_last^{-1} * R_cur, t_rel = R_last^{-1}*(t_cur - t_last) - Mat R_last = lastKf.R_w, t_last = lastKf.t_w; - Mat R_cur = curKf.R_w, t_cur = curKf.t_w; - Mat R_rel = R_last.t() * R_cur; - Mat t_rel = R_last.t() * (t_cur - t_last); + // Relative pose from last camera -> current camera. + // Pose convention: R_w is camera->world rotation, t_w is camera center in world (C_w). + // For triangulatePoints we need: X_cur = R * X_last + t, both in camera coordinates. + // With this convention: + // R = R_cur^T * R_last + // t = R_cur^T * (C_last - C_cur) + Mat R_last = lastKf.R_w, C_last = lastKf.t_w; + Mat R_cur = curKf.R_w, C_cur = curKf.t_w; + Mat R_rel = R_cur.t() * R_last; + Mat t_rel = R_cur.t() * (C_last - C_cur); Mat P1 = Mat::eye(3,4,CV_64F); Mat P2(3,4,CV_64F); for(int r=0;r<3;++r){ @@ -64,13 +104,13 @@ std::vector MapManager::triangulateBetweenLastTwo(const std::vector(mpMinFoundRatio_); + const size_t MAX_POINTS_KEEP = static_cast(mpMaxPointsKeep_); + const double MAX_REPROJ_ERROR = mpMaxReprojErrorPx_; for(auto &mp : mappoints_) { if(mp.isBad) continue; @@ -192,36 +233,67 @@ void MapManager::cullBadMapPoints() { continue; } - // 3. Check reprojection error across observations - // Sample a few keyframes to check reprojection - int errorCount = 0; - int checkCount = 0; - for(const auto &obs : mp.observations) { - int kfId = obs.first; - int kfIdx = keyframeIndex(kfId); - if(kfIdx < 0 || kfIdx >= static_cast(keyframes_.size())) continue; - - const KeyFrame &kf = keyframes_[kfIdx]; - // Use default camera params (should be passed in production code) - double fx = 500.0, fy = 500.0, cx = 320.0, cy = 240.0; - double error = computeReprojError(mp, kf, fx, fy, cx, cy); - - checkCount++; - if(error > MAX_REPROJ_ERROR) { - errorCount++; + // 3. Check reprojection error across observations (if intrinsics known) + if(hasIntrinsics_){ + int errorCount = 0; + int checkCount = 0; + for(const auto &obs : mp.observations) { + int kfId = obs.first; + int kfIdx = keyframeIndex(kfId); + if(kfIdx < 0 || kfIdx >= static_cast(keyframes_.size())) continue; + const KeyFrame &kf = keyframes_[kfIdx]; + double error = computeReprojError(mp, kf, fx_, fy_, cx_, cy_); + + checkCount++; + if(error > MAX_REPROJ_ERROR) errorCount++; + if(checkCount >= 3) break; } - // Sample up to 3 observations for efficiency - if(checkCount >= 3) break; + if(checkCount > 0 && errorCount > checkCount / 2) mp.isBad = true; } + } - // If majority of samples have high error, mark as bad - if(checkCount > 0 && errorCount > checkCount / 2) { - mp.isBad = true; - } + // Gather good points + std::vector goodIdx; goodIdx.reserve(mappoints_.size()); + for(size_t i = 0; i < mappoints_.size(); ++i){ + if(!mappoints_[i].isBad) goodIdx.push_back(i); } - // Remove bad points + // Enforce hard cap with score-based retention when too many points survive + if(goodIdx.size() > MAX_POINTS_KEEP){ + struct ScoredIdx { double score; size_t idx; }; + std::vector scored; scored.reserve(goodIdx.size()); + for(size_t idx : goodIdx){ + const auto &mp = mappoints_[idx]; + double found = static_cast(std::max(0.05f, mp.getFoundRatio())); + // Prefer points with many observations and high found ratio. + // If intrinsics are known, also prefer low reprojection error. + double reproj = 0.0; + if(hasIntrinsics_){ + std::vector errs; + errs.reserve(3); + int checked = 0; + for(const auto &obs : mp.observations){ + int kfIdx = keyframeIndex(obs.first); + if(kfIdx < 0 || kfIdx >= static_cast(keyframes_.size())) continue; + errs.push_back(computeReprojError(mp, keyframes_[kfIdx], fx_, fy_, cx_, cy_)); + if(++checked >= 3) break; + } + if(!errs.empty()){ + size_t mid = errs.size()/2; + std::nth_element(errs.begin(), errs.begin()+mid, errs.end()); + reproj = errs[mid]; + } + } + double score = static_cast(mp.nObs) * found / (1.0 + reproj); + scored.push_back({score, idx}); + } + std::nth_element(scored.begin(), scored.begin() + static_cast(MAX_POINTS_KEEP), scored.end(), + [](const ScoredIdx &a, const ScoredIdx &b){ return a.score > b.score; }); + for(size_t i = MAX_POINTS_KEEP; i < scored.size(); ++i){ + mappoints_[scored[i].idx].isBad = true; + } + } size_t before = mappoints_.size(); mappoints_.erase( std::remove_if(mappoints_.begin(), mappoints_.end(), @@ -230,10 +302,53 @@ void MapManager::cullBadMapPoints() { ); size_t after = mappoints_.size(); - if(before - after > 0) { - std::cout << "MapManager: culled " << (before - after) << " bad map points (" - << after << " remain)" << std::endl; + if(after != before) rebuildMapPointIndex_(); +} + +void MapManager::cullRedundantKeyFrames(int maxCullPerCall){ + if(maxCullPerCall <= 0) return; + if(keyframes_.size() <= 3) return; + + const int protectFirst = 2; + const int protectLast = 2; + if(static_cast(keyframes_.size()) <= protectFirst + protectLast) return; + + int removed = 0; + for(int kfi = protectFirst; kfi < static_cast(keyframes_.size()) - protectLast; ++kfi){ + const int kfId = keyframes_[kfi].id; + + int totalObs = 0; + int redundantObs = 0; + for(auto &mp : mappoints_){ + if(mp.isBad) continue; + bool observedHere = false; + for(const auto &obs : mp.observations){ + if(obs.first == kfId){ observedHere = true; break; } + } + if(!observedHere) continue; + totalObs++; + if(static_cast(mp.observations.size()) >= kfRedundantMinPointObs_) redundantObs++; + } + if(totalObs < kfRedundantMinObs_) continue; + const double ratio = static_cast(redundantObs) / static_cast(totalObs); + if(ratio < kfRedundantRatio_) continue; + + // Remove this keyframe: erase its observations from map points + for(auto &mp : mappoints_){ + if(mp.isBad) continue; + auto &obs = mp.observations; + obs.erase(std::remove_if(obs.begin(), obs.end(), [&](const std::pair &o){ return o.first == kfId; }), obs.end()); + mp.nObs = static_cast(obs.size()); + if(mp.nObs < 2) mp.isBad = true; + } + keyframes_.erase(keyframes_.begin() + kfi); + rebuildKeyframeIndex_(); + removed++; + if(removed >= maxCullPerCall) break; + kfi--; // re-check current index after erase } + + if(removed > 0) cullBadMapPoints(); } double MapManager::computeReprojError(const MapPoint &mp, const KeyFrame &kf, diff --git a/modules/slam/src/optimizer.cpp b/modules/slam/src/optimizer.cpp index b1fe82bd1b5..9fdc2b95ef6 100644 --- a/modules/slam/src/optimizer.cpp +++ b/modules/slam/src/optimizer.cpp @@ -1,4 +1,5 @@ #include "opencv2/slam/optimizer.hpp" +#include #include #include #include @@ -431,13 +432,12 @@ void Optimizer::globalBundleAdjustmentSFM( double fx, double fy, double cx, double cy, int iterations) { - std::cout << "Optimizer: Global BA with " << keyframes.size() - << " KFs and " << mappoints.size() << " map points" << std::endl; + CV_LOG_INFO(NULL, "Optimizer: Global BA"); std::vector localKfIndices; for(size_t i = 1; i < keyframes.size(); ++i) localKfIndices.push_back(static_cast(i)); std::vector fixedKfIndices = {0}; localBundleAdjustmentSFM(keyframes, mappoints, localKfIndices, fixedKfIndices, fx, fy, cx, cy, iterations); - std::cout << "Optimizer: Global BA completed" << std::endl; + CV_LOG_INFO(NULL, "Optimizer: Global BA completed"); } #endif diff --git a/modules/slam/src/visualizer.cpp b/modules/slam/src/visualizer.cpp index 5da15843824..7215bdc6a69 100644 --- a/modules/slam/src/visualizer.cpp +++ b/modules/slam/src/visualizer.cpp @@ -77,6 +77,14 @@ void Visualizer::addPose(double x, double z){ traj_.emplace_back(x,z); } +void Visualizer::clearTrajectory(){ + traj_.clear(); +} + +void Visualizer::setTrajectoryXZ(const std::vector &xz){ + traj_ = xz; +} + void Visualizer::showFrame(const Mat &frame){ if(frame.empty()) return; // Do not draw heading overlay on video frames; only show raw frame. diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp index 750848b041f..98632eb1e49 100644 --- a/modules/slam/src/vo.cpp +++ b/modules/slam/src/vo.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -24,6 +24,8 @@ #include #include #include +#include +#include namespace cv { namespace vo { @@ -49,10 +51,15 @@ void VisualOdometry::setBackendIterations(int iterations){ } int VisualOdometry::run(const std::string &imageDir, double scaleM, const VisualOdometryOptions &options){ + const auto prevLogLevel = cv::utils::logging::getLogLevel(); + // Keep global OpenCV logs at WARNING to avoid noisy INFO logs from OpenCL/UI. + // Module-specific verbosity is handled explicitly. + cv::utils::logging::setLogLevel(cv::utils::logging::LOG_LEVEL_WARNING); + DataLoader loader(imageDir); - std::cout << "VisualOdometry: loaded " << loader.size() << " images from " << imageDir << std::endl; if(loader.size() == 0){ - std::cerr << "VisualOdometry: no images found in " << imageDir << std::endl; + CV_LOG_WARNING(NULL, "VisualOdometry: no images found"); + cv::utils::logging::setLogLevel(prevLogLevel); return -1; } @@ -62,20 +69,30 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual if(!matcher_){ matcher_ = DescriptorMatcher::create(DescriptorMatcher::BRUTEFORCE_HAMMING); } - // Remove internal FeatureExtractor/Matcher in favor of injected OpenCV components PoseEstimator poseEst; - Visualizer vis; + // Keep the topdown visualization scale consistent with the world scaling applied by scaleM. + // When scaleM is large (e.g. 1.0), a larger meters_per_pixel zooms out to keep the trajectory visible. + // When scaleM is small (e.g. 0.02), it zooms in accordingly. + Visualizer vis(1000, 800, std::max(1e-9, scaleM)); MapManager map; - // two-view initializer Initializer initializer; - // configure Localizer with a slightly stricter Lowe ratio (0.7) Localizer localizer(0.7f); + // Configure map intrinsics and maintenance thresholds (milestone 2/3) + map.setCameraIntrinsics(loader.fx(), loader.fy(), loader.cx(), loader.cy()); + map.setMapPointCullingParams(options.mapMinObservations, + options.mapMinFoundRatio, + options.mapMaxReprojErrorPx, + options.mapMaxPointsKeep); + map.setRedundantKeyframeCullingParams(options.redundantKeyframeMinObs, + options.redundantKeyframeMinPointObs, + options.redundantKeyframeRatio); + // prepare per-run diagnostics folder std::string runTimestamp; auto now = std::chrono::system_clock::now(); - std::time_t t = std::chrono::system_clock::to_time_t(now); - std::tm tm = *std::localtime(&t); + std::time_t now_time_t = std::chrono::system_clock::to_time_t(now); + std::tm tm = *std::localtime(&now_time_t); std::ostringstream ss; ss << std::put_time(&tm, "%Y%m%d_%H%M%S"); runTimestamp = ss.str(); std::string resultDirStr = imageDir; @@ -96,6 +113,7 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual // Trajectory recording: store timestamp + pose for each keyframe struct TrajectoryEntry { + int frame_id; double timestamp; Mat R_w; Mat t_w; @@ -107,7 +125,9 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual std::condition_variable backendCv; std::atomic backendStop(false); std::atomic backendRequests(0); - const int LOCAL_BA_WINDOW = std::max(1, options.backendWindow); + bool backendBusy = false; // guarded by mapMutex + // Expand BA window to optimize meaningful local map (not just last 5 frames) + const int LOCAL_BA_WINDOW = std::max(60, options.backendWindow); // Start backend thread only if enabled std::thread backendThread; @@ -120,22 +140,70 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual // snapshot map and keyframes auto kfs_snapshot = map.keyframes(); auto mps_snapshot = map.mappoints(); - // reset requests + // reset requests and mark busy backendRequests.store(0); + backendBusy = true; lk.unlock(); - // determine local window + // determine local window using covisibility with the latest keyframe int K = static_cast(kfs_snapshot.size()); if(K <= 0) continue; - int start = std::max(0, K - LOCAL_BA_WINDOW); + const int lastIdx = K - 1; + const int lastId = kfs_snapshot[lastIdx].id; + + std::unordered_map idToIdx; + idToIdx.reserve(static_cast(K)); + for(int i = 0; i < K; ++i) idToIdx[kfs_snapshot[i].id] = i; + + std::unordered_map sharedCount; + sharedCount.reserve(mps_snapshot.size()); + for(const auto &mp : mps_snapshot){ + bool hasLast = false; + for(const auto &obs : mp.observations){ + if(obs.first == lastId){ hasLast = true; break; } + } + if(!hasLast) continue; + for(const auto &obs : mp.observations){ + if(obs.first == lastId) continue; + auto it = idToIdx.find(obs.first); + if(it == idToIdx.end()) continue; + sharedCount[it->second] += 1; + } + } + + struct ScoredKf { int score; int idx; }; + std::vector scored; + scored.reserve(sharedCount.size()); + for(const auto &kv : sharedCount){ + scored.push_back({kv.second, kv.first}); + } + std::sort(scored.begin(), scored.end(), [](const ScoredKf &a, const ScoredKf &b){ return a.score > b.score; }); + std::vector localKfIndices; - for(int ii = start; ii < K; ++ii) localKfIndices.push_back(ii); + localKfIndices.reserve(static_cast(LOCAL_BA_WINDOW)); + auto push_unique = [&](int idx){ + if(idx < 0 || idx >= K) return; + if(idx == 0 || idx == 1) return; + if(std::find(localKfIndices.begin(), localKfIndices.end(), idx) != localKfIndices.end()) return; + localKfIndices.push_back(idx); + }; + push_unique(lastIdx); + for(const auto &s : scored){ + if(static_cast(localKfIndices.size()) >= LOCAL_BA_WINDOW) break; + push_unique(s.idx); + } + if(localKfIndices.empty()) localKfIndices.push_back(lastIdx); + std::vector fixedKfIndices; - if(start > 0) fixedKfIndices.push_back(0); + fixedKfIndices.push_back(0); + if(K > 1) fixedKfIndices.push_back(1); #if defined(HAVE_SFM) - // Run BA on snapshot (may take time) - uses Optimizer which will use g2o if enabled + CV_LOG_DEBUG(NULL, "Backend: Running BA"); Optimizer::localBundleAdjustmentSFM(kfs_snapshot, mps_snapshot, localKfIndices, fixedKfIndices, loader.fx(), loader.fy(), loader.cx(), loader.cy(), options.backendIterations); + CV_LOG_DEBUG(NULL, "Backend: BA completed"); + #else + CV_LOG_DEBUG(NULL, "Backend: HAVE_SFM not defined, BA skipped"); #endif // write back optimized poses/points into main map under lock using id-based lookup std::lock_guard lk2(mapMutex); @@ -157,6 +225,8 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual mps_ref[idx].p = mp_opt.p; } } + backendBusy = false; + backendCv.notify_all(); } }); } @@ -259,13 +329,11 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual // Two-view initialization: if map is empty and this is the second frame, try to bootstrap if(map.keyframes().empty() && frame_id == 1){ - std::cout << "Attempting two-view initialization (frame 0 + 1), matches=" << goodMatches.size() << std::endl; Mat R_init, t_init; std::vector pts3D; std::vector isTri; // call initializer using the matched keypoints between prev and current if(initializer.initialize(prevKp, kps, goodMatches, loader.fx(), loader.fy(), loader.cx(), loader.cy(), R_init, t_init, pts3D, isTri)){ - std::cout << "Initializer: success, creating initial keyframes and mappoints (" << pts3D.size() << ")" << std::endl; // build two keyframes: previous (id = frame_id-1) and current (id = frame_id) Mat prevImg; if(!prevGray.empty()){ @@ -273,7 +341,11 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual else prevImg = prevGray.clone(); } KeyFrame kf0(frame_id - 1, prevImg, prevKp, prevDesc, Mat::eye(3,3,CV_64F), Mat::zeros(3,1,CV_64F)); - KeyFrame kf1(frame_id, frame, kps, desc, R_init, t_init); + // recoverPose returns R,t such that x2 = R*x1 + t (cam1 -> cam2). + // Our map uses camera->world rotation (R_w) and camera center in world (C_w). + Mat Rwc1 = R_init.t(); + Mat Cw1 = (-Rwc1 * t_init) * scaleM; + KeyFrame kf1(frame_id, frame, kps, desc, Rwc1, Cw1); // convert initializer 3D points (in first camera frame) to MapPoints in world coords (world==first) std::vector newMps; @@ -281,7 +353,7 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual for(size_t i=0;i(0); double z = t_g.at(2); - vis.addPose(x,-z); + vis.addPose(x, z); // Record initial trajectory entries double ts0 = extractTimestamp(imgPath, frame_id - 1); double ts1 = extractTimestamp(imgPath, frame_id); - trajectory.push_back({ts0, Mat::eye(3,3,CV_64F), Mat::zeros(3,1,CV_64F)}); - trajectory.push_back({ts1, R_g.clone(), t_g.clone()}); + trajectory.push_back({frame_id - 1, ts0, Mat::eye(3,3,CV_64F), Mat::zeros(3,1,CV_64F)}); + trajectory.push_back({frame_id, ts1, R_g.clone(), t_g.clone()}); // notify backend to run BA on initial map backendRequests.fetch_add(1); @@ -321,19 +393,15 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual prevGray = gray.clone(); prevKp = kps; prevDesc = desc.clone(); frame_id++; continue; - } else { - std::cout << "Initializer: failed to initialize from first two frames" << std::endl; } } - // 变量提前声明,保证后续可用 Mat R_est, t_est, mask_est; int inliers_est = 0; bool ok_est = false; bool ok_pnp = false; Mat R_pnp, t_pnp; int inliers_pnp = 0; int preMatches_pnp = 0, postMatches_pnp = 0; double meanReproj_pnp = 0.0; - // 统一输出变量 Mat R_use, t_use, mask_use; int inliers = 0; int matchCount = post_matches; @@ -346,19 +414,15 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual ok_pnp = localizer.tryPnP(map, desc, kps, loader.fx(), loader.fy(), loader.cx(), loader.cy(), gray.cols, gray.rows, options.minInliers, R_pnp, t_pnp, inliers_pnp, frame_id, &frame, runDirStr, &preMatches_pnp, &postMatches_pnp, &meanReproj_pnp); - if(ok_pnp){ - std::cout << "PnP solved: preMatches="<= maxGap) insertKeyframe = true; + else if(gap >= minGap && median_flow >= options.keyframeMinParallaxPx) insertKeyframe = true; } - if(options.enableMapMaintenance){ - const int interval = std::max(1, options.maintenanceInterval); - if(static_cast(map.keyframes().size()) % interval == 0){ - map.cullBadMapPoints(); - auto &mps = map.mappointsMutable(); - for(auto &mp : mps){ - if(mp.isBad) continue; - map.updateMapPointDescriptor(mp); + if(insertKeyframe){ + KeyFrame kf(frame_id, frame, kps, desc, R_g, t_g); + + // Triangulate against last KF + bool didTriangulate = false; + std::vector newPts; + if(!map.keyframes().empty() && !map.keyframes().back().desc.empty() && !kf.desc.empty()){ + const KeyFrame &last = map.keyframes().back(); + + // Match last KF -> current KF + std::vector> knn_kf12, knn_kf21; + matcher_->knnMatch(last.desc, kf.desc, knn_kf12, 2); + matcher_->knnMatch(kf.desc, last.desc, knn_kf21, 2); + std::vector kfMatches; + kfMatches.reserve(knn_kf12.size()); + const float ratio_kf = 0.75f; + for(size_t qi = 0; qi < knn_kf12.size(); ++qi){ + if(knn_kf12[qi].empty()) continue; + const DMatch &best = knn_kf12[qi][0]; + if(knn_kf12[qi].size() >= 2){ + const DMatch &second = knn_kf12[qi][1]; + if(second.distance > 0.0f && best.distance / second.distance > ratio_kf) continue; + } + int trainIdx = best.trainIdx; + if(trainIdx < 0 || trainIdx >= (int)knn_kf21.size() || knn_kf21[trainIdx].empty()) continue; + const DMatch &rbest = knn_kf21[trainIdx][0]; + if(rbest.trainIdx == (int)qi) kfMatches.push_back(best); + } + + if(static_cast(kfMatches.size()) > options.maxMatchesKeep){ + std::nth_element(kfMatches.begin(), kfMatches.begin() + options.maxMatchesKeep, kfMatches.end(), + [](const DMatch &a, const DMatch &b){ return a.distance < b.distance; }); + kfMatches.resize(options.maxMatchesKeep); + } + + if(!kfMatches.empty()){ + std::vector pts1n, pts2n; + pts1n.reserve(kfMatches.size()); + pts2n.reserve(kfMatches.size()); + std::vector pts1_kp_idx; pts1_kp_idx.reserve(kfMatches.size()); + std::vector pts2_kp_idx; pts2_kp_idx.reserve(kfMatches.size()); + double fx = loader.fx(), fy = loader.fy(), cx = loader.cx(), cy = loader.cy(); + for(const auto &m : kfMatches){ + const Point2f &p1 = last.kps[m.queryIdx].pt; + const Point2f &p2 = kf.kps[m.trainIdx].pt; + pts1n.emplace_back(float((p1.x - cx)/fx), float((p1.y - cy)/fy)); + pts2n.emplace_back(float((p2.x - cx)/fx), float((p2.y - cy)/fy)); + pts1_kp_idx.push_back(m.queryIdx); + pts2_kp_idx.push_back(m.trainIdx); + } + newPts = map.triangulateBetweenLastTwo(pts1n, pts2n, pts1_kp_idx, pts2_kp_idx, last, kf, fx, fy, cx, cy); + if(!newPts.empty()) didTriangulate = true; + } + } + + // insert keyframe and new points under lock + { + std::lock_guard lk(mapMutex); + keyframes.push_back(std::move(kf)); + map.addKeyFrame(keyframes.back()); + if(didTriangulate) map.addMapPoints(newPts); + + if(options.enableMapMaintenance){ + const int interval = std::max(1, options.maintenanceInterval); + if(static_cast(map.keyframes().size()) % interval == 0){ + map.cullBadMapPoints(); + map.cullRedundantKeyFrames(std::max(1, options.maxKeyframeCullsPerMaintenance)); + auto &mps = map.mappointsMutable(); + for(auto &mp : mps){ + if(mp.isBad) continue; + map.updateMapPointDescriptor(mp); + } + } + } + // Notify backend thread to run local BA asynchronously every N inserted keyframes + if(options.enableBackend){ + const int interval = std::max(1, options.backendTriggerInterval); + if(static_cast(map.keyframes().size()) % interval == 0){ + backendRequests.fetch_add(1); + backendCv.notify_one(); + } } } - } - // Notify backend thread to run local BA asynchronously - if(options.enableBackend){ - backendRequests.fetch_add(1); - backendCv.notify_one(); } } } @@ -484,37 +606,118 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual if(key == 27) break; } - // save trajectory with timestamp into result/ folder + // Trigger final BA before saving trajectory and wait synchronously + if(options.enableBackend){ + CV_LOG_WARNING(NULL, "Triggering final backend optimization before saving trajectory..."); + { + std::unique_lock lk(mapMutex); + backendRequests.fetch_add(1); + backendCv.notify_one(); + backendCv.wait(lk, [&]{ return backendRequests.load() == 0 && !backendBusy; }); + } + + // Run a final full-map BA synchronously (larger than sliding window) + std::vector kfs_snapshot; + std::vector mps_snapshot; + { + std::lock_guard lk(mapMutex); + kfs_snapshot = map.keyframes(); + mps_snapshot = map.mappoints(); + } + int K = static_cast(kfs_snapshot.size()); + if(K > 0){ + std::vector allIdx(K); + std::iota(allIdx.begin(), allIdx.end(), 0); + std::vector fixed; + fixed.push_back(0); + if(K > 1) fixed.push_back(1); + #if defined(HAVE_SFM) + CV_LOG_WARNING(NULL, "Final BA: optimizing all keyframes"); + Optimizer::localBundleAdjustmentSFM(kfs_snapshot, mps_snapshot, allIdx, fixed, + loader.fx(), loader.fy(), loader.cx(), loader.cy(), std::max(20, options.backendIterations * 2)); + CV_LOG_WARNING(NULL, "Final BA: completed"); + { + std::lock_guard lk(mapMutex); + auto &kfs_ref = const_cast&>(map.keyframes()); + auto &mps_ref = const_cast&>(map.mappoints()); + for(const auto &kf_opt : kfs_snapshot){ + int idx = map.keyframeIndex(kf_opt.id); + if(idx >= 0 && idx < static_cast(kfs_ref.size())){ + kfs_ref[idx].R_w = kf_opt.R_w.clone(); + kfs_ref[idx].t_w = kf_opt.t_w.clone(); + } + } + for(const auto &mp_opt : mps_snapshot){ + if(mp_opt.id <= 0) continue; + int idx = map.mapPointIndex(mp_opt.id); + if(idx >= 0 && idx < static_cast(mps_ref.size())){ + mps_ref[idx].p = mp_opt.p; + } + } + } + #else + CV_LOG_WARNING(NULL, "Final BA: HAVE_SFM not defined, skipped"); + #endif + } + } + + // save trajectory image into result/ folder try{ // save trajectory into the per-run folder using a simple filename (no timestamp) std::string outDir = resultDirStr + "/" + runTimestamp; ensureDirectoryExists(outDir); std::string outPath = outDir + "/trajectory.png"; - if(vis.saveTrajectory(outPath)){ - std::cout << "Saved trajectory to " << outPath << std::endl; - } else { - std::cerr << "Failed to save trajectory to " << outPath << std::endl; + if(options.enableBackend && !map.keyframes().empty()){ + std::vector kfs = map.keyframes(); + std::sort(kfs.begin(), kfs.end(), [](const KeyFrame &a, const KeyFrame &b){ return a.id < b.id; }); + std::vector xz; + xz.reserve(kfs.size()); + for(const auto &kf : kfs){ + if(kf.t_w.empty()) continue; + xz.emplace_back(kf.t_w.at(0,0), kf.t_w.at(2,0)); + } + vis.setTrajectoryXZ(xz); } + (void)vis.saveTrajectory(outPath); } catch(const std::exception &e){ - std::cerr << "Error saving trajectory: " << e.what() << std::endl; + CV_LOG_ERROR(NULL, cv::format("Error saving trajectory: %s", e.what())); } // Write trajectory to TUM-format CSV file + // Extract final optimized poses from map keyframes (after backend BA) + std::vector finalTrajectory; + if(options.enableBackend && !map.keyframes().empty()){ + // Build map from frame_id to timestamp + std::unordered_map idToTimestamp; + for(const auto &trajEntry : trajectory){ + idToTimestamp[trajEntry.frame_id] = trajEntry.timestamp; + } + for(const auto &kf : map.keyframes()){ + if(kf.t_w.empty() || kf.R_w.empty()) continue; + double ts = idToTimestamp.count(kf.id) ? idToTimestamp[kf.id] : static_cast(kf.id); + finalTrajectory.push_back({kf.id, ts, kf.R_w.clone(), kf.t_w.clone()}); + } + if(options.verbose) CV_LOG_WARNING(NULL, "Backend enabled: using optimized poses"); + } else { + finalTrajectory = trajectory; // use front-end trajectory if backend disabled + if(options.verbose) CV_LOG_WARNING(NULL, "Backend disabled: using front-end poses"); + } + try{ std::string csvPath = resultDirStr + "/" + runTimestamp + "/trajectory_tum.csv"; std::ofstream ofs(csvPath); if(!ofs.is_open()){ - std::cerr << "Failed to open trajectory CSV: " << csvPath << std::endl; + CV_LOG_ERROR(NULL, cv::format("Failed to open trajectory CSV: %s", csvPath.c_str())); } else { ofs << "# timestamp,tx,ty,tz,qx,qy,qz,qw" << std::endl; - for(const auto &entry : trajectory){ + for(const auto &entry : finalTrajectory){ if(entry.R_w.empty() || entry.t_w.empty()) continue; - Mat R, t; - entry.R_w.convertTo(R, CV_64F); - entry.t_w.convertTo(t, CV_64F); + Mat Rmat, tvec_w; + entry.R_w.convertTo(Rmat, CV_64F); + entry.t_w.convertTo(tvec_w, CV_64F); // Convert rotation matrix to quaternion cv::Matx33d Rm; - R.copyTo(Rm); + Rmat.copyTo(Rm); double tr = Rm(0,0) + Rm(1,1) + Rm(2,2); double qw, qx, qy, qz; if(tr > 0){ @@ -545,14 +748,14 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual // Write in TUM format: timestamp,tx,ty,tz,qx,qy,qz,qw (integer nanoseconds) long long ts_ns = static_cast(std::llround(entry.timestamp * 1e9)); ofs << ts_ns << "," - << std::setprecision(9) << t.at(0,0) << "," << t.at(1,0) << "," << t.at(2,0) << "," + << std::setprecision(9) << tvec_w.at(0,0) << "," << tvec_w.at(1,0) << "," << tvec_w.at(2,0) << "," << qx << "," << qy << "," << qz << "," << qw << "\n"; } ofs.close(); - std::cout << "Saved trajectory CSV (" << trajectory.size() << " poses) to " << csvPath << std::endl; + if(options.verbose) CV_LOG_WARNING(NULL, "Saved trajectory CSV"); } } catch(const std::exception &e){ - std::cerr << "Error writing trajectory CSV: " << e.what() << std::endl; + CV_LOG_ERROR(NULL, cv::format("Error writing trajectory CSV: %s", e.what())); } // Shutdown backend thread gracefully @@ -562,6 +765,7 @@ int VisualOdometry::run(const std::string &imageDir, double scaleM, const Visual if(backendThread.joinable()) backendThread.join(); } + cv::utils::logging::setLogLevel(prevLogLevel); return 0; } From ef5ebcd6540fb1888c9bcc3c502b20395714feaa Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Sun, 11 Jan 2026 20:40:30 +0800 Subject: [PATCH 28/29] Added a full SLAM frontend/backend refactor introducing VisualOdometry, SlamSystem, map persistence, and visualization/sample updates to replace the old VO interface. --- modules/slam/include/opencv2/slam.hpp | 4 +- .../slam/include/opencv2/slam/data_loader.hpp | 50 +- modules/slam/include/opencv2/slam/map.hpp | 8 + .../slam/include/opencv2/slam/slam_system.hpp | 92 +++ modules/slam/include/opencv2/slam/tracker.hpp | 36 + .../include/opencv2/slam/visual_odometry.hpp | 109 +++ .../slam/include/opencv2/slam/visualizer.hpp | 21 +- modules/slam/include/opencv2/slam/vo.hpp | 70 -- modules/slam/samples/run_vo_sample.cpp | 182 ++++- modules/slam/src/map.cpp | 103 +++ modules/slam/src/slam_system.cpp | 238 ++++++ modules/slam/src/tracker.cpp | 48 ++ modules/slam/src/visual_odometry.cpp | 339 ++++++++ modules/slam/src/visualizer.cpp | 68 +- modules/slam/src/vo.cpp | 773 ------------------ 15 files changed, 1205 insertions(+), 936 deletions(-) create mode 100644 modules/slam/include/opencv2/slam/slam_system.hpp create mode 100644 modules/slam/include/opencv2/slam/tracker.hpp create mode 100644 modules/slam/include/opencv2/slam/visual_odometry.hpp delete mode 100644 modules/slam/include/opencv2/slam/vo.hpp create mode 100644 modules/slam/src/slam_system.cpp create mode 100644 modules/slam/src/tracker.cpp create mode 100644 modules/slam/src/visual_odometry.cpp delete mode 100644 modules/slam/src/vo.cpp diff --git a/modules/slam/include/opencv2/slam.hpp b/modules/slam/include/opencv2/slam.hpp index 334925ec9b7..db3d17aa416 100644 --- a/modules/slam/include/opencv2/slam.hpp +++ b/modules/slam/include/opencv2/slam.hpp @@ -14,8 +14,10 @@ #include "opencv2/slam/matcher.hpp" #include "opencv2/slam/optimizer.hpp" #include "opencv2/slam/pose.hpp" +#include "opencv2/slam/tracker.hpp" #include "opencv2/slam/visualizer.hpp" -#include "opencv2/slam/vo.hpp" +#include "opencv2/slam/visual_odometry.hpp" +#include "opencv2/slam/slam_system.hpp" /** @defgroup slam Simultaneous Localization and Mapping diff --git a/modules/slam/include/opencv2/slam/data_loader.hpp b/modules/slam/include/opencv2/slam/data_loader.hpp index b143547fae6..ee3f4daf3d0 100644 --- a/modules/slam/include/opencv2/slam/data_loader.hpp +++ b/modules/slam/include/opencv2/slam/data_loader.hpp @@ -6,39 +6,69 @@ namespace cv{ namespace vo{ -bool ensureDirectoryExists(const std::string &dir); +CV_EXPORTS bool ensureDirectoryExists(const std::string &dir); -class DataLoader { +/** + * @brief Simple helper to iterate image sequences on disk. + * + * DataLoader enumerates image files in a directory and provides simple + * sequential access for samples and demos. This class does not perform + * any image decoding policy beyond forwarding files to OpenCV's IO. + */ +class CV_EXPORTS DataLoader { public: - // 构造:传入图像目录(可以是相对或绝对路径) + /** + * @brief Construct a DataLoader for the given image directory. + * @param imageDir Directory containing image files (absolute or relative path). + */ DataLoader(const std::string &imageDir); - // 获取下一张图像,成功返回 true 并填充 image 与 imagePath;到末尾返回 false + /** + * @brief Load the next image in the sequence. + * @param image Output image (decoded by OpenCV). + * @param imagePath Output path of the loaded image file. + * @return True if an image was loaded; false when the sequence has ended. + */ bool getNextImage(Mat &image, std::string &imagePath); - // 重置到序列开始 + /** + * @brief Reset the internal iterator to the beginning of the sequence. + */ void reset(); - // 是否还有图像 + /** + * @brief Check whether more images are available. + * @return True when there are remaining images to read. + */ bool hasNext() const; - // 图像总数 + /** + * @brief Get total number of images discovered in the directory. + * @return Number of image files. + */ size_t size() const; - // 尝试加载并返回相机内参(fx, fy, cx, cy),返回是否成功 + /** + * @brief Try to load camera intrinsics from a YAML file. + * @param yamlPath Path to the YAML file containing camera parameters. + * @return True on success, false otherwise. + */ bool loadIntrinsics(const std::string &yamlPath); - // 内参访问 + /** @brief Focal length in x. */ double fx() const { return fx_; } + /** @brief Focal length in y. */ double fy() const { return fy_; } + /** @brief Principal point x-coordinate. */ double cx() const { return cx_; } + /** @brief Principal point y-coordinate. */ double cy() const { return cy_; } private: std::vector imageFiles; size_t currentIndex; - // 相机内参(若未加载则为回退值) + // Camera intrinsics (fallback values when not loaded) double fx_, fy_, cx_, cy_; }; diff --git a/modules/slam/include/opencv2/slam/map.hpp b/modules/slam/include/opencv2/slam/map.hpp index a7d6bb329dd..65e72a33b95 100644 --- a/modules/slam/include/opencv2/slam/map.hpp +++ b/modules/slam/include/opencv2/slam/map.hpp @@ -2,6 +2,7 @@ #include #include #include +#include #include "opencv2/slam/keyframe.hpp" namespace cv { @@ -89,6 +90,13 @@ class MapManager { // Statistics int countGoodMapPoints() const; + // Map lifecycle utilities + void clear(); + + // Persistence + bool save(const std::string &path) const; + bool load(const std::string &path); + private: void rebuildKeyframeIndex_(); void rebuildMapPointIndex_(); diff --git a/modules/slam/include/opencv2/slam/slam_system.hpp b/modules/slam/include/opencv2/slam/slam_system.hpp new file mode 100644 index 00000000000..dabd6df1597 --- /dev/null +++ b/modules/slam/include/opencv2/slam/slam_system.hpp @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: Apache-2.0 +#pragma once +#include "opencv2/slam/visual_odometry.hpp" + +namespace cv { +namespace vo { + +//! @addtogroup slam +//! @{ + +/** + * @brief System operating mode. + */ +enum Mode { + MODE_SLAM = 0, //!< Build/extend map while tracking + MODE_LOCALIZATION = 1 //!< Localize against a pre-built (frozen) map +}; + +/** + * @brief Options for the SlamSystem (backend and system-level). + */ +struct CV_EXPORTS_W_SIMPLE SlamSystemOptions { + // --- Backend BA parameters --- + CV_PROP_RW bool enableBackend = true; //!< Enable backend BA thread + CV_PROP_RW int backendTriggerInterval = 2; //!< Keyframes between BA triggers + CV_PROP_RW int backendWindow = 45; //!< BA sliding window size + CV_PROP_RW int backendIterations = 15; //!< BA iterations per trigger + + // --- Map maintenance --- + CV_PROP_RW bool enableMapMaintenance = true; //!< Enable map maintenance + CV_PROP_RW int maintenanceInterval = 5; //!< Keyframes between maintenance + + // --- Redundant keyframe culling --- + CV_PROP_RW int maxKeyframeCullsPerMaintenance = 2; + CV_PROP_RW int redundantKeyframeMinObs = 80; + CV_PROP_RW int redundantKeyframeMinPointObs = 3; + CV_PROP_RW double redundantKeyframeRatio = 0.90; + + // --- Map point culling / retention --- + CV_PROP_RW int mapMaxPointsKeep = 8000; //!< Max map points to keep + CV_PROP_RW double mapMaxReprojErrorPx = 5.0; //!< Max reprojection error in pixels + CV_PROP_RW double mapMinFoundRatio = 0.10; //!< Min found ratio for map points + CV_PROP_RW int mapMinObservations = 2; //!< Min observations for map points +}; + +class CV_EXPORTS_W SlamSystem { +public: + CV_WRAP SlamSystem(); + CV_WRAP explicit SlamSystem(cv::Ptr detector, + cv::Ptr matcher); + + ~SlamSystem(); + + SlamSystem(const SlamSystem&) = delete; + SlamSystem& operator=(const SlamSystem&) = delete; + + CV_WRAP void setFrontendOptions(const VisualOdometryOptions& options); + CV_WRAP void setSystemOptions(const SlamSystemOptions& options); + + CV_WRAP void setCameraIntrinsics(double fx, double fy, double cx, double cy); + CV_WRAP void setWorldScale(double scale_m); + + CV_WRAP void setMode(int mode); + CV_WRAP int getMode() const; + + CV_WRAP VisualOdometryOptions getFrontendOptions() const; + CV_WRAP SlamSystemOptions getSystemOptions() const; + + CV_WRAP void setEnableBackend(bool enable); + CV_WRAP void setBackendWindow(int window); + CV_WRAP void setBackendIterations(int iterations); + + CV_WRAP TrackingResult track(cv::InputArray frame, double timestamp = 0.0); + + CV_WRAP bool saveTrajectoryTUM(const std::string& path) const; + CV_WRAP bool saveMap(const std::string& path) const; + CV_WRAP bool loadMap(const std::string& path); + + CV_WRAP void reset(); + + const MapManager& getMap() const; + MapManager& getMapMutable(); + +private: + class Impl; + std::unique_ptr impl_; +}; + +//! @} + +} // namespace vo +} // namespace cv diff --git a/modules/slam/include/opencv2/slam/tracker.hpp b/modules/slam/include/opencv2/slam/tracker.hpp new file mode 100644 index 00000000000..7b5a2fb688f --- /dev/null +++ b/modules/slam/include/opencv2/slam/tracker.hpp @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: Apache-2.0 +#pragma once +#include +#include +#include +#include + +#include "opencv2/slam/feature.hpp" +#include "opencv2/slam/matcher.hpp" + +namespace cv { +namespace vo { + +class CV_EXPORTS Tracker { +public: + Tracker(); + + // Process a gray image. imgOut contains visualization (matches or keypoints). + bool processFrame(const cv::Mat& gray, + const std::string& imagePath, + cv::Mat& imgOut, + cv::Mat& R_out, + cv::Mat& t_out, + std::string& info); + +private: + FeatureExtractor feat_; + Matcher matcher_; + + cv::Mat prevGray_, prevDesc_; + std::vector prevKp_; + int frame_id_; +}; + +} // namespace vo +} // namespace cv diff --git a/modules/slam/include/opencv2/slam/visual_odometry.hpp b/modules/slam/include/opencv2/slam/visual_odometry.hpp new file mode 100644 index 00000000000..4ab09baf375 --- /dev/null +++ b/modules/slam/include/opencv2/slam/visual_odometry.hpp @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: Apache-2.0 +#pragma once +#include +#include +#include +#include +#include + +namespace cv { +namespace vo { + +//! @addtogroup slam +//! @{ + +/** + * @brief Tracking state enumeration for VisualOdometry. + */ +enum TrackingState { + NOT_INITIALIZED = 0, //!< System not yet initialized + INITIALIZING = 1, //!< Waiting for initialization + TRACKING = 2, //!< Successfully tracking + LOST = 3 //!< Tracking lost +}; + +/** + * @brief Result returned by VisualOdometry::track() for each processed frame. + */ +struct CV_EXPORTS_W_SIMPLE TrackingResult { + CV_PROP_RW bool ok = false; //!< True if pose was successfully estimated + CV_PROP_RW int state = 0; //!< TrackingState as integer + CV_PROP_RW cv::Mat R_w; //!< Rotation matrix (world frame) 3x3 + CV_PROP_RW cv::Mat t_w; //!< Translation vector (world frame) 3x1 + CV_PROP_RW int numMatches = 0; //!< Number of feature matches found + CV_PROP_RW int numInliers = 0; //!< Number of inliers after RANSAC + CV_PROP_RW bool keyframeInserted = false; //!< True if this frame became a keyframe + CV_PROP_RW int frameId = -1; //!< Frame ID + CV_PROP_RW double timestamp = 0.0; //!< Frame timestamp +}; + +/** + * @brief Options for the Visual Odometry frontend (per-frame tracking). + */ +struct CV_EXPORTS_W_SIMPLE VisualOdometryOptions { + // --- Matching parameters --- + CV_PROP_RW int minMatches = 15; //!< Minimum matches to attempt pose estimation + CV_PROP_RW int minInliers = 4; //!< Minimum inliers for valid pose + CV_PROP_RW double minInlierRatio = 0.1; //!< Minimum inlier ratio + CV_PROP_RW int maxMatchesKeep = 500; //!< Max matches to retain after sorting + CV_PROP_RW double flowWeightLambda = 5.0; //!< Flow weight lambda for scoring + + // --- Pose estimation thresholds --- + CV_PROP_RW double diffZeroThresh = 2.0; //!< Diff zero threshold + CV_PROP_RW double flowZeroThresh = 0.3; //!< Flow zero threshold + CV_PROP_RW double minTranslationNorm = 1e-4; //!< Min translation norm + CV_PROP_RW double minRotationRad = 0.5 * CV_PI / 180.0; //!< Min rotation (radians) + + // --- Keyframe insertion policy --- + CV_PROP_RW int keyframeMinGap = 1; //!< Minimum frames between keyframes + CV_PROP_RW int keyframeMaxGap = 8; //!< Maximum frames before forcing keyframe + CV_PROP_RW double keyframeMinParallaxPx = 8.0; //!< Minimum parallax in pixels + + // --- Verbose logging --- + CV_PROP_RW bool verbose = false; //!< Enable verbose logging +}; + +class MapManager; +struct KeyFrame; + +/** + * @brief Visual Odometry frontend for monocular camera pose estimation. + */ +class CV_EXPORTS_W VisualOdometry { +public: + CV_WRAP explicit VisualOdometry( + cv::Ptr detector = cv::ORB::create(), + cv::Ptr matcher = cv::BFMatcher::create(cv::NORM_HAMMING)); + + ~VisualOdometry(); + + VisualOdometry(const VisualOdometry&) = delete; + VisualOdometry& operator=(const VisualOdometry&) = delete; + + CV_WRAP void setCameraIntrinsics(double fx, double fy, double cx, double cy); + CV_WRAP void setWorldScale(double scale); + + CV_WRAP void setOptions(const VisualOdometryOptions& options); + CV_WRAP VisualOdometryOptions getOptions() const; + + CV_WRAP void reset(); + + CV_WRAP TrackingResult track(cv::InputArray frame, double timestamp = 0.0); + + // Advanced C++ API (not wrapped) + TrackingResult track(cv::InputArray frame, double timestamp, MapManager* map); + TrackingResult track(cv::InputArray frame, double timestamp, MapManager* map, bool allowMapping); + + CV_WRAP int getState() const; + CV_WRAP int getFrameId() const; + CV_WRAP void getCurrentPose(cv::OutputArray R_out, cv::OutputArray t_out) const; + +private: + class Impl; + std::unique_ptr impl_; +}; + +//! @} + +} // namespace vo +} // namespace cv diff --git a/modules/slam/include/opencv2/slam/visualizer.hpp b/modules/slam/include/opencv2/slam/visualizer.hpp index 9651b69bb72..59ea180c34e 100644 --- a/modules/slam/include/opencv2/slam/visualizer.hpp +++ b/modules/slam/include/opencv2/slam/visualizer.hpp @@ -2,27 +2,14 @@ #include #include #include -#include "opencv2/slam/feature.hpp" -#include "opencv2/slam/matcher.hpp" +#include "opencv2/slam/tracker.hpp" namespace cv { namespace vo { -class Tracker { -public: - Tracker(); - // Process a gray image, returns true if a pose was estimated. imgOut contains visualization (matches or keypoints) - bool processFrame(const Mat &gray, const std::string &imagePath, Mat &imgOut, Mat &R_out, Mat &t_out, std::string &info); -private: - FeatureExtractor feat_; - Matcher matcher_; - - Mat prevGray_, prevDesc_; - std::vector prevKp_; - int frame_id_; -}; +struct TrackingResult; -class Visualizer { +class CV_EXPORTS Visualizer { public: Visualizer(int W=1000, int H=800, double meters_per_pixel=0.02); // Update trajectory (x,z coordinates) @@ -31,6 +18,8 @@ class Visualizer { void setTrajectoryXZ(const std::vector &xz); // Show frame window void showFrame(const Mat &frame); + // Overlay tracking info text onto a frame + void drawTrackingInfo(Mat &frame, const TrackingResult &res); // Show top-down trajectory void showTopdown(); // Save trajectory image to file diff --git a/modules/slam/include/opencv2/slam/vo.hpp b/modules/slam/include/opencv2/slam/vo.hpp deleted file mode 100644 index d612eb29e25..00000000000 --- a/modules/slam/include/opencv2/slam/vo.hpp +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace cv { -namespace vo { - -struct CV_EXPORTS_W_SIMPLE VisualOdometryOptions { - CV_PROP_RW int minMatches = 15; - CV_PROP_RW int minInliers = 4; - CV_PROP_RW double minInlierRatio = 0.1; - CV_PROP_RW double diffZeroThresh = 2.0; - CV_PROP_RW double flowZeroThresh = 0.3; - CV_PROP_RW double minTranslationNorm = 1e-4; - CV_PROP_RW double minRotationRad = 0.5 * CV_PI / 180.0; - CV_PROP_RW int maxMatchesKeep = 500; - CV_PROP_RW double flowWeightLambda = 5.0; - CV_PROP_RW bool verbose = false; - - // Keyframe insertion policy - CV_PROP_RW int keyframeMinGap = 1; // allow close KFs if有视差 - CV_PROP_RW int keyframeMaxGap = 8; // force insertion if间隔过大 - CV_PROP_RW double keyframeMinParallaxPx = 8.0; - - // Backend BA cadence - CV_PROP_RW int backendTriggerInterval = 2; - CV_PROP_RW bool enableBackend = true; - CV_PROP_RW int backendWindow = 45; - CV_PROP_RW int backendIterations = 15; - - // Redundant keyframe culling - CV_PROP_RW int maxKeyframeCullsPerMaintenance = 2; - CV_PROP_RW int redundantKeyframeMinObs = 80; - CV_PROP_RW int redundantKeyframeMinPointObs = 3; - CV_PROP_RW double redundantKeyframeRatio = 0.90; - - // Map maintenance - CV_PROP_RW bool enableMapMaintenance = true; - CV_PROP_RW int maintenanceInterval = 5; - - // Map point culling / retention - CV_PROP_RW int mapMaxPointsKeep = 8000; - CV_PROP_RW double mapMaxReprojErrorPx = 5.0; - CV_PROP_RW double mapMinFoundRatio = 0.10; - CV_PROP_RW int mapMinObservations = 2; - // Verbose diagnostics (OpenCV log level is global). -}; - -class CV_EXPORTS_W VisualOdometry { -public: - CV_WRAP VisualOdometry(cv::Ptr feature, cv::Ptr matcher); - // Run with explicit options (keeps compatibility with callers who pass a full options struct) - CV_WRAP int run(const std::string &imageDir, double scale_m, const VisualOdometryOptions &options); - // Run using the internally stored options (affected by setters such as setEnableBackend) - CV_WRAP int run(const std::string &imageDir, double scale_m = 1.0); - - // Convenience setters for backend controls - CV_WRAP void setEnableBackend(bool enable); - CV_WRAP void setBackendWindow(int window); - CV_WRAP void setBackendIterations(int iterations); -private: - cv::Ptr feature_; - cv::Ptr matcher_; - VisualOdometryOptions options_{}; // stored defaults for setter-based configuration -}; - -} -} // namespace cv::vo \ No newline at end of file diff --git a/modules/slam/samples/run_vo_sample.cpp b/modules/slam/samples/run_vo_sample.cpp index f6df4e64279..e83b1910025 100644 --- a/modules/slam/samples/run_vo_sample.cpp +++ b/modules/slam/samples/run_vo_sample.cpp @@ -1,30 +1,192 @@ // Minimal wrapper to exercise cv::vo::VisualOdometry on a folder of images (e.g., EuRoC MH01). #include +#include #include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include #include #include +static std::string makeTimestampFolder(){ + auto now = std::chrono::system_clock::now(); + std::time_t t = std::chrono::system_clock::to_time_t(now); + std::tm tm{}; +#if defined(_WIN32) + localtime_s(&tm, &t); +#else + localtime_r(&t, &tm); +#endif + std::ostringstream ss; + ss << std::put_time(&tm, "%Y%m%d_%H%M%S"); + return ss.str(); +} + int main(int argc, char** argv) { if(argc < 2){ - std::cout << "Usage: " << argv[0] << " [image_dir] [scale_m=0.02]\n" - << "Example (EuRoC): " << argv[0] << " /datasets/EuRoC/MH01/mav0/cam0/data 0.02" << std::endl; + std::cout << "Usage: " << argv[0] << " [image_dir] [scale_m=0.02] [output_dir] [mode=slam|localization] [map_path]\n" + << "- Default output: /slam_output//\n" + << "- Default map path: /map.yml.gz (gzip to shrink size)\n" + << "Example (SLAM): " << argv[0] << " /datasets/EuRoC/MH01/mav0/cam0/data 0.02 (auto) slam\n" + << "Example (LOC): " << argv[0] << " /datasets/EuRoC/MH01/mav0/cam0/data 0.02 (auto) localization /map.yml.gz" << std::endl; return 0; } std::string imgDir = argv[1]; + // imgDir = "../../datasets/iphone/2025-11-05_170219"; + // std::string imgDir = "../../datasets/EuRoC/MH01/mav0/cam0/data"; + // imgDir = "../../datasets/vivo/room"; double scale_m = (argc >= 3) ? std::atof(argv[2]) : 0.02; + std::string modeStr = (argc >= 5) ? argv[4] : std::string("slam"); + + // Default output root: dataset folder / slam_output / + std::string imgDirAbs = cv::utils::fs::canonical(imgDir); + std::string datasetRoot = cv::utils::fs::getParent(imgDirAbs); + + auto isAbsolutePath = [](const std::string& p){ + return !p.empty() && (p[0] == '/' || (p.size() > 1 && p[1] == ':')); + }; + + std::string userOut = (argc >= 4) ? argv[3] : std::string(); + std::string outRoot; + if(!userOut.empty()){ + outRoot = isAbsolutePath(userOut) ? userOut : cv::utils::fs::join(datasetRoot, userOut); + } else { + outRoot = cv::utils::fs::join(datasetRoot, "slam_output"); + } + + std::string outDir = cv::utils::fs::join(outRoot, makeTimestampFolder()); + std::string defaultMapName = "map.yml.gz"; // gzip reduces map size significantly + std::string mapPath = (argc >= 6) ? argv[5] : cv::utils::fs::join(outDir, defaultMapName); cv::Ptr feature = cv::ORB::create(2000); cv::Ptr matcher = cv::DescriptorMatcher::create(cv::DescriptorMatcher::BRUTEFORCE_HAMMING); - cv::vo::VisualOdometry vo(feature, matcher); - cv::vo::VisualOdometryOptions options; - // Configure options if desired, e.g. disable backend or tweak thresholds - // options.enableBackend = false; // purely front-end VO + cv::vo::SlamSystem slam(feature, matcher); + cv::vo::VisualOdometryOptions feOpts; + cv::vo::SlamSystemOptions sysOpts; + // sysOpts.enableBackend = false; + + slam.setFrontendOptions(feOpts); + slam.setSystemOptions(sysOpts); + + if(modeStr == "localization"){ + if(!slam.loadMap(mapPath)){ + std::cerr << "Failed to load map for localization: " << mapPath << std::endl; + return -4; + } + slam.setMode(cv::vo::MODE_LOCALIZATION); + std::cout << "Mode: LOCALIZATION (map frozen)" << std::endl; + } else { + slam.setMode(cv::vo::MODE_SLAM); + std::cout << "Mode: SLAM" << std::endl; + } + + std::cout << "Running OpenCV SlamSystem on " << imgDir << std::endl; + std::cout << "Controls: press 'q' or ESC to exit" << std::endl; + + if(!cv::vo::ensureDirectoryExists(outDir)){ + std::cerr << "Failed to create output directory: " << outDir << std::endl; + return -2; + } + + cv::vo::DataLoader loader(imgDir); + if(loader.size() == 0){ + std::cerr << "No images found in: " << imgDir << std::endl; + return -1; + } + + slam.setCameraIntrinsics(loader.fx(), loader.fy(), loader.cx(), loader.cy()); + slam.setWorldScale(scale_m); + + // Reuse the module's Visualizer for the top-down trajectory view. + // meters_per_pixel=0.02 => 50 px per meter (roughly matches previous drawScale=50). + cv::vo::Visualizer viz(600, 600, 0.02); + + // TUM-like CSV: timestamp,tx,ty,tz,qx,qy,qz,qw + std::ofstream trajCsv(outDir + "/trajectory_tum.csv", std::ios::out); + if(!trajCsv.is_open()){ + std::cerr << "Failed to open trajectory output: " << (outDir + "/trajectory_tum.csv") << std::endl; + return -3; + } + trajCsv << "timestamp,tx,ty,tz,qx,qy,qz,qw\n"; + + auto extractTimestamp = [](const std::string &p, int fallback)->double{ + try{ + std::string fname = p; + auto slash = fname.find_last_of("/\\"); + if(slash != std::string::npos) fname = fname.substr(slash+1); + auto dot = fname.find_last_of('.'); + if(dot != std::string::npos) fname = fname.substr(0, dot); + if(!fname.empty()){ + double ts = std::stod(fname); + if(ts > 1e12) ts *= 1e-9; + else if(ts > 1e9) ts *= 1e-6; + return ts; + } + } catch(...){} + return static_cast(fallback); + }; + + cv::Mat frame; + std::string path; + int fid = 0; + while(loader.getNextImage(frame, path)){ + const double ts = extractTimestamp(path, fid); + auto res = slam.track(frame, ts); + + cv::Mat vis; + if(frame.channels() == 1) cv::cvtColor(frame, vis, cv::COLOR_GRAY2BGR); + else vis = frame.clone(); + + if(res.ok && !res.t_w.empty()){ + // res.t_w is camera center in world (per current implementation). + const double x = res.t_w.at(0); + const double z = res.t_w.rows >= 3 ? res.t_w.at(2) : 0.0; + + // Update trajectory view using existing Visualizer. + viz.addPose(x, z); + + if(!res.R_w.empty() && res.t_w.rows >= 3){ + cv::Quatd q = cv::Quatd::createFromRotMat(res.R_w); + const cv::Vec4d qvec(q.at(1), q.at(2), q.at(3), q.at(0)); // [qx,qy,qz,qw] + const double tx = res.t_w.at(0); + const double ty = res.t_w.at(1); + const double tz = res.t_w.at(2); + trajCsv << std::fixed << std::setprecision(9) + << ts << "," << tx << "," << ty << "," << tz << "," + << qvec[0] << "," << qvec[1] << "," << qvec[2] << "," << qvec[3] << "\n"; + } + } + + // Use Visualizer to overlay tracking info and present frames. + viz.drawTrackingInfo(vis, res); + viz.showFrame(vis); + viz.showTopdown(); + + int key = cv::waitKey(1); + if(key == 27 || key == 'q' || key == 'Q') break; + fid++; + } + + trajCsv.close(); + + const std::string trajPng = outDir + "/trajectory.png"; + if(!viz.saveTrajectory(trajPng)){ + std::cerr << "Failed to write trajectory image: " << trajPng << std::endl; + } + + if(!slam.saveMap(mapPath)){ + std::cerr << "Failed to save map: " << mapPath << std::endl; + } - std::cout << "Running OpenCV VisualOdometry on " << imgDir << std::endl; - int ret = vo.run(imgDir, scale_m, options); - std::cout << "VisualOdometry finished with code " << ret << std::endl; - return ret; + return 0; } diff --git a/modules/slam/src/map.cpp b/modules/slam/src/map.cpp index dc025b8a9e3..06bd482ba5e 100644 --- a/modules/slam/src/map.cpp +++ b/modules/slam/src/map.cpp @@ -9,6 +9,14 @@ namespace vo { MapManager::MapManager() {} +void MapManager::clear(){ + keyframes_.clear(); + mappoints_.clear(); + id2idx_.clear(); + mpid2idx_.clear(); + next_mappoint_id_ = 1; +} + void MapManager::setCameraIntrinsics(double fx, double fy, double cx, double cy){ fx_ = fx; fy_ = fy; @@ -443,5 +451,100 @@ int MapManager::countGoodMapPoints() const { return count; } +bool MapManager::save(const std::string &path) const { + // Tip: use .yaml.gz/.yml.gz to enable built-in gzip compression when available. + cv::FileStorage fs(path, cv::FileStorage::WRITE); + if(!fs.isOpened()) return false; + + fs << "has_intrinsics" << static_cast(hasIntrinsics_); + fs << "fx" << fx_ << "fy" << fy_ << "cx" << cx_ << "cy" << cy_; + fs << "next_mappoint_id" << next_mappoint_id_; + + fs << "keyframes" << "["; + for(const auto &kf : keyframes_){ + fs << "{"; + fs << "id" << kf.id; + fs << "R" << kf.R_w; + fs << "t" << kf.t_w; + std::vector kps = kf.kps; + write(fs, "kps", kps); + fs << "desc" << kf.desc; + fs << "}"; + } + fs << "]"; + + fs << "mappoints" << "["; + for(const auto &mp : mappoints_){ + fs << "{"; + fs << "id" << mp.id; + fs << "p" << mp.p; + fs << "isBad" << static_cast(mp.isBad); + fs << "nFound" << mp.nFound; + fs << "nVisible" << mp.nVisible; + fs << "descriptor" << mp.descriptor; + fs << "observations" << "["; + for(const auto &obs : mp.observations){ + fs << "{" << "kf" << obs.first << "kp" << obs.second << "}"; + } + fs << "]"; + fs << "}"; + } + fs << "]"; + + return true; +} + +bool MapManager::load(const std::string &path){ + cv::FileStorage fs(path, cv::FileStorage::READ); + if(!fs.isOpened()) return false; + + clear(); + + int hasIntr = 0; + fs["has_intrinsics"] >> hasIntr; + fs["fx"] >> fx_; fs["fy"] >> fy_; fs["cx"] >> cx_; fs["cy"] >> cy_; + hasIntrinsics_ = (hasIntr != 0); + fs["next_mappoint_id"] >> next_mappoint_id_; + + cv::FileNode kfNode = fs["keyframes"]; + if(kfNode.type() == cv::FileNode::SEQ){ + for(const auto &n : kfNode){ + int id = -1; Mat R, t, desc; std::vector kps; + n["id"] >> id; + n["R"] >> R; + n["t"] >> t; + read(n["kps"], kps); + n["desc"] >> desc; + KeyFrame kf(id, Mat(), kps, desc, R, t); + keyframes_.push_back(kf); + } + } + rebuildKeyframeIndex_(); + + cv::FileNode mpNode = fs["mappoints"]; + if(mpNode.type() == cv::FileNode::SEQ){ + for(const auto &n : mpNode){ + MapPoint mp; + n["id"] >> mp.id; + n["p"] >> mp.p; + int bad = 0; n["isBad"] >> bad; mp.isBad = (bad != 0); + n["nFound"] >> mp.nFound; + n["nVisible"] >> mp.nVisible; + n["descriptor"] >> mp.descriptor; + cv::FileNode obsNode = n["observations"]; + if(obsNode.type() == cv::FileNode::SEQ){ + for(const auto &o : obsNode){ + int kfId = -1, kpIdx = -1; + o["kf"] >> kfId; o["kp"] >> kpIdx; + mp.observations.emplace_back(kfId, kpIdx); + } + } + mappoints_.push_back(mp); + } + } + rebuildMapPointIndex_(); + return true; +} + } // namespace vo } // namespace cv \ No newline at end of file diff --git a/modules/slam/src/slam_system.cpp b/modules/slam/src/slam_system.cpp new file mode 100644 index 00000000000..ef3928b324e --- /dev/null +++ b/modules/slam/src/slam_system.cpp @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: Apache-2.0 +#include "opencv2/slam/slam_system.hpp" +#include "opencv2/slam/optimizer.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cv { +namespace vo { + +struct TrajEntry { int frameId = -1; double ts = 0.0; Mat R; Mat t; }; + +static cv::Vec4d rotationMatrixToQuaternion_(const cv::Mat& R_in) +{ + cv::Quatd q = cv::Quatd::createFromRotMat(R_in); + // OpenCV Quat stores [w, x, y, z]; we export [qx, qy, qz, qw]. + return cv::Vec4d(q.at(1), q.at(2), q.at(3), q.at(0)); +} + +class SlamSystem::Impl { +public: + Impl(Ptr det, Ptr matcher) + : frontend_(std::move(det), std::move(matcher)) {} + ~Impl(){ stopBackend(); } + + void setFrontendOptions(const VisualOdometryOptions& o){ feOpts_ = o; frontend_.setOptions(o); } + void setSystemOptions(const SlamSystemOptions& o){ + sysOpts_ = o; + map_.setMapPointCullingParams(sysOpts_.mapMinObservations, sysOpts_.mapMinFoundRatio, + sysOpts_.mapMaxReprojErrorPx, sysOpts_.mapMaxPointsKeep); + map_.setRedundantKeyframeCullingParams(sysOpts_.redundantKeyframeMinObs, + sysOpts_.redundantKeyframeMinPointObs, + sysOpts_.redundantKeyframeRatio); + } + VisualOdometryOptions frontendOptions() const { return feOpts_; } + SlamSystemOptions systemOptions() const { return sysOpts_; } + + void setIntrinsics(double fx, double fy, double cx, double cy){ + fx_ = fx; fy_ = fy; cx_ = cx; cy_ = cy; + frontend_.setCameraIntrinsics(fx, fy, cx, cy); + map_.setCameraIntrinsics(fx, fy, cx, cy); + } + void setScale(double s){ scale_ = s; frontend_.setWorldScale(s); } + + void setMode(int mode){ + mode_ = mode; + if(mode_ == MODE_LOCALIZATION){ + // Localization mode should be lightweight and map-frozen. + stopBackend(); + } + } + int getMode() const { return mode_; } + + TrackingResult track(InputArray frame, double ts){ + const bool isLocalization = (mode_ == MODE_LOCALIZATION); + if(!isLocalization) ensureBackend(); + + TrackingResult res; + { + // Protect map_ against concurrent backend snapshots/writeback. + std::lock_guard lk(mapMutex_); + res = frontend_.track(frame, ts, &map_, !isLocalization); + } + + if(!isLocalization && sysOpts_.enableBackend && res.keyframeInserted){ + kfSinceBackend_++; + const int interval = std::max(1, sysOpts_.backendTriggerInterval); + if(kfSinceBackend_ >= interval){ + kfSinceBackend_ = 0; + backendRequests_.fetch_add(1); + backendCv_.notify_one(); + } + } + + if(!isLocalization && sysOpts_.enableMapMaintenance && res.keyframeInserted){ + kfSinceMaintenance_++; + const int interval = std::max(1, sysOpts_.maintenanceInterval); + if(kfSinceMaintenance_ >= interval){ + kfSinceMaintenance_ = 0; + std::lock_guard lk(mapMutex_); + map_.cullRedundantKeyFrames(std::max(1, sysOpts_.maxKeyframeCullsPerMaintenance)); + map_.cullBadMapPoints(); + } + } + if(res.ok){ + TrajEntry te; te.frameId = res.frameId; te.ts = ts; te.R = res.R_w; te.t = res.t_w; + trajectory_.push_back(te); + } + return res; + } + + bool saveTrajectoryTUM(const std::string& path) const { + std::ofstream out(path, std::ios::out); + if(!out.is_open()) return false; + out << "timestamp,tx,ty,tz,qx,qy,qz,qw\n"; + for(const auto &te : trajectory_){ + if(te.R.empty() || te.t.empty()) continue; + cv::Vec4d q = rotationMatrixToQuaternion_(te.R); + cv::Mat t; te.t.convertTo(t, CV_64F); + if(t.rows < 3) continue; + out << std::fixed << std::setprecision(9) + << te.ts << "," << t.at(0) << "," << t.at(1) << "," << t.at(2) << "," + << q[0] << "," << q[1] << "," << q[2] << "," << q[3] << "\n"; + } + return true; + } + + bool saveMap(const std::string& path) const { return map_.save(path); } + bool loadMap(const std::string& path){ bool ok = map_.load(path); return ok; } + void reset(){ stopBackend(); map_.clear(); frontend_.reset(); trajectory_.clear(); } + const MapManager& map() const { return map_; } + MapManager& mapMutable() { return map_; } + +private: + void ensureBackend(){ + if(backendStarted_ || !sysOpts_.enableBackend) return; + backendStop_.store(false); + backendThread_ = std::thread([this](){ backendLoop(); }); + backendStarted_ = true; + } + + void stopBackend(){ + if(!backendStarted_) return; + backendStop_.store(true); + backendCv_.notify_one(); + if(backendThread_.joinable()) backendThread_.join(); + backendStarted_ = false; + } + + void backendLoop(){ + while(!backendStop_.load()){ + std::unique_lock lk(backendMutex_); + backendCv_.wait(lk, [&]{ return backendStop_.load() || backendRequests_.load() > 0; }); + if(backendStop_.load()) break; + backendRequests_.store(0); + backendBusy_ = true; + + std::vector kfs; + std::vector mps; + { + std::lock_guard mapLk(mapMutex_); + // Copy snapshots under lock to avoid data races. + kfs = map_.keyframes(); + mps = map_.mappoints(); + } + lk.unlock(); + + int K = static_cast(kfs.size()); + if(K > 0){ + int lastIdx = K - 1; int lastId = kfs[lastIdx].id; + std::unordered_map idToIdx; idToIdx.reserve(kfs.size()); + for(int i=0;i shared; + for(const auto &mp : mps){ + bool hasLast = false; + for(const auto &o : mp.observations){ if(o.first == lastId){ hasLast = true; break; } } + if(!hasLast) continue; + for(const auto &o : mp.observations){ if(o.first == lastId) continue; auto it = idToIdx.find(o.first); if(it != idToIdx.end()) shared[it->second]++; } + } + std::vector localIdx; localIdx.reserve(sysOpts_.backendWindow); + auto pushU = [&](int idx){ if(idx<0||idx>=K) return; if(idx==0||idx==1) return; if(std::find(localIdx.begin(), localIdx.end(), idx)!=localIdx.end()) return; localIdx.push_back(idx); }; + pushU(lastIdx); + std::vector> scored(shared.begin(), shared.end()); + std::sort(scored.begin(), scored.end(), [](auto a, auto b){ return a.second > b.second; }); + for(const auto &p : scored){ if((int)localIdx.size() >= sysOpts_.backendWindow) break; pushU(p.first); } + if(localIdx.empty()) localIdx.push_back(lastIdx); + std::vector fixed{0}; if(K>1) fixed.push_back(1); + #if defined(HAVE_SFM) + Optimizer::localBundleAdjustmentSFM(kfs, mps, localIdx, fixed, fx_, fy_, cx_, cy_, sysOpts_.backendIterations); + { + std::lock_guard mapLk(mapMutex_); + for(const auto &kf : kfs) map_.applyOptimizedKeyframePose(kf.id, kf.R_w, kf.t_w); + for(const auto &mp : mps) if(mp.id>0) map_.applyOptimizedMapPoint(mp.id, mp.p); + } + #endif + } + lk.lock(); + backendBusy_ = false; + backendCv_.notify_all(); + } + } + + VisualOdometry frontend_; + MapManager map_; + VisualOdometryOptions feOpts_; + SlamSystemOptions sysOpts_; + int mode_ = MODE_SLAM; + int kfSinceBackend_ = 0; + int kfSinceMaintenance_ = 0; + double fx_ = 0.0, fy_ = 0.0, cx_ = 0.0, cy_ = 0.0; + double scale_ = 1.0; + + std::vector trajectory_; + + std::thread backendThread_; + std::mutex backendMutex_; + std::condition_variable backendCv_; + std::atomic backendStop_{false}; + std::atomic backendRequests_{0}; + bool backendBusy_ = false; + bool backendStarted_ = false; + + std::mutex mapMutex_; +}; + +SlamSystem::SlamSystem() : impl_(std::make_unique(ORB::create(), BFMatcher::create(NORM_HAMMING))) {} +SlamSystem::SlamSystem(Ptr det, Ptr matcher) + : impl_(std::make_unique(std::move(det), std::move(matcher))) {} +SlamSystem::~SlamSystem() = default; + +void SlamSystem::setFrontendOptions(const VisualOdometryOptions& o){ impl_->setFrontendOptions(o); } +void SlamSystem::setSystemOptions(const SlamSystemOptions& o){ impl_->setSystemOptions(o); } +void SlamSystem::setCameraIntrinsics(double fx, double fy, double cx, double cy){ impl_->setIntrinsics(fx, fy, cx, cy); } +void SlamSystem::setWorldScale(double scale_m){ impl_->setScale(scale_m); } +void SlamSystem::setMode(int mode){ impl_->setMode(mode); } +int SlamSystem::getMode() const { return impl_->getMode(); } +VisualOdometryOptions SlamSystem::getFrontendOptions() const { return impl_->frontendOptions(); } +SlamSystemOptions SlamSystem::getSystemOptions() const { return impl_->systemOptions(); } +void SlamSystem::setEnableBackend(bool enable){ auto opts = impl_->systemOptions(); opts.enableBackend = enable; impl_->setSystemOptions(opts); } +void SlamSystem::setBackendWindow(int w){ auto opts = impl_->systemOptions(); opts.backendWindow = std::max(1, w); impl_->setSystemOptions(opts); } +void SlamSystem::setBackendIterations(int it){ auto opts = impl_->systemOptions(); opts.backendIterations = std::max(1, it); impl_->setSystemOptions(opts); } +TrackingResult SlamSystem::track(InputArray frame, double ts){ return impl_->track(frame, ts); } +bool SlamSystem::saveTrajectoryTUM(const std::string& path) const { return impl_->saveTrajectoryTUM(path); } +bool SlamSystem::saveMap(const std::string& path) const { return impl_->saveMap(path); } +bool SlamSystem::loadMap(const std::string& path){ return impl_->loadMap(path); } +void SlamSystem::reset(){ impl_->reset(); } +const MapManager& SlamSystem::getMap() const { return impl_->map(); } +MapManager& SlamSystem::getMapMutable(){ return impl_->mapMutable(); } + +} // namespace vo +} // namespace cv diff --git a/modules/slam/src/tracker.cpp b/modules/slam/src/tracker.cpp new file mode 100644 index 00000000000..6b657cb91f9 --- /dev/null +++ b/modules/slam/src/tracker.cpp @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: Apache-2.0 +#include "opencv2/slam/tracker.hpp" + +#include + +namespace cv { +namespace vo { + +Tracker::Tracker() + : feat_(), matcher_(), frame_id_(0) +{ +} + +bool Tracker::processFrame(const Mat &gray, const std::string & /*imagePath*/, Mat &imgOut, Mat & /*R_out*/, Mat & /*t_out*/, std::string &info) +{ + if(gray.empty()) return false; + // detect + std::vector kps; + Mat desc; + feat_.detectAndCompute(gray, kps, desc); + + if(!prevGray_.empty() && !prevDesc_.empty() && !desc.empty()){ + // match + std::vector goodMatches; + matcher_.knnMatch(prevDesc_, desc, goodMatches); + + // draw matches for visualization + drawMatches(prevGray_, prevKp_, gray, kps, goodMatches, imgOut, + Scalar::all(-1), Scalar::all(-1), std::vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); + + // For now, Tracker is visualization-only; pose is computed by VisualOdometry/PoseEstimator. + info = "matches=" + std::to_string(goodMatches.size()) + ", inliers=0"; + } else { + // first frame: draw keypoints + drawKeypoints(gray, kps, imgOut, Scalar::all(-1), DrawMatchesFlags::DRAW_RICH_KEYPOINTS); + info = "first_frame"; + } + + // update prev + prevGray_ = gray.clone(); + prevKp_ = kps; + prevDesc_ = desc.clone(); + frame_id_++; + return true; +} + +} // namespace vo +} // namespace cv diff --git a/modules/slam/src/visual_odometry.cpp b/modules/slam/src/visual_odometry.cpp new file mode 100644 index 00000000000..8ba05b1af2e --- /dev/null +++ b/modules/slam/src/visual_odometry.cpp @@ -0,0 +1,339 @@ +// SPDX-License-Identifier: Apache-2.0 +#include "opencv2/slam/visual_odometry.hpp" +#include "opencv2/slam/initializer.hpp" +#include "opencv2/slam/localizer.hpp" +#include "opencv2/slam/pose.hpp" +#include "opencv2/slam/map.hpp" +#include "opencv2/slam/keyframe.hpp" +#include +#include +#include +#include +#include + +namespace cv { +namespace vo { + +class VisualOdometry::Impl { +public: + Impl(Ptr det, Ptr matcher) + : detector_(std::move(det)), matcher_(std::move(matcher)) { + if(!detector_) detector_ = ORB::create(2000); + if(!matcher_) matcher_ = BFMatcher::create(NORM_HAMMING); + } + + TrackingResult track(InputArray frame, double timestamp, MapManager* map, bool allowMapping); + void setOptions(const VisualOdometryOptions& opts) { options_ = opts; } + VisualOdometryOptions getOptions() const { return options_; } + void setIntrinsics(double fx, double fy, double cx, double cy){ + fx_ = fx; fy_ = fy; cx_ = cx; cy_ = cy; hasIntrinsics_ = (fx_ > 1e-9 && fy_ > 1e-9); + } + void setScale(double s){ scale_ = std::max(1e-9, s); } + void reset(){ + prevGray_.release(); prevDesc_.release(); prevKps_.clear(); prevColor_.release(); + frameId_ = 0; state_ = TrackingState::NOT_INITIALIZED; + R_w_ = Mat::eye(3,3,CV_64F); t_w_ = Mat::zeros(3,1,CV_64F); + } + int getFrameId() const { return frameId_; } + int getState() const { return static_cast(state_); } + void getPose(OutputArray R_out, OutputArray t_out) const { + if(R_out.needed()) R_w_.copyTo(R_out); + if(t_out.needed()) t_w_.copyTo(t_out); + } + +private: + Ptr detector_; + Ptr matcher_; + VisualOdometryOptions options_; + double fx_ = 0.0, fy_ = 0.0, cx_ = 0.0, cy_ = 0.0; + bool hasIntrinsics_ = false; + double scale_ = 1.0; + + int frameId_ = 0; + TrackingState state_ = TrackingState::NOT_INITIALIZED; + Mat R_w_ = Mat::eye(3,3,CV_64F); + Mat t_w_ = Mat::zeros(3,1,CV_64F); + + Mat prevGray_; + Mat prevDesc_; + std::vector prevKps_; + Mat prevColor_; + + Initializer initializer_; + PoseEstimator poseEst_; + Localizer localizer_{0.7f}; +}; + +static std::vector mutualRatioMatches(const Mat& desc1, const Mat& desc2, DescriptorMatcher& matcher, float ratio){ + std::vector> knn12, knn21; + matcher.knnMatch(desc1, desc2, knn12, 2); + matcher.knnMatch(desc2, desc1, knn21, 2); + std::vector good; + good.reserve(knn12.size()); + for(size_t qi = 0; qi < knn12.size(); ++qi){ + if(knn12[qi].empty()) continue; + const DMatch &best = knn12[qi][0]; + if(knn12[qi].size() >= 2){ + const DMatch &second = knn12[qi][1]; + if(second.distance > 0.0f && best.distance / second.distance > ratio) continue; + } + int trainIdx = best.trainIdx; + if(trainIdx < 0 || trainIdx >= (int)knn21.size() || knn21[trainIdx].empty()) continue; + const DMatch &rbest = knn21[trainIdx][0]; + if(rbest.trainIdx == (int)qi) good.push_back(best); + } + return good; +} + +TrackingResult VisualOdometry::Impl::track(InputArray frameIn, double timestamp, MapManager* map, bool allowMapping){ + TrackingResult res; res.frameId = frameId_; res.timestamp = timestamp; res.state = static_cast(state_); + + if(!hasIntrinsics_){ + res.state = static_cast(state_); + return res; + } + + Mat frame = frameIn.getMat(); + Mat color = frame.channels() == 1 ? Mat() : frame; + if(color.empty()){ cvtColor(frame, color, COLOR_GRAY2BGR); } + Mat gray = frame; + if(gray.channels() > 1) cvtColor(gray, gray, COLOR_BGR2GRAY); + + std::vector kps; Mat desc; + detector_->detect(gray, kps); + detector_->compute(gray, kps, desc); + + if(prevGray_.empty() || prevDesc_.empty()){ + prevGray_ = gray.clone(); prevDesc_ = desc.clone(); prevKps_ = kps; prevColor_ = color.clone(); + state_ = TrackingState::INITIALIZING; + res.state = static_cast(state_); + frameId_++; + return res; + } + + if(desc.empty() || prevDesc_.empty()){ + state_ = TrackingState::LOST; + res.state = static_cast(state_); + prevGray_ = gray.clone(); prevDesc_ = desc.clone(); prevKps_ = kps; prevColor_ = color.clone(); + frameId_++; + return res; + } + + auto matches = mutualRatioMatches(prevDesc_, desc, *matcher_, 0.75f); + std::vector pts1, pts2; + pts1.reserve(matches.size()); pts2.reserve(matches.size()); + for(const auto &m: matches){ + pts1.push_back(prevKps_[m.queryIdx].pt); + pts2.push_back(kps[m.trainIdx].pt); + } + + double median_flow = 0.0; + if(!pts1.empty()){ + std::vector flows; flows.reserve(pts1.size()); + for(size_t i=0;i::infinity(); + if(!map->keyframes().empty()){ + ok_pnp = localizer_.tryPnP(*map, desc, kps, fx_, fy_, cx_, cy_, gray.cols, gray.rows, + options_.minInliers, R_pnp, t_pnp, inliers_pnp, frameId_, nullptr, "", + nullptr, &postMatches, &meanReproj); + if(ok_pnp){ + if(postMatches > 0 && inliers_pnp < static_cast(postMatches * options_.minInlierRatio)) ok_pnp = false; + if(meanReproj > 5.0) ok_pnp = false; // loose but guards against bad PnP + } + } + if(ok_pnp){ + R_pnp.convertTo(R_w_, CV_64F); + t_pnp.convertTo(t_w_, CV_64F); + state_ = TrackingState::TRACKING; + res.ok = true; + res.R_w = R_w_.clone(); + res.t_w = t_w_.clone(); + res.numMatches = 0; + res.numInliers = inliers_pnp; + } else { + state_ = TrackingState::LOST; + } + res.state = static_cast(state_); + prevGray_ = gray.clone(); prevDesc_ = desc.clone(); prevKps_ = kps; prevColor_ = color.clone(); + frameId_++; + return res; + } + + // try two-view initialization if map is empty and this is second frame + if(map && allowMapping && map->keyframes().empty() && frameId_ == 1){ + Mat R_init, t_init; std::vector pts3D; std::vector isTri; + bool okInit = initializer_.initialize(prevKps_, kps, matches, fx_, fy_, cx_, cy_, R_init, t_init, pts3D, isTri); + if(okInit){ + Mat prevImg = prevColor_.empty() ? prevGray_ : prevColor_; + KeyFrame kf0(frameId_ - 1, prevImg, prevKps_, prevDesc_, Mat::eye(3,3,CV_64F), Mat::zeros(3,1,CV_64F)); + Mat Rwc1 = R_init.t(); + Mat Cw1 = (-Rwc1 * t_init) * scale_; + KeyFrame kf1(frameId_, color, kps, desc, Rwc1, Cw1); + + std::vector newMps; newMps.reserve(pts3D.size()); + for(size_t i=0;iaddKeyFrame(kf0); + map->addKeyFrame(kf1); + if(!newMps.empty()) map->addMapPoints(newMps); + + R_w_ = kf1.R_w.clone(); + t_w_ = kf1.t_w.clone(); + state_ = TrackingState::TRACKING; + res.ok = true; res.state = static_cast(state_); + res.R_w = R_w_.clone(); res.t_w = t_w_.clone(); + res.keyframeInserted = true; res.numMatches = static_cast(matches.size()); + res.numInliers = static_cast(matches.size()); + prevGray_ = gray.clone(); prevDesc_ = desc.clone(); prevKps_ = kps; prevColor_ = color.clone(); + frameId_++; + return res; + } + } + + Mat R_est, t_est, mask_est; int inliers_est = 0; bool ok_est = false; + if(pts1.size() >= static_cast(std::max(8, options_.minMatches))){ + ok_est = poseEst_.estimate(pts1, pts2, fx_, fy_, cx_, cy_, R_est, t_est, mask_est, inliers_est); + } + + Mat R_pnp, t_pnp; int inliers_pnp = 0; bool ok_pnp = false; int pnpMatches = 0; double pnpMeanReproj = std::numeric_limits::infinity(); + if(map && !map->keyframes().empty()){ + ok_pnp = localizer_.tryPnP(*map, desc, kps, fx_, fy_, cx_, cy_, gray.cols, gray.rows, + options_.minInliers, R_pnp, t_pnp, inliers_pnp, frameId_, nullptr, "", + nullptr, &pnpMatches, &pnpMeanReproj); + if(ok_pnp){ + if(pnpMatches > 0 && inliers_pnp < static_cast(pnpMatches * options_.minInlierRatio)) ok_pnp = false; + if(pnpMeanReproj > 5.0) ok_pnp = false; + } + } + + Mat R_use, t_use; int inliers_use = 0; int matchCount = static_cast(matches.size()); + if(ok_est){ R_use = R_est; t_use = t_est; inliers_use = inliers_est; } + else if(ok_pnp){ R_use = R_pnp; t_use = t_pnp; inliers_use = inliers_pnp; } + + bool integrate = ok_est || ok_pnp; + if(integrate){ + if(inliers_use < options_.minInliers || matchCount < options_.minMatches) integrate = false; + if(ok_est){ + Mat t_d; t_use.convertTo(t_d, CV_64F); + Mat R_d; R_use.convertTo(R_d, CV_64F); + double t_norm = norm(t_d); + double trace = R_d.at(0,0) + R_d.at(1,1) + R_d.at(2,2); + double cos_angle = std::min(1.0, std::max(-1.0, (trace - 1.0) * 0.5)); + double rot_angle = std::acos(cos_angle); + if(t_norm < options_.minTranslationNorm && std::abs(rot_angle) < options_.minRotationRad && median_flow < options_.flowZeroThresh){ + integrate = false; + } + } + } + + if(integrate){ + if(ok_est){ + Mat R_d, t_d; R_use.convertTo(R_d, CV_64F); t_use.convertTo(t_d, CV_64F); + Mat C2_in_c1 = (-R_d.t() * t_d) * scale_; + t_w_ = t_w_ + R_w_ * C2_in_c1; + R_w_ = R_w_ * R_d.t(); + } else { + R_use.convertTo(R_w_, CV_64F); + t_use.convertTo(t_w_, CV_64F); + } + state_ = TrackingState::TRACKING; + res.ok = true; + res.R_w = R_w_.clone(); res.t_w = t_w_.clone(); + } else { + state_ = ok_est || ok_pnp ? TrackingState::INITIALIZING : TrackingState::LOST; + } + + res.state = static_cast(state_); + res.numMatches = matchCount; res.numInliers = inliers_use; + + // keyframe decision + if(map && allowMapping && res.ok){ + bool insertKf = false; + const auto &kfs = map->keyframes(); + const int minGap = std::max(1, options_.keyframeMinGap); + const int maxGap = std::max(minGap, options_.keyframeMaxGap); + const int lastKfId = kfs.empty() ? -100000 : kfs.back().id; + const int gap = frameId_ - lastKfId; + if(gap >= maxGap) insertKf = true; + + // simplistic parallax proxy based on median flow + if(!insertKf && gap >= minGap){ + if(median_flow >= options_.keyframeMinParallaxPx) insertKf = true; + } + + // require reasonable inlier ratio before inserting a keyframe + if(matchCount > 0 && inliers_use < static_cast(matchCount * options_.minInlierRatio)) insertKf = false; + + if(insertKf){ + KeyFrame kf(frameId_, color, kps, desc, R_w_, t_w_); + map->addKeyFrame(kf); + res.keyframeInserted = true; + + // triangulate with last two keyframes if possible + if(map->keyframes().size() >= 2){ + const KeyFrame &lastKf = map->keyframes()[map->keyframes().size() - 2]; + const KeyFrame &curKf = map->keyframes()[map->keyframes().size() - 1]; + + // Build normalized coordinate matches from last -> cur using descriptors + std::vector kfMatches = mutualRatioMatches(lastKf.desc, curKf.desc, *matcher_, 0.75f); + std::vector pts1n, pts2n; + std::vector idx1, idx2; + pts1n.reserve(kfMatches.size()); pts2n.reserve(kfMatches.size()); + idx1.reserve(kfMatches.size()); idx2.reserve(kfMatches.size()); + for(const auto &m : kfMatches){ + Point2f p1 = lastKf.kps[m.queryIdx].pt; + Point2f p2 = curKf.kps[m.trainIdx].pt; + pts1n.emplace_back((p1.x - cx_) / fx_, (p1.y - cy_) / fy_); + pts2n.emplace_back((p2.x - cx_) / fx_, (p2.y - cy_) / fy_); + idx1.push_back(m.queryIdx); + idx2.push_back(m.trainIdx); + } + auto newMps = map->triangulateBetweenLastTwo(pts1n, pts2n, idx1, idx2, lastKf, curKf, fx_, fy_, cx_, cy_); + if(!newMps.empty()) map->addMapPoints(newMps); + } + } + } + + prevGray_ = gray.clone(); prevDesc_ = desc.clone(); prevKps_ = kps; prevColor_ = color.clone(); + frameId_++; + return res; +} + +VisualOdometry::VisualOdometry(Ptr detector, Ptr matcher) + : impl_(std::make_unique(std::move(detector), std::move(matcher))) {} + +VisualOdometry::~VisualOdometry() = default; + +void VisualOdometry::setCameraIntrinsics(double fx, double fy, double cx, double cy){ impl_->setIntrinsics(fx, fy, cx, cy); } +void VisualOdometry::setWorldScale(double scale){ impl_->setScale(scale); } +void VisualOdometry::setOptions(const VisualOdometryOptions& options){ impl_->setOptions(options); } +VisualOdometryOptions VisualOdometry::getOptions() const { return impl_->getOptions(); } +void VisualOdometry::reset(){ impl_->reset(); } +TrackingResult VisualOdometry::track(InputArray frame, double timestamp){ return impl_->track(frame, timestamp, nullptr, true); } +TrackingResult VisualOdometry::track(InputArray frame, double timestamp, MapManager* map){ return impl_->track(frame, timestamp, map, true); } +TrackingResult VisualOdometry::track(InputArray frame, double timestamp, MapManager* map, bool allowMapping){ return impl_->track(frame, timestamp, map, allowMapping); } +int VisualOdometry::getState() const { return impl_->getState(); } +int VisualOdometry::getFrameId() const { return impl_->getFrameId(); } +void VisualOdometry::getCurrentPose(OutputArray R_out, OutputArray t_out) const { impl_->getPose(R_out, t_out); } + +} // namespace vo +} // namespace cv + diff --git a/modules/slam/src/visualizer.cpp b/modules/slam/src/visualizer.cpp index 7215bdc6a69..cc1f3947e9b 100644 --- a/modules/slam/src/visualizer.cpp +++ b/modules/slam/src/visualizer.cpp @@ -1,65 +1,11 @@ #include "opencv2/slam/visualizer.hpp" +#include "opencv2/slam/visual_odometry.hpp" #include -#include #include namespace cv { namespace vo { -Tracker::Tracker() - : feat_(), matcher_(), frame_id_(0) -{ -} - -bool Tracker::processFrame(const Mat &gray, const std::string & /*imagePath*/, Mat &imgOut, Mat & /*R_out*/, Mat & /*t_out*/, std::string &info) -{ - if(gray.empty()) return false; - // detect - std::vector kps; - Mat desc; - feat_.detectAndCompute(gray, kps, desc); - - if(!prevGray_.empty() && !prevDesc_.empty() && !desc.empty()){ - // match - std::vector goodMatches; - matcher_.knnMatch(prevDesc_, desc, goodMatches); - - // draw matches for visualization - drawMatches(prevGray_, prevKp_, gray, kps, goodMatches, imgOut, - Scalar::all(-1), Scalar::all(-1), std::vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); - - // prepare points - std::vector pts1, pts2; - for(const auto &m: goodMatches){ - pts1.push_back(prevKp_[m.queryIdx].pt); - pts2.push_back(kps[m.trainIdx].pt); - } - - if(pts1.size() >= 8){ - Mat R, t, mask; - int inliers = 0; - // Note: we don't have intrinsics here; caller should provide via global or arguments. For now, caller will use PoseEstimator directly if needed. - // We'll estimate using default focal/pp later (caller will adapt). Return false for now so caller can invoke PoseEstimator separately. - // But to keep compatibility, leave R_out/t_out empty and set info. - info = "matches=" + std::to_string(goodMatches.size()) + ", inliers=" + std::to_string(inliers); - // update prev buffers below - } else { - info = "matches=" + std::to_string(goodMatches.size()) + ", inliers=0"; - } - } else { - // first frame: draw keypoints - drawKeypoints(gray, kps, imgOut, Scalar::all(-1), DrawMatchesFlags::DRAW_RICH_KEYPOINTS); - info = "first_frame"; - } - - // update prev - prevGray_ = gray.clone(); - prevKp_ = kps; - prevDesc_ = desc.clone(); - frame_id_++; - return true; -} - Visualizer::Visualizer(int W, int H, double meters_per_pixel) : meters_per_pixel_(meters_per_pixel), mapSize_(W,H) { @@ -74,7 +20,7 @@ Point Visualizer::worldToPixel(const Point2d &p) const { } void Visualizer::addPose(double x, double z){ - traj_.emplace_back(x,z); + traj_.emplace_back(x,-z); } void Visualizer::clearTrajectory(){ @@ -91,6 +37,16 @@ void Visualizer::showFrame(const Mat &frame){ imshow("frame", frame); } +void Visualizer::drawTrackingInfo(Mat &frame, const TrackingResult &res){ + if(frame.empty()) return; + std::string info = res.ok ? "TRACKING" : "NOT TRACKING"; + info += " matches=" + std::to_string(res.numMatches); + info += " inliers=" + std::to_string(res.numInliers); + info += " state=" + std::to_string(res.state); + Scalar color = res.ok ? Scalar(0, 255, 0) : Scalar(0, 0, 255); + putText(frame, info, Point(10, 30), FONT_HERSHEY_SIMPLEX, 0.7, color, 2, LINE_AA); +} + void Visualizer::showTopdown(){ map_ = Mat::zeros(mapSize_, CV_8UC3); for (int gx = 0; gx < mapSize_.width; gx += 50) line(map_, Point(gx,0), Point(gx,mapSize_.height), Scalar(30,30,30), 1); diff --git a/modules/slam/src/vo.cpp b/modules/slam/src/vo.cpp deleted file mode 100644 index 98632eb1e49..00000000000 --- a/modules/slam/src/vo.cpp +++ /dev/null @@ -1,773 +0,0 @@ -#include "opencv2/slam/data_loader.hpp" -#include "opencv2/slam/feature.hpp" -#include "opencv2/slam/initializer.hpp" -#include "opencv2/slam/keyframe.hpp" -#include "opencv2/slam/localizer.hpp" -#include "opencv2/slam/map.hpp" -#include "opencv2/slam/matcher.hpp" -#include "opencv2/slam/optimizer.hpp" -#include "opencv2/slam/pose.hpp" -#include "opencv2/slam/visualizer.hpp" -#include "opencv2/slam/vo.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace cv { -namespace vo { - -VisualOdometry::VisualOdometry(Ptr feature, Ptr matcher) - : feature_(std::move(feature)), matcher_(std::move(matcher)) { -} - -int VisualOdometry::run(const std::string &imageDir, double scaleM){ - return run(imageDir, scaleM, options_); -} - -void VisualOdometry::setEnableBackend(bool enable){ - options_.enableBackend = enable; -} - -void VisualOdometry::setBackendWindow(int window){ - options_.backendWindow = std::max(1, window); -} - -void VisualOdometry::setBackendIterations(int iterations){ - options_.backendIterations = std::max(1, iterations); -} - -int VisualOdometry::run(const std::string &imageDir, double scaleM, const VisualOdometryOptions &options){ - const auto prevLogLevel = cv::utils::logging::getLogLevel(); - // Keep global OpenCV logs at WARNING to avoid noisy INFO logs from OpenCL/UI. - // Module-specific verbosity is handled explicitly. - cv::utils::logging::setLogLevel(cv::utils::logging::LOG_LEVEL_WARNING); - - DataLoader loader(imageDir); - if(loader.size() == 0){ - CV_LOG_WARNING(NULL, "VisualOdometry: no images found"); - cv::utils::logging::setLogLevel(prevLogLevel); - return -1; - } - - if(!feature_){ - feature_ = ORB::create(2000); - } - if(!matcher_){ - matcher_ = DescriptorMatcher::create(DescriptorMatcher::BRUTEFORCE_HAMMING); - } - PoseEstimator poseEst; - // Keep the topdown visualization scale consistent with the world scaling applied by scaleM. - // When scaleM is large (e.g. 1.0), a larger meters_per_pixel zooms out to keep the trajectory visible. - // When scaleM is small (e.g. 0.02), it zooms in accordingly. - Visualizer vis(1000, 800, std::max(1e-9, scaleM)); - MapManager map; - Initializer initializer; - Localizer localizer(0.7f); - - // Configure map intrinsics and maintenance thresholds (milestone 2/3) - map.setCameraIntrinsics(loader.fx(), loader.fy(), loader.cx(), loader.cy()); - map.setMapPointCullingParams(options.mapMinObservations, - options.mapMinFoundRatio, - options.mapMaxReprojErrorPx, - options.mapMaxPointsKeep); - map.setRedundantKeyframeCullingParams(options.redundantKeyframeMinObs, - options.redundantKeyframeMinPointObs, - options.redundantKeyframeRatio); - - // prepare per-run diagnostics folder - std::string runTimestamp; - auto now = std::chrono::system_clock::now(); - std::time_t now_time_t = std::chrono::system_clock::to_time_t(now); - std::tm tm = *std::localtime(&now_time_t); - std::ostringstream ss; ss << std::put_time(&tm, "%Y%m%d_%H%M%S"); - runTimestamp = ss.str(); - std::string resultDirStr = imageDir; - if(resultDirStr.empty()) resultDirStr = std::string("."); - if(resultDirStr.back() == '/') resultDirStr.pop_back(); - resultDirStr += "/result"; - ensureDirectoryExists(resultDirStr); - std::string runDirStr = resultDirStr + "/" + runTimestamp; - ensureDirectoryExists(runDirStr); - - Mat R_g = Mat::eye(3,3,CV_64F); - Mat t_g = Mat::zeros(3,1,CV_64F); - - // simple map structures - std::vector keyframes; - std::vector mappoints; - std::unordered_map keyframeIdToIndex; - - // Trajectory recording: store timestamp + pose for each keyframe - struct TrajectoryEntry { - int frame_id; - double timestamp; - Mat R_w; - Mat t_w; - }; - std::vector trajectory; - - // Backend (BA) thread primitives - std::mutex mapMutex; // protects map and keyframe modifications and writeback - std::condition_variable backendCv; - std::atomic backendStop(false); - std::atomic backendRequests(0); - bool backendBusy = false; // guarded by mapMutex - // Expand BA window to optimize meaningful local map (not just last 5 frames) - const int LOCAL_BA_WINDOW = std::max(60, options.backendWindow); - - // Start backend thread only if enabled - std::thread backendThread; - if(options.enableBackend){ - backendThread = std::thread([&]() { - while(!backendStop.load()){ - std::unique_lock lk(mapMutex); - backendCv.wait(lk, [&]{ return backendStop.load() || backendRequests.load() > 0; }); - if(backendStop.load()) break; - // snapshot map and keyframes - auto kfs_snapshot = map.keyframes(); - auto mps_snapshot = map.mappoints(); - // reset requests and mark busy - backendRequests.store(0); - backendBusy = true; - lk.unlock(); - - // determine local window using covisibility with the latest keyframe - int K = static_cast(kfs_snapshot.size()); - if(K <= 0) continue; - const int lastIdx = K - 1; - const int lastId = kfs_snapshot[lastIdx].id; - - std::unordered_map idToIdx; - idToIdx.reserve(static_cast(K)); - for(int i = 0; i < K; ++i) idToIdx[kfs_snapshot[i].id] = i; - - std::unordered_map sharedCount; - sharedCount.reserve(mps_snapshot.size()); - for(const auto &mp : mps_snapshot){ - bool hasLast = false; - for(const auto &obs : mp.observations){ - if(obs.first == lastId){ hasLast = true; break; } - } - if(!hasLast) continue; - for(const auto &obs : mp.observations){ - if(obs.first == lastId) continue; - auto it = idToIdx.find(obs.first); - if(it == idToIdx.end()) continue; - sharedCount[it->second] += 1; - } - } - - struct ScoredKf { int score; int idx; }; - std::vector scored; - scored.reserve(sharedCount.size()); - for(const auto &kv : sharedCount){ - scored.push_back({kv.second, kv.first}); - } - std::sort(scored.begin(), scored.end(), [](const ScoredKf &a, const ScoredKf &b){ return a.score > b.score; }); - - std::vector localKfIndices; - localKfIndices.reserve(static_cast(LOCAL_BA_WINDOW)); - auto push_unique = [&](int idx){ - if(idx < 0 || idx >= K) return; - if(idx == 0 || idx == 1) return; - if(std::find(localKfIndices.begin(), localKfIndices.end(), idx) != localKfIndices.end()) return; - localKfIndices.push_back(idx); - }; - push_unique(lastIdx); - for(const auto &s : scored){ - if(static_cast(localKfIndices.size()) >= LOCAL_BA_WINDOW) break; - push_unique(s.idx); - } - if(localKfIndices.empty()) localKfIndices.push_back(lastIdx); - - std::vector fixedKfIndices; - fixedKfIndices.push_back(0); - if(K > 1) fixedKfIndices.push_back(1); - #if defined(HAVE_SFM) - CV_LOG_DEBUG(NULL, "Backend: Running BA"); - Optimizer::localBundleAdjustmentSFM(kfs_snapshot, mps_snapshot, localKfIndices, fixedKfIndices, - loader.fx(), loader.fy(), loader.cx(), loader.cy(), options.backendIterations); - CV_LOG_DEBUG(NULL, "Backend: BA completed"); - #else - CV_LOG_DEBUG(NULL, "Backend: HAVE_SFM not defined, BA skipped"); - #endif - // write back optimized poses/points into main map under lock using id-based lookup - std::lock_guard lk2(mapMutex); - auto &kfs_ref = const_cast&>(map.keyframes()); - auto &mps_ref = const_cast&>(map.mappoints()); - // copy back poses by id to ensure we update the authoritative containers - for(const auto &kf_opt : kfs_snapshot){ - int idx = map.keyframeIndex(kf_opt.id); - if(idx >= 0 && idx < static_cast(kfs_ref.size())){ - kfs_ref[idx].R_w = kf_opt.R_w.clone(); - kfs_ref[idx].t_w = kf_opt.t_w.clone(); - } - } - // copy back mappoint positions by id - for(const auto &mp_opt : mps_snapshot){ - if(mp_opt.id <= 0) continue; - int idx = map.mapPointIndex(mp_opt.id); - if(idx >= 0 && idx < static_cast(mps_ref.size())){ - mps_ref[idx].p = mp_opt.p; - } - } - backendBusy = false; - backendCv.notify_all(); - } - }); - } - - Mat frame; - std::string imgPath; - int frame_id = 0; - - // persistent previous-frame storage (declare outside loop so detectAndCompute can use them) - static std::vector prevKp; - static Mat prevGray, prevDesc; - - // Helper lambda to extract timestamp from EuRoC image filename (nanoseconds) - auto extractTimestamp = [](const std::string &path, int fallbackId) -> double { - try { - std::string fname = path; - auto slash = fname.find_last_of("/\\"); - if(slash != std::string::npos) fname = fname.substr(slash + 1); - auto dot = fname.find_last_of('.'); - if(dot != std::string::npos) fname = fname.substr(0, dot); - if(!fname.empty()){ - double ts = std::stod(fname); - if(ts > 1e12) ts *= 1e-9; // likely nanoseconds -> seconds - else if(ts > 1e9) ts *= 1e-6; // likely microseconds -> seconds - return ts; - } - } catch(const std::exception &){} - return static_cast(fallbackId); - }; - - while(loader.getNextImage(frame, imgPath)){ - Mat gray = frame; - if(gray.channels() > 1) cvtColor(gray, gray, COLOR_BGR2GRAY); - - std::vector kps; - Mat desc; - // Detect and compute descriptors using injected feature_ (e.g., ORB) - feature_->detect(gray, kps); - feature_->compute(gray, kps, desc); - - // (previous-frame storage declared outside loop) - - if(!prevGray.empty() && !prevDesc.empty() && !desc.empty()){ - // KNN match with ratio test and mutual cross-check - std::vector> knn12, knn21; - matcher_->knnMatch(prevDesc, desc, knn12, 2); - matcher_->knnMatch(desc, prevDesc, knn21, 2); - // ratio test + mutual check (prev->curr) - std::vector goodMatches; - goodMatches.reserve(knn12.size()); - const float ratio = 0.75f; - for(size_t qi = 0; qi < knn12.size(); ++qi){ - if(knn12[qi].empty()) continue; - const DMatch &best = knn12[qi][0]; - if(knn12[qi].size() >= 2){ - const DMatch &second = knn12[qi][1]; - if(second.distance > 0.0f && best.distance / second.distance > ratio) continue; - } - int trainIdx = best.trainIdx; - if(trainIdx < 0 || trainIdx >= (int)knn21.size() || knn21[trainIdx].empty()) continue; - const DMatch &rbest = knn21[trainIdx][0]; - if(rbest.trainIdx == (int)qi) goodMatches.push_back(best); - } - - Mat imgMatches; - drawMatches(prevGray, prevKp, gray, kps, goodMatches, imgMatches, - Scalar::all(-1), Scalar::all(-1), std::vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS); - - std::vector pts1, pts2; - pts1.reserve(goodMatches.size()); pts2.reserve(goodMatches.size()); - for(const auto &m: goodMatches){ - pts1.push_back(prevKp[m.queryIdx].pt); - pts2.push_back(kps[m.trainIdx].pt); - } - - // quick frame-diff to detect near-static frames - double meanDiff = 0.0; - if(!prevGray.empty()){ - Mat diff; absdiff(gray, prevGray, diff); - meanDiff = mean(diff)[0]; - } - - // compute per-match flow magnitudes and median flow - std::vector flows; flows.reserve(pts1.size()); - double median_flow = 0.0; - for(size_t i=0;i tmp = flows; - size_t mid = tmp.size()/2; - std::nth_element(tmp.begin(), tmp.begin()+mid, tmp.end()); - median_flow = tmp[mid]; - } - - int pre_matches = static_cast(goodMatches.size()); - int post_matches = pre_matches; - - // Two-view initialization: if map is empty and this is the second frame, try to bootstrap - if(map.keyframes().empty() && frame_id == 1){ - Mat R_init, t_init; - std::vector pts3D; - std::vector isTri; - // call initializer using the matched keypoints between prev and current - if(initializer.initialize(prevKp, kps, goodMatches, loader.fx(), loader.fy(), loader.cx(), loader.cy(), R_init, t_init, pts3D, isTri)){ - // build two keyframes: previous (id = frame_id-1) and current (id = frame_id) - Mat prevImg; - if(!prevGray.empty()){ - if(prevGray.channels() == 1){ cvtColor(prevGray, prevImg, COLOR_GRAY2BGR); } - else prevImg = prevGray.clone(); - } - KeyFrame kf0(frame_id - 1, prevImg, prevKp, prevDesc, Mat::eye(3,3,CV_64F), Mat::zeros(3,1,CV_64F)); - // recoverPose returns R,t such that x2 = R*x1 + t (cam1 -> cam2). - // Our map uses camera->world rotation (R_w) and camera center in world (C_w). - Mat Rwc1 = R_init.t(); - Mat Cw1 = (-Rwc1 * t_init) * scaleM; - KeyFrame kf1(frame_id, frame, kps, desc, Rwc1, Cw1); - - // convert initializer 3D points (in first camera frame) to MapPoints in world coords (world==first) - std::vector newMps; - newMps.reserve(pts3D.size()); - for(size_t i=0;i lk(mapMutex); - keyframes.push_back(std::move(kf0)); - map.addKeyFrame(keyframes.back()); - keyframes.push_back(std::move(kf1)); - map.addKeyFrame(keyframes.back()); - if(!newMps.empty()) map.addMapPoints(newMps); - } - - // set global pose to second keyframe - Mat t_d; kf1.t_w.convertTo(t_d, CV_64F); - t_g = t_d; - R_g = kf1.R_w.clone(); - double x = t_g.at(0); - double z = t_g.at(2); - vis.addPose(x, z); - - // Record initial trajectory entries - double ts0 = extractTimestamp(imgPath, frame_id - 1); - double ts1 = extractTimestamp(imgPath, frame_id); - trajectory.push_back({frame_id - 1, ts0, Mat::eye(3,3,CV_64F), Mat::zeros(3,1,CV_64F)}); - trajectory.push_back({frame_id, ts1, R_g.clone(), t_g.clone()}); - - // notify backend to run BA on initial map - backendRequests.fetch_add(1); - backendCv.notify_one(); - // skip the usual PnP / poseEst path for this frame since we've initialized - prevGray = gray.clone(); prevKp = kps; prevDesc = desc.clone(); - frame_id++; - continue; - } - } - - Mat R_est, t_est, mask_est; - int inliers_est = 0; - bool ok_est = false; - bool ok_pnp = false; - Mat R_pnp, t_pnp; int inliers_pnp = 0; - int preMatches_pnp = 0, postMatches_pnp = 0; double meanReproj_pnp = 0.0; - Mat R_use, t_use, mask_use; - int inliers = 0; - int matchCount = post_matches; - bool integrate = false; - - if(pts1.size() >= 8){ - ok_est = poseEst.estimate(pts1, pts2, loader.fx(), loader.fy(), loader.cx(), loader.cy(), R_est, t_est, mask_est, inliers_est); - } - if(!ok_est){ - ok_pnp = localizer.tryPnP(map, desc, kps, loader.fx(), loader.fy(), loader.cx(), loader.cy(), gray.cols, gray.rows, - options.minInliers, R_pnp, t_pnp, inliers_pnp, frame_id, &frame, runDirStr, - &preMatches_pnp, &postMatches_pnp, &meanReproj_pnp); - (void)preMatches_pnp; (void)postMatches_pnp; (void)meanReproj_pnp; - } - if(ok_est || ok_pnp){ - R_use = ok_est ? R_est : R_pnp; - t_use = ok_est ? t_est : t_pnp; - mask_use = ok_est ? mask_est : Mat(); - inliers = ok_est ? inliers_est : inliers_pnp; - matchCount = post_matches; - - integrate = true; - if(inliers < options.minInliers || matchCount < options.minMatches){ - integrate = false; - } - if(ok_est){ - double t_norm = 0.0, rot_angle = 0.0; - Mat t_d; t_est.convertTo(t_d, CV_64F); - t_norm = norm(t_d); - Mat R_d; R_est.convertTo(R_d, CV_64F); - double trace = R_d.at(0,0) + R_d.at(1,1) + R_d.at(2,2); - double cos_angle = std::min(1.0, std::max(-1.0, (trace - 1.0) * 0.5)); - rot_angle = std::acos(cos_angle); - if(t_norm < options.minTranslationNorm && std::abs(rot_angle) < options.minRotationRad - && meanDiff < options.diffZeroThresh && median_flow < options.flowZeroThresh){ - integrate = false; - } - } - if (inliers >= options.minInliers || (inliers >= 2 && matchCount > 50 && median_flow > 2.0)) { - integrate = true; - } - - if(integrate){ - // Pose convention: R_g camera->world, t_g camera center in world. - if(ok_est){ - Mat R_d, t_d; - R_use.convertTo(R_d, CV_64F); - t_use.convertTo(t_d, CV_64F); - Mat C2_in_c1 = (-R_d.t() * t_d) * scaleM; - t_g = t_g + R_g * C2_in_c1; - R_g = R_g * R_d.t(); - } else { - Mat R_abs, C_abs; - R_use.convertTo(R_abs, CV_64F); - t_use.convertTo(C_abs, CV_64F); - R_g = R_abs; - t_g = C_abs; - } - double x = t_g.at(0); - double z = t_g.at(2); - vis.addPose(x, z); - - // Log per-frame pose for timestamps. - double ts = extractTimestamp(imgPath, frame_id); - trajectory.push_back({frame_id, ts, R_g.clone(), t_g.clone()}); - - // Keyframe insertion gating - bool insertKeyframe = false; - const int minGap = std::max(1, options.keyframeMinGap); - const int maxGap = std::max(minGap, options.keyframeMaxGap); - if(map.keyframes().empty()){ - insertKeyframe = true; - } else { - const int lastKfId = map.keyframes().back().id; - const int gap = frame_id - lastKfId; - if(gap >= maxGap) insertKeyframe = true; - else if(gap >= minGap && median_flow >= options.keyframeMinParallaxPx) insertKeyframe = true; - } - - if(insertKeyframe){ - KeyFrame kf(frame_id, frame, kps, desc, R_g, t_g); - - // Triangulate against last KF - bool didTriangulate = false; - std::vector newPts; - if(!map.keyframes().empty() && !map.keyframes().back().desc.empty() && !kf.desc.empty()){ - const KeyFrame &last = map.keyframes().back(); - - // Match last KF -> current KF - std::vector> knn_kf12, knn_kf21; - matcher_->knnMatch(last.desc, kf.desc, knn_kf12, 2); - matcher_->knnMatch(kf.desc, last.desc, knn_kf21, 2); - std::vector kfMatches; - kfMatches.reserve(knn_kf12.size()); - const float ratio_kf = 0.75f; - for(size_t qi = 0; qi < knn_kf12.size(); ++qi){ - if(knn_kf12[qi].empty()) continue; - const DMatch &best = knn_kf12[qi][0]; - if(knn_kf12[qi].size() >= 2){ - const DMatch &second = knn_kf12[qi][1]; - if(second.distance > 0.0f && best.distance / second.distance > ratio_kf) continue; - } - int trainIdx = best.trainIdx; - if(trainIdx < 0 || trainIdx >= (int)knn_kf21.size() || knn_kf21[trainIdx].empty()) continue; - const DMatch &rbest = knn_kf21[trainIdx][0]; - if(rbest.trainIdx == (int)qi) kfMatches.push_back(best); - } - - if(static_cast(kfMatches.size()) > options.maxMatchesKeep){ - std::nth_element(kfMatches.begin(), kfMatches.begin() + options.maxMatchesKeep, kfMatches.end(), - [](const DMatch &a, const DMatch &b){ return a.distance < b.distance; }); - kfMatches.resize(options.maxMatchesKeep); - } - - if(!kfMatches.empty()){ - std::vector pts1n, pts2n; - pts1n.reserve(kfMatches.size()); - pts2n.reserve(kfMatches.size()); - std::vector pts1_kp_idx; pts1_kp_idx.reserve(kfMatches.size()); - std::vector pts2_kp_idx; pts2_kp_idx.reserve(kfMatches.size()); - double fx = loader.fx(), fy = loader.fy(), cx = loader.cx(), cy = loader.cy(); - for(const auto &m : kfMatches){ - const Point2f &p1 = last.kps[m.queryIdx].pt; - const Point2f &p2 = kf.kps[m.trainIdx].pt; - pts1n.emplace_back(float((p1.x - cx)/fx), float((p1.y - cy)/fy)); - pts2n.emplace_back(float((p2.x - cx)/fx), float((p2.y - cy)/fy)); - pts1_kp_idx.push_back(m.queryIdx); - pts2_kp_idx.push_back(m.trainIdx); - } - newPts = map.triangulateBetweenLastTwo(pts1n, pts2n, pts1_kp_idx, pts2_kp_idx, last, kf, fx, fy, cx, cy); - if(!newPts.empty()) didTriangulate = true; - } - } - - // insert keyframe and new points under lock - { - std::lock_guard lk(mapMutex); - keyframes.push_back(std::move(kf)); - map.addKeyFrame(keyframes.back()); - if(didTriangulate) map.addMapPoints(newPts); - - if(options.enableMapMaintenance){ - const int interval = std::max(1, options.maintenanceInterval); - if(static_cast(map.keyframes().size()) % interval == 0){ - map.cullBadMapPoints(); - map.cullRedundantKeyFrames(std::max(1, options.maxKeyframeCullsPerMaintenance)); - auto &mps = map.mappointsMutable(); - for(auto &mp : mps){ - if(mp.isBad) continue; - map.updateMapPointDescriptor(mp); - } - } - } - // Notify backend thread to run local BA asynchronously every N inserted keyframes - if(options.enableBackend){ - const int interval = std::max(1, options.backendTriggerInterval); - if(static_cast(map.keyframes().size()) % interval == 0){ - backendRequests.fetch_add(1); - backendCv.notify_one(); - } - } - } - } - } - } - - // Always show a single image; if we have matches, draw small boxes around matched keypoints - Mat visImg; - if(frame.channels() > 1) visImg = frame.clone(); - else cvtColor(gray, visImg, COLOR_GRAY2BGR); - std::string info = std::string("Frame ") + std::to_string(frame_id) + " matches=" + std::to_string(matchCount) + " inliers=" + std::to_string(inliers); - if(!goodMatches.empty()){ - for(size_t mi=0; mi(goodMatches.size())){ - isInlier = mask_use.at(static_cast(mi), 0) != 0; - } else if(mask_use.cols == static_cast(goodMatches.size())){ - isInlier = mask_use.at(0, static_cast(mi)) != 0; - } - } - Scalar col = isInlier ? Scalar(0,255,0) : Scalar(0,0,255); - Point ip(cvRound(p2.x), cvRound(p2.y)); - Rect r(ip - Point(4,4), Size(8,8)); - rectangle(visImg, r, col, 2, LINE_AA); - } - } - putText(visImg, info, Point(10,20), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(0,255,0), 2); - vis.showFrame(visImg); - } else { - Mat visFrame; drawKeypoints(gray, kps, visFrame, Scalar::all(-1), DrawMatchesFlags::DRAW_RICH_KEYPOINTS); - vis.showFrame(visFrame); - } - - vis.showTopdown(); - // update prev - prevGray = gray.clone(); prevKp = kps; prevDesc = desc.clone(); - frame_id++; - char key = (char)waitKey(1); - if(key == 27) break; - } - - // Trigger final BA before saving trajectory and wait synchronously - if(options.enableBackend){ - CV_LOG_WARNING(NULL, "Triggering final backend optimization before saving trajectory..."); - { - std::unique_lock lk(mapMutex); - backendRequests.fetch_add(1); - backendCv.notify_one(); - backendCv.wait(lk, [&]{ return backendRequests.load() == 0 && !backendBusy; }); - } - - // Run a final full-map BA synchronously (larger than sliding window) - std::vector kfs_snapshot; - std::vector mps_snapshot; - { - std::lock_guard lk(mapMutex); - kfs_snapshot = map.keyframes(); - mps_snapshot = map.mappoints(); - } - int K = static_cast(kfs_snapshot.size()); - if(K > 0){ - std::vector allIdx(K); - std::iota(allIdx.begin(), allIdx.end(), 0); - std::vector fixed; - fixed.push_back(0); - if(K > 1) fixed.push_back(1); - #if defined(HAVE_SFM) - CV_LOG_WARNING(NULL, "Final BA: optimizing all keyframes"); - Optimizer::localBundleAdjustmentSFM(kfs_snapshot, mps_snapshot, allIdx, fixed, - loader.fx(), loader.fy(), loader.cx(), loader.cy(), std::max(20, options.backendIterations * 2)); - CV_LOG_WARNING(NULL, "Final BA: completed"); - { - std::lock_guard lk(mapMutex); - auto &kfs_ref = const_cast&>(map.keyframes()); - auto &mps_ref = const_cast&>(map.mappoints()); - for(const auto &kf_opt : kfs_snapshot){ - int idx = map.keyframeIndex(kf_opt.id); - if(idx >= 0 && idx < static_cast(kfs_ref.size())){ - kfs_ref[idx].R_w = kf_opt.R_w.clone(); - kfs_ref[idx].t_w = kf_opt.t_w.clone(); - } - } - for(const auto &mp_opt : mps_snapshot){ - if(mp_opt.id <= 0) continue; - int idx = map.mapPointIndex(mp_opt.id); - if(idx >= 0 && idx < static_cast(mps_ref.size())){ - mps_ref[idx].p = mp_opt.p; - } - } - } - #else - CV_LOG_WARNING(NULL, "Final BA: HAVE_SFM not defined, skipped"); - #endif - } - } - - // save trajectory image into result/ folder - try{ - // save trajectory into the per-run folder using a simple filename (no timestamp) - std::string outDir = resultDirStr + "/" + runTimestamp; - ensureDirectoryExists(outDir); - std::string outPath = outDir + "/trajectory.png"; - if(options.enableBackend && !map.keyframes().empty()){ - std::vector kfs = map.keyframes(); - std::sort(kfs.begin(), kfs.end(), [](const KeyFrame &a, const KeyFrame &b){ return a.id < b.id; }); - std::vector xz; - xz.reserve(kfs.size()); - for(const auto &kf : kfs){ - if(kf.t_w.empty()) continue; - xz.emplace_back(kf.t_w.at(0,0), kf.t_w.at(2,0)); - } - vis.setTrajectoryXZ(xz); - } - (void)vis.saveTrajectory(outPath); - } catch(const std::exception &e){ - CV_LOG_ERROR(NULL, cv::format("Error saving trajectory: %s", e.what())); - } - - // Write trajectory to TUM-format CSV file - // Extract final optimized poses from map keyframes (after backend BA) - std::vector finalTrajectory; - if(options.enableBackend && !map.keyframes().empty()){ - // Build map from frame_id to timestamp - std::unordered_map idToTimestamp; - for(const auto &trajEntry : trajectory){ - idToTimestamp[trajEntry.frame_id] = trajEntry.timestamp; - } - for(const auto &kf : map.keyframes()){ - if(kf.t_w.empty() || kf.R_w.empty()) continue; - double ts = idToTimestamp.count(kf.id) ? idToTimestamp[kf.id] : static_cast(kf.id); - finalTrajectory.push_back({kf.id, ts, kf.R_w.clone(), kf.t_w.clone()}); - } - if(options.verbose) CV_LOG_WARNING(NULL, "Backend enabled: using optimized poses"); - } else { - finalTrajectory = trajectory; // use front-end trajectory if backend disabled - if(options.verbose) CV_LOG_WARNING(NULL, "Backend disabled: using front-end poses"); - } - - try{ - std::string csvPath = resultDirStr + "/" + runTimestamp + "/trajectory_tum.csv"; - std::ofstream ofs(csvPath); - if(!ofs.is_open()){ - CV_LOG_ERROR(NULL, cv::format("Failed to open trajectory CSV: %s", csvPath.c_str())); - } else { - ofs << "# timestamp,tx,ty,tz,qx,qy,qz,qw" << std::endl; - for(const auto &entry : finalTrajectory){ - if(entry.R_w.empty() || entry.t_w.empty()) continue; - Mat Rmat, tvec_w; - entry.R_w.convertTo(Rmat, CV_64F); - entry.t_w.convertTo(tvec_w, CV_64F); - // Convert rotation matrix to quaternion - cv::Matx33d Rm; - Rmat.copyTo(Rm); - double tr = Rm(0,0) + Rm(1,1) + Rm(2,2); - double qw, qx, qy, qz; - if(tr > 0){ - double S = std::sqrt(tr + 1.0) * 2.0; - qw = 0.25 * S; - qx = (Rm(2,1) - Rm(1,2)) / S; - qy = (Rm(0,2) - Rm(2,0)) / S; - qz = (Rm(1,0) - Rm(0,1)) / S; - } else if((Rm(0,0) > Rm(1,1)) && (Rm(0,0) > Rm(2,2))){ - double S = std::sqrt(1.0 + Rm(0,0) - Rm(1,1) - Rm(2,2)) * 2.0; - qw = (Rm(2,1) - Rm(1,2)) / S; - qx = 0.25 * S; - qy = (Rm(0,1) + Rm(1,0)) / S; - qz = (Rm(0,2) + Rm(2,0)) / S; - } else if(Rm(1,1) > Rm(2,2)){ - double S = std::sqrt(1.0 + Rm(1,1) - Rm(0,0) - Rm(2,2)) * 2.0; - qw = (Rm(0,2) - Rm(2,0)) / S; - qx = (Rm(0,1) + Rm(1,0)) / S; - qy = 0.25 * S; - qz = (Rm(1,2) + Rm(2,1)) / S; - } else { - double S = std::sqrt(1.0 + Rm(2,2) - Rm(0,0) - Rm(1,1)) * 2.0; - qw = (Rm(1,0) - Rm(0,1)) / S; - qx = (Rm(0,2) + Rm(2,0)) / S; - qy = (Rm(1,2) + Rm(2,1)) / S; - qz = 0.25 * S; - } - // Write in TUM format: timestamp,tx,ty,tz,qx,qy,qz,qw (integer nanoseconds) - long long ts_ns = static_cast(std::llround(entry.timestamp * 1e9)); - ofs << ts_ns << "," - << std::setprecision(9) << tvec_w.at(0,0) << "," << tvec_w.at(1,0) << "," << tvec_w.at(2,0) << "," - << qx << "," << qy << "," << qz << "," << qw << "\n"; - } - ofs.close(); - if(options.verbose) CV_LOG_WARNING(NULL, "Saved trajectory CSV"); - } - } catch(const std::exception &e){ - CV_LOG_ERROR(NULL, cv::format("Error writing trajectory CSV: %s", e.what())); - } - - // Shutdown backend thread gracefully - if(options.enableBackend){ - backendStop.store(true); - backendCv.notify_one(); - if(backendThread.joinable()) backendThread.join(); - } - - cv::utils::logging::setLogLevel(prevLogLevel); - return 0; -} - -} // namespace vo -} // namespace cv \ No newline at end of file From 00db4a740526a50f5ee20f02709e2620e3f9763d Mon Sep 17 00:00:00 2001 From: solovesonxi <2172449563@qq.com> Date: Thu, 15 Jan 2026 01:46:53 +0800 Subject: [PATCH 29/29] Adds loop-closure and pose-graph optimization (g2o/DBoW3), improves pose conversions/gauge handling and exports, and tightens map/visualizer robustness. --- modules/slam/CMakeLists.txt | 88 +++- .../slam/include/opencv2/slam/keyframe.hpp | 4 + modules/slam/include/opencv2/slam/map.hpp | 5 +- .../slam/include/opencv2/slam/optimizer.hpp | 20 + .../slam/include/opencv2/slam/slam_system.hpp | 4 + .../include/opencv2/slam/visual_odometry.hpp | 7 + .../slam/include/opencv2/slam/visualizer.hpp | 7 +- modules/slam/samples/run_vo_sample.cpp | 190 ++++++-- modules/slam/shim/opencv2/core/core.hpp | 13 + modules/slam/src/initializer.cpp | 39 +- modules/slam/src/map.cpp | 27 +- modules/slam/src/optimizer.cpp | 199 +++++++- modules/slam/src/slam_system.cpp | 425 +++++++++++++++++- modules/slam/src/visual_odometry.cpp | 73 ++- modules/slam/src/visualizer.cpp | 75 +++- 15 files changed, 1079 insertions(+), 97 deletions(-) create mode 100644 modules/slam/shim/opencv2/core/core.hpp diff --git a/modules/slam/CMakeLists.txt b/modules/slam/CMakeLists.txt index 3b86f319862..931fc7843e9 100644 --- a/modules/slam/CMakeLists.txt +++ b/modules/slam/CMakeLists.txt @@ -16,6 +16,10 @@ ocv_define_module(slam # Detect optional dependencies and expose preprocessor macros so sources # can use #if defined(HAVE_SFM) / #if defined(HAVE_G2O) if(TARGET ${the_module}) + # Build-only shim for 3rd-party libs that include legacy + # (DBoW3 does this). The real header errors out during OpenCV's own build. + target_include_directories(${the_module} BEFORE PRIVATE ${CMAKE_CURRENT_LIST_DIR}/shim) + # opencv_sfm is declared OPTIONAL above; if its target exists enable HAVE_SFM if(TARGET opencv_sfm) message(STATUS "slam: opencv_sfm target found — enabling HAVE_SFM") @@ -28,20 +32,88 @@ if(TARGET ${the_module}) message(STATUS "slam: opencv_sfm not found — HAVE_SFM disabled") endif() - # Try to detect g2o (header + library). This is a best-effort detection: we - # look for a common g2o header and try to find a library name. Users on - # Windows should install/build g2o and make it discoverable to CMake. + # Optional DBoW3 for BoW-based loop detection + find_package(DBoW3 QUIET) + if(DBoW3_FOUND) + set(_dbow3_ok FALSE) + if(TARGET DBoW3::DBoW3) + target_link_libraries(${the_module} PRIVATE DBoW3::DBoW3) + set(_dbow3_ok TRUE) + else() + # Some DBoW3 installs do not export an imported target. In that case do + # NOT try to synthesize an imported target via IMPORTED_LOCATION because + # DBoW3_LIBRARIES is often a LIST (e.g. OpenCV targets + DBoW3), which + # would generate invalid Makefile rules. + if(DBoW3_INCLUDE_DIRS) + target_include_directories(${the_module} PRIVATE ${DBoW3_INCLUDE_DIRS}) + elseif(DBoW3_INCLUDE_DIR) + target_include_directories(${the_module} PRIVATE ${DBoW3_INCLUDE_DIR}) + endif() + # Link only the DBoW3 library itself. Some legacy FindDBoW3 scripts set + # DBoW3_LIBRARIES to include OpenCV libraries (or even opencv_slam), which + # would create circular/self links in an OpenCV superbuild. + set(_dbow3_libs "") + if(DBoW3_LIBRARIES) + set(_dbow3_libs ${DBoW3_LIBRARIES}) + elseif(DBoW3_LIBRARY) + set(_dbow3_libs ${DBoW3_LIBRARY}) + endif() + if(_dbow3_libs) + # Prefer entries that look like the DBoW3 library. + set(_dbow3_only ${_dbow3_libs}) + list(FILTER _dbow3_only INCLUDE REGEX ".*[Dd][Bb][Oo][Ww]3.*") + if(_dbow3_only) + set(_dbow3_libs ${_dbow3_only}) + else() + # Otherwise, remove OpenCV libs/targets to avoid circular dependencies. + list(FILTER _dbow3_libs EXCLUDE REGEX "^(opencv_.*|OpenCV::.*)$") + list(FILTER _dbow3_libs EXCLUDE REGEX "^opencv_slam$") + endif() + target_link_libraries(${the_module} PRIVATE ${_dbow3_libs}) + set(_dbow3_ok TRUE) + endif() + endif() + if(_dbow3_ok) + message(STATUS "slam: DBoW3 found — enabling HAVE_DBOW3") + target_compile_definitions(${the_module} PRIVATE HAVE_DBOW3=1) + add_definitions(-DHAVE_DBOW3=1) + else() + message(WARNING "slam: DBoW3 found but no link information (DBoW3::DBoW3 / DBoW3_LIBRARIES / DBoW3_LIBRARY). BoW disabled.") + endif() + else() + message(STATUS "slam: DBoW3 not found — HAVE_DBOW3 disabled") + endif() + + # Try to detect g2o (headers + required libs). We need core + stuff + + # types_sba for the SE3/pose graph edges used in this module. Optional libs + # are linked when available (slam3d/sim3/solvers) to improve compatibility + # with different g2o builds. find_path(G2O_INCLUDE_DIR NAMES g2o/core/sparse_optimizer.h) - find_library(G2O_LIBRARY NAMES g2o_core g2o) - if(G2O_INCLUDE_DIR AND G2O_LIBRARY) - message(STATUS "slam: g2o found (headers and library) — enabling HAVE_G2O") + find_library(G2O_CORE_LIBRARY NAMES g2o_core g2o) + find_library(G2O_STUFF_LIBRARY NAMES g2o_stuff) + find_library(G2O_TYPES_SBA_LIBRARY NAMES g2o_types_sba) + + set(_g2o_required_libs ${G2O_CORE_LIBRARY} ${G2O_STUFF_LIBRARY} ${G2O_TYPES_SBA_LIBRARY}) + list(FILTER _g2o_required_libs EXCLUDE REGEX "NOTFOUND") + + if(G2O_INCLUDE_DIR AND G2O_CORE_LIBRARY AND G2O_STUFF_LIBRARY AND G2O_TYPES_SBA_LIBRARY) + message(STATUS "slam: g2o found (core/stuff/types_sba) — enabling HAVE_G2O") target_include_directories(${the_module} PRIVATE ${G2O_INCLUDE_DIR}) - target_link_libraries(${the_module} PRIVATE ${G2O_LIBRARY}) + + set(_g2o_libs ${_g2o_required_libs}) + foreach(_g2o_candidate g2o_types_slam3d g2o_types_slam3d_addons g2o_types_sim3 g2o_types_data g2o_solver_csparse g2o_solver_cholmod g2o_solver_eigen g2o_solver_dense g2o_solver_pcg) + find_library(_g2o_lib NAMES ${_g2o_candidate}) + if(_g2o_lib) + list(APPEND _g2o_libs ${_g2o_lib}) + endif() + endforeach() + list(REMOVE_DUPLICATES _g2o_libs) + target_link_libraries(${the_module} PRIVATE ${_g2o_libs}) target_compile_definitions(${the_module} PRIVATE HAVE_G2O=1) # also expose globally so external header parsers can evaluate #if HAVE_G2O add_definitions(-DHAVE_G2O=1) else() - message(STATUS "slam: g2o not found — HAVE_G2O disabled") + message(STATUS "slam: g2o not found — HAVE_G2O disabled (need headers + core/stuff/types_sba)") endif() endif() diff --git a/modules/slam/include/opencv2/slam/keyframe.hpp b/modules/slam/include/opencv2/slam/keyframe.hpp index eb91bfd705f..b7c602bf66b 100644 --- a/modules/slam/include/opencv2/slam/keyframe.hpp +++ b/modules/slam/include/opencv2/slam/keyframe.hpp @@ -10,7 +10,11 @@ struct KeyFrame { // Custom constructor for convenient initialization KeyFrame(int id_, const Mat& image_, const std::vector& kps_, const Mat& desc_, const Mat& R_w_, const Mat& t_w_) : id(id_), image(image_.clone()), kps(kps_), desc(desc_.clone()), R_w(R_w_.clone()), t_w(t_w_.clone()) {} + // Constructor with timestamp + KeyFrame(int id_, double timestamp_, const Mat& image_, const std::vector& kps_, const Mat& desc_, const Mat& R_w_, const Mat& t_w_) + : id(id_), timestamp(timestamp_), image(image_.clone()), kps(kps_), desc(desc_.clone()), R_w(R_w_.clone()), t_w(t_w_.clone()) {} int id = -1; + double timestamp = 0.0; //!< Timestamp when this keyframe was captured Mat image; // optional std::vector kps; Mat desc; diff --git a/modules/slam/include/opencv2/slam/map.hpp b/modules/slam/include/opencv2/slam/map.hpp index 65e72a33b95..40c506568ea 100644 --- a/modules/slam/include/opencv2/slam/map.hpp +++ b/modules/slam/include/opencv2/slam/map.hpp @@ -65,7 +65,9 @@ class MapManager { const std::vector &pts1_kp_idx, const std::vector &pts2_kp_idx, const KeyFrame &lastKf, const KeyFrame &curKf, - double fx, double fy, double cx, double cy); + double fx, double fy, double cx, double cy, + double maxReprojPx = 2.0, + double minParallaxRad = 1.0 * CV_PI / 180.0); // Lookup keyframe index by id (-1 if not found) int keyframeIndex(int id) const; @@ -86,6 +88,7 @@ class MapManager { double computeReprojError(const MapPoint &mp, const KeyFrame &kf, double fx, double fy, double cx, double cy) const; void updateMapPointDescriptor(MapPoint &mp); + void updateAllMapPointDescriptors(); // Statistics int countGoodMapPoints() const; diff --git a/modules/slam/include/opencv2/slam/optimizer.hpp b/modules/slam/include/opencv2/slam/optimizer.hpp index 0576c7b59ac..fd6fac5f521 100644 --- a/modules/slam/include/opencv2/slam/optimizer.hpp +++ b/modules/slam/include/opencv2/slam/optimizer.hpp @@ -7,6 +7,14 @@ namespace cv { namespace vo { +struct PoseGraphEdge { + int i = -1; // from keyframe id + int j = -1; // to keyframe id + cv::Mat R_ij; // 3x3, camera->world of relative? stored as Rwc form of transform from i to j (R_ij is R_w component of T_ij when composed as X_j = R_ij * X_i + t_ij) + cv::Mat t_ij; // 3x1, translation in world frame of i (same convention as above) + double weight = 1.0; +}; + // Bundle Adjustment Optimizer using OpenCV-based Levenberg-Marquardt // Note: For production, should use g2o or Ceres for better performance class Optimizer { @@ -53,6 +61,18 @@ class Optimizer { int iterations = 20); #endif + // Pose-graph optimization (loop-closure constraints). + // Edges use module pose convention: keyframe pose is (R_w, C_w). + // The relative constraint T_ij (R_ij, t_ij) represents the expected transform from i to j: + // R_pred = R_i^T * R_j, t_pred = R_i^T * (C_j - C_i); + // Residual is formed on SE3 using small-angle approximation. + static void poseGraphOptimize( + std::vector &keyframes, + const std::vector &edges, + const std::vector &fixedKfIds, + int iterations = 10, + double step = 0.5); + private: // Compute reprojection error and Jacobian static double computeReprojectionError( diff --git a/modules/slam/include/opencv2/slam/slam_system.hpp b/modules/slam/include/opencv2/slam/slam_system.hpp index dabd6df1597..7163466171a 100644 --- a/modules/slam/include/opencv2/slam/slam_system.hpp +++ b/modules/slam/include/opencv2/slam/slam_system.hpp @@ -73,9 +73,13 @@ class CV_EXPORTS_W SlamSystem { CV_WRAP TrackingResult track(cv::InputArray frame, double timestamp = 0.0); CV_WRAP bool saveTrajectoryTUM(const std::string& path) const; + CV_WRAP bool saveOptimizedTrajectoryTUM(const std::string& path) const; CV_WRAP bool saveMap(const std::string& path) const; CV_WRAP bool loadMap(const std::string& path); + // Load a DBoW3 vocabulary for loop detection (optional; ignored if DBoW3 is not compiled in). + CV_WRAP bool setLoopVocabulary(const std::string& path); + CV_WRAP void reset(); const MapManager& getMap() const; diff --git a/modules/slam/include/opencv2/slam/visual_odometry.hpp b/modules/slam/include/opencv2/slam/visual_odometry.hpp index 4ab09baf375..c9a288ef159 100644 --- a/modules/slam/include/opencv2/slam/visual_odometry.hpp +++ b/modules/slam/include/opencv2/slam/visual_odometry.hpp @@ -35,6 +35,8 @@ struct CV_EXPORTS_W_SIMPLE TrackingResult { CV_PROP_RW bool keyframeInserted = false; //!< True if this frame became a keyframe CV_PROP_RW int frameId = -1; //!< Frame ID CV_PROP_RW double timestamp = 0.0; //!< Frame timestamp + CV_PROP_RW std::vector matchPoints; //!< Matched points in current frame (for visualization) + CV_PROP_RW std::vector inlierMask; //!< Inlier mask aligned with matchPoints }; /** @@ -53,12 +55,17 @@ struct CV_EXPORTS_W_SIMPLE VisualOdometryOptions { CV_PROP_RW double flowZeroThresh = 0.3; //!< Flow zero threshold CV_PROP_RW double minTranslationNorm = 1e-4; //!< Min translation norm CV_PROP_RW double minRotationRad = 0.5 * CV_PI / 180.0; //!< Min rotation (radians) + CV_PROP_RW double pnpMaxReprojError = 5.0; //!< Max mean reprojection error (px) to accept PnP // --- Keyframe insertion policy --- CV_PROP_RW int keyframeMinGap = 1; //!< Minimum frames between keyframes CV_PROP_RW int keyframeMaxGap = 8; //!< Maximum frames before forcing keyframe CV_PROP_RW double keyframeMinParallaxPx = 8.0; //!< Minimum parallax in pixels + // --- Triangulation filtering --- + CV_PROP_RW double triMaxReprojErrorPx = 2.0; //!< Max reprojection error to keep triangulated points + CV_PROP_RW double triMinParallaxDeg = 1.0; //!< Min parallax (degrees) between views + // --- Verbose logging --- CV_PROP_RW bool verbose = false; //!< Enable verbose logging }; diff --git a/modules/slam/include/opencv2/slam/visualizer.hpp b/modules/slam/include/opencv2/slam/visualizer.hpp index 59ea180c34e..160be7d09fb 100644 --- a/modules/slam/include/opencv2/slam/visualizer.hpp +++ b/modules/slam/include/opencv2/slam/visualizer.hpp @@ -18,18 +18,21 @@ class CV_EXPORTS Visualizer { void setTrajectoryXZ(const std::vector &xz); // Show frame window void showFrame(const Mat &frame); - // Overlay tracking info text onto a frame - void drawTrackingInfo(Mat &frame, const TrackingResult &res); + // Overlay tracking info text onto a frame. 'frame_id' is 1-based index of the frame. + void drawTrackingInfo(Mat &frame, const TrackingResult &res, int frame_id=0); // Show top-down trajectory void showTopdown(); // Save trajectory image to file bool saveTrajectory(const std::string &path); private: double meters_per_pixel_; + double dynamic_meters_per_pixel_; + Point2d world_center_; Size mapSize_; Mat map_; std::vector traj_; Point worldToPixel(const Point2d &p) const; + void updateViewport(); }; } // namespace vo diff --git a/modules/slam/samples/run_vo_sample.cpp b/modules/slam/samples/run_vo_sample.cpp index e83b1910025..ab8b207e559 100644 --- a/modules/slam/samples/run_vo_sample.cpp +++ b/modules/slam/samples/run_vo_sample.cpp @@ -9,10 +9,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include @@ -32,20 +34,30 @@ static std::string makeTimestampFolder(){ int main(int argc, char** argv) { if(argc < 2){ - std::cout << "Usage: " << argv[0] << " [image_dir] [scale_m=0.02] [output_dir] [mode=slam|localization] [map_path]\n" - << "- Default output: /slam_output//\n" - << "- Default map path: /map.yml.gz (gzip to shrink size)\n" - << "Example (SLAM): " << argv[0] << " /datasets/EuRoC/MH01/mav0/cam0/data 0.02 (auto) slam\n" - << "Example (LOC): " << argv[0] << " /datasets/EuRoC/MH01/mav0/cam0/data 0.02 (auto) localization /map.yml.gz" << std::endl; + std::cout << "Usage: " << argv[0] << " [image_dir] [scale_m=0.02] [output_dir] [test_mode] [map_path] [vocab_path]\n" + << "\nTest Modes (for comparison):\n" + << " vo - Visual Odometry only (no backend, no loop closure)\n" + << " vo_backend - VO + Backend BA (no loop closure)\n" + << " slam - Full SLAM (VO + Backend BA + Loop Closure, default)\n" + << " localization - Localization mode (requires map_path)\n" + << "\nOther parameters:\n" + << " - Default output: /slam_output//\n" + << " - Default map path: /map.yml.gz\n" + << " - vocab_path: ORB vocabulary file for DBoW3 (required for loop closure)\n" + << "\nExamples:\n" + << " " << argv[0] << " /datasets/EuRoC/MH01/mav0/cam0/data 0.02 (auto) vo\n" + << " " << argv[0] << " /datasets/EuRoC/MH01/mav0/cam0/data 0.02 (auto) vo_backend\n" + << " " << argv[0] << " /datasets/EuRoC/MH01/mav0/cam0/data 0.02 (auto) slam (auto) /root/orbvoc.dbow3\n" + << " " << argv[0] << " /datasets/EuRoC/MH01/mav0/cam0/data 0.02 (auto) localization " << std::endl; return 0; } std::string imgDir = argv[1]; // imgDir = "../../datasets/iphone/2025-11-05_170219"; - // std::string imgDir = "../../datasets/EuRoC/MH01/mav0/cam0/data"; + // imgDir = "../../datasets/EuRoC/MH01/mav0/cam0/data"; // imgDir = "../../datasets/vivo/room"; double scale_m = (argc >= 3) ? std::atof(argv[2]) : 0.02; - std::string modeStr = (argc >= 5) ? argv[4] : std::string("slam"); + std::string testMode = (argc >= 5) ? argv[4] : std::string("slam"); // Default output root: dataset folder / slam_output / std::string imgDirAbs = cv::utils::fs::canonical(imgDir); @@ -65,7 +77,19 @@ int main(int argc, char** argv) { std::string outDir = cv::utils::fs::join(outRoot, makeTimestampFolder()); std::string defaultMapName = "map.yml.gz"; // gzip reduces map size significantly - std::string mapPath = (argc >= 6) ? argv[5] : cv::utils::fs::join(outDir, defaultMapName); + std::string defaultMapPath = cv::utils::fs::join(outDir, defaultMapName); + std::string mapPath; + if(argc >= 6){ + mapPath = argv[5]; + if(mapPath.empty() || mapPath == "(auto)"){ + mapPath = defaultMapPath; + } else if(!isAbsolutePath(mapPath)){ + mapPath = cv::utils::fs::join(outDir, mapPath); + } + } else { + mapPath = defaultMapPath; + } + std::string vocabPath = (argc >= 7) ? argv[6] : std::string(); cv::Ptr feature = cv::ORB::create(2000); cv::Ptr matcher = cv::DescriptorMatcher::create(cv::DescriptorMatcher::BRUTEFORCE_HAMMING); @@ -73,21 +97,66 @@ int main(int argc, char** argv) { cv::vo::SlamSystem slam(feature, matcher); cv::vo::VisualOdometryOptions feOpts; cv::vo::SlamSystemOptions sysOpts; - // sysOpts.enableBackend = false; - + + // Configure based on test mode + bool enableBackend = false; + bool enableLoopClosure = false; + std::string modeDescription; + + if(testMode == "vo"){ + enableBackend = false; + enableLoopClosure = false; + modeDescription = "VO (Visual Odometry only, no backend, no loop closure)"; + } else if(testMode == "vo_backend"){ + enableBackend = true; + enableLoopClosure = false; + modeDescription = "VO_BACKEND (VO + Backend BA, no loop closure)"; + } else if(testMode == "slam"){ + enableBackend = true; + enableLoopClosure = true; + modeDescription = "SLAM (VO + Backend BA + Loop Closure)"; + } else if(testMode == "localization"){ + enableBackend = false; + enableLoopClosure = false; + modeDescription = "LOCALIZATION (map frozen, pose estimation only)"; + } else { + std::cerr << "Unknown test mode: " << testMode << std::endl; + std::cerr << "Valid modes: vo, vo_backend, slam, localization" << std::endl; + return -1; + } + + sysOpts.enableBackend = enableBackend; slam.setFrontendOptions(feOpts); slam.setSystemOptions(sysOpts); - if(modeStr == "localization"){ + // Set loop closure vocabulary only if enabled + if(enableLoopClosure){ + if(!vocabPath.empty() && vocabPath != "(auto)"){ + cv::utils::logging::setLogLevel(cv::utils::logging::LOG_LEVEL_INFO); + bool ok = slam.setLoopVocabulary(vocabPath); + std::cout << "Loop vocabulary: " << vocabPath << " => " << (ok ? "OK" : "FAILED") << std::endl; + if(!ok){ + std::cerr << "Warning: Failed to load vocabulary, loop closure will be disabled" << std::endl; + enableLoopClosure = false; + } + } else { + std::cout << "Loop vocabulary: (not set, loop closure disabled)" << std::endl; + enableLoopClosure = false; + } + } else { + std::cout << "Loop vocabulary: (disabled for this test mode)" << std::endl; + } + + if(testMode == "localization"){ if(!slam.loadMap(mapPath)){ std::cerr << "Failed to load map for localization: " << mapPath << std::endl; return -4; } slam.setMode(cv::vo::MODE_LOCALIZATION); - std::cout << "Mode: LOCALIZATION (map frozen)" << std::endl; + std::cout << "Mode: " << modeDescription << std::endl; } else { slam.setMode(cv::vo::MODE_SLAM); - std::cout << "Mode: SLAM" << std::endl; + std::cout << "Mode: " << modeDescription << std::endl; } std::cout << "Running OpenCV SlamSystem on " << imgDir << std::endl; @@ -111,13 +180,8 @@ int main(int argc, char** argv) { // meters_per_pixel=0.02 => 50 px per meter (roughly matches previous drawScale=50). cv::vo::Visualizer viz(600, 600, 0.02); - // TUM-like CSV: timestamp,tx,ty,tz,qx,qy,qz,qw - std::ofstream trajCsv(outDir + "/trajectory_tum.csv", std::ios::out); - if(!trajCsv.is_open()){ - std::cerr << "Failed to open trajectory output: " << (outDir + "/trajectory_tum.csv") << std::endl; - return -3; - } - trajCsv << "timestamp,tx,ty,tz,qx,qy,qz,qw\n"; + // Timing: measure total runtime using OpenCV TickMeter + cv::TickMeter tm; tm.start(); auto extractTimestamp = [](const std::string &p, int fallback)->double{ try{ @@ -152,23 +216,13 @@ int main(int argc, char** argv) { const double x = res.t_w.at(0); const double z = res.t_w.rows >= 3 ? res.t_w.at(2) : 0.0; - // Update trajectory view using existing Visualizer. + // Update trajectory view using existing Visualizer (for real-time display). viz.addPose(x, z); - - if(!res.R_w.empty() && res.t_w.rows >= 3){ - cv::Quatd q = cv::Quatd::createFromRotMat(res.R_w); - const cv::Vec4d qvec(q.at(1), q.at(2), q.at(3), q.at(0)); // [qx,qy,qz,qw] - const double tx = res.t_w.at(0); - const double ty = res.t_w.at(1); - const double tz = res.t_w.at(2); - trajCsv << std::fixed << std::setprecision(9) - << ts << "," << tx << "," << ty << "," << tz << "," - << qvec[0] << "," << qvec[1] << "," << qvec[2] << "," << qvec[3] << "\n"; - } } // Use Visualizer to overlay tracking info and present frames. - viz.drawTrackingInfo(vis, res); + // Pass 1-based frame index to visualizer for display + viz.drawTrackingInfo(vis, res, fid + 1); viz.showFrame(vis); viz.showTopdown(); @@ -177,16 +231,78 @@ int main(int argc, char** argv) { fid++; } - trajCsv.close(); + const bool mapSaved = slam.saveMap(mapPath); + if(!mapSaved){ + std::cerr << "Failed to save map: " << mapPath << std::endl; + } + + // Save trajectory as the final output (trajectory_tum.csv) + // For VO mode, save online trajectory (no optimization) + // For other modes, save optimized trajectory + const std::string trajFinal = outDir + "/trajectory_tum.csv"; + if(testMode == "vo"){ + // VO only mode: save online trajectory (no backend optimization) + slam.saveTrajectoryTUM(trajFinal); + std::cout << "Online trajectory saved (VO only, no optimization): " << trajFinal << std::endl; + } else { + // Backend enabled: save optimized trajectory + slam.saveOptimizedTrajectoryTUM(trajFinal); + std::cout << "Optimized trajectory saved: " << trajFinal << std::endl; + } + + // Update visualizer with optimized trajectory for final image + // Read the optimized trajectory and set it to visualizer + std::ifstream trajFile(trajFinal); + if(trajFile.is_open()){ + std::string line; + std::getline(trajFile, line); // Skip header + std::vector optimizedTraj; + while(std::getline(trajFile, line)){ + if(line.empty()) continue; + std::istringstream iss(line); + std::string token; + std::vector tokens; + while(std::getline(iss, token, ',')){ + tokens.push_back(token); + } + if(tokens.size() >= 4){ + try{ + double tx = std::stod(tokens[1]); + double tz = std::stod(tokens[3]); + // addPose(x, z) now stores (x, z) directly for top-down view + // CSV tz is world z (forward = positive), store directly + optimizedTraj.emplace_back(tx, tz); + } catch(...){ + continue; + } + } + } + if(!optimizedTraj.empty()){ + viz.setTrajectoryXZ(optimizedTraj); + } + } const std::string trajPng = outDir + "/trajectory.png"; if(!viz.saveTrajectory(trajPng)){ std::cerr << "Failed to write trajectory image: " << trajPng << std::endl; + } else { + std::cout << "Trajectory visualization saved: " << trajPng << std::endl; } - if(!slam.saveMap(mapPath)){ - std::cerr << "Failed to save map: " << mapPath << std::endl; - } + // Stop timer and print elapsed hours/minutes/seconds (seconds rounded) + tm.stop(); + double elapsed = tm.getTimeSec(); + int totalSec = static_cast(std::round(elapsed)); + int hrs = totalSec / 3600; + int mins = (totalSec % 3600) / 60; + int secs = totalSec % 60; + std::ostringstream timess; + timess << "Elapsed time: "; + bool printed = false; + if(hrs > 0){ timess << hrs << " hours "; printed = true; } + if(mins > 0){ timess << mins << " minutes "; printed = true; } + timess << secs << " seconds"; + std::cout << timess.str() << std::endl; return 0; } diff --git a/modules/slam/shim/opencv2/core/core.hpp b/modules/slam/shim/opencv2/core/core.hpp new file mode 100644 index 00000000000..11c38ef78e5 --- /dev/null +++ b/modules/slam/shim/opencv2/core/core.hpp @@ -0,0 +1,13 @@ +#pragma once + +// This is a build-only shim for third-party libraries that still include +// (legacy compatibility header). +// +// When building OpenCV itself, the real intentionally +// triggers an #error to prevent internal use of compatibility headers. +// +// We provide this shim via a PRIVATE include directory (added BEFORE others) so +// external deps like DBoW3 can compile while we are inside the OpenCV build. +// It is NOT installed and does not affect OpenCV's public headers. + +#include diff --git a/modules/slam/src/initializer.cpp b/modules/slam/src/initializer.cpp index dc6aa6d1fea..7745290a463 100644 --- a/modules/slam/src/initializer.cpp +++ b/modules/slam/src/initializer.cpp @@ -126,7 +126,7 @@ bool Initializer::reconstructF(const std::vector &pts1, if(inliers < 30) return false; - // Triangulate + // Triangulate (use normalized coordinates with P1=I, P2=[R|t]) points3D.resize(pts1.size()); isTriangulated.resize(pts1.size(), false); @@ -134,7 +134,15 @@ bool Initializer::reconstructF(const std::vector &pts1, Mat P2; hconcat(R, t, P2); - triangulate(P1, P2, pts1, pts2, points3D); + std::vector pts1n, pts2n; + pts1n.reserve(pts1.size()); + pts2n.reserve(pts2.size()); + for(size_t i = 0; i < pts1.size(); ++i){ + pts1n.emplace_back(static_cast((pts1[i].x - cx) / fx), static_cast((pts1[i].y - cy) / fy)); + pts2n.emplace_back(static_cast((pts2[i].x - cx) / fx), static_cast((pts2[i].y - cy) / fy)); + } + + triangulate(P1, P2, pts1n, pts2n, points3D); // Check quality std::vector isGood; @@ -176,7 +184,14 @@ bool Initializer::reconstructH(const std::vector &pts1, P2.at(r, 3) = ts[i].at(r, 0); } - triangulate(P1, P2, pts1, pts2, pts3D); + std::vector pts1n, pts2n; + pts1n.reserve(pts1.size()); + pts2n.reserve(pts2.size()); + for(size_t k = 0; k < pts1.size(); ++k){ + pts1n.emplace_back(static_cast((pts1[k].x - cx) / fx), static_cast((pts1[k].y - cy) / fy)); + pts2n.emplace_back(static_cast((pts2[k].x - cx) / fx), static_cast((pts2[k].y - cy) / fy)); + } + triangulate(P1, P2, pts1n, pts2n, pts3D); int nGood = checkRT(Rs[i], ts[i], pts1, pts2, pts3D, isGood, fx, fy, cx, cy, par); allPoints[i] = pts3D; @@ -279,10 +294,20 @@ void Initializer::triangulate(const Mat &P1, const Mat &P2, Mat pts4D; triangulatePoints(P1, P2, pts1, pts2, pts4D); - for(int i = 0; i < pts4D.cols; ++i) { - Mat x = pts4D.col(i); - x /= x.at(3, 0); - points3D[i] = Point3d(x.at(0,0), x.at(1,0), x.at(2,0)); + Mat pts4D64; + if(pts4D.type() != CV_64F) pts4D.convertTo(pts4D64, CV_64F); + else pts4D64 = pts4D; + + for(int i = 0; i < pts4D64.cols; ++i) { + const double w = pts4D64.at(3, i); + if(std::abs(w) < 1e-12) { + points3D[i] = Point3d(0,0,0); + continue; + } + const double X = pts4D64.at(0, i) / w; + const double Y = pts4D64.at(1, i) / w; + const double Z = pts4D64.at(2, i) / w; + points3D[i] = Point3d(X, Y, Z); } } diff --git a/modules/slam/src/map.cpp b/modules/slam/src/map.cpp index 06bd482ba5e..392ba08c9ef 100644 --- a/modules/slam/src/map.cpp +++ b/modules/slam/src/map.cpp @@ -89,7 +89,8 @@ std::vector MapManager::triangulateBetweenLastTwo(const std::vector &pts1_kp_idx, const std::vector &pts2_kp_idx, const KeyFrame &lastKf, const KeyFrame &curKf, - double fx, double fy, double cx, double cy){ + double fx, double fy, double cx, double cy, + double maxReprojPx, double minParallaxRad){ std::vector newPoints; if(pts1n.empty() || pts2n.empty()) return newPoints; // Relative pose from last camera -> current camera. @@ -161,22 +162,20 @@ std::vector MapManager::triangulateBetweenLastTwo(const std::vector= 0 && obs_v1 >= 0){ double e1 = std::hypot(u1 - obs_u1, v1 - obs_v1); - if(e1 > MAX_REPROJ_PX) pass = false; + if(e1 > maxReprojPx) pass = false; } if(obs_u2 >= 0 && obs_v2 >= 0){ double e2 = std::hypot(u2 - obs_u2, v2 - obs_v2); - if(e2 > MAX_REPROJ_PX) pass = false; + if(e2 > maxReprojPx) pass = false; } // parallax check: angle between viewing rays (in last frame) Mat ray1 = Xc_last / norm(Xc_last); Mat ray2 = Xc_cur / norm(Xc_cur); double cos_par = ray1.dot(ray2); double parallax = std::acos(std::min(1.0, std::max(-1.0, cos_par))); - const double MIN_PARALLAX_RAD = 1.0 * CV_PI / 180.0; // 1 degree - if(parallax < MIN_PARALLAX_RAD) pass = false; + if(parallax < minParallaxRad) pass = false; if(!pass) continue; // attach observations using provided keypoint indices when available int kp1idx = (c < static_cast(pts1_kp_idx.size())) ? pts1_kp_idx[c] : -1; @@ -443,6 +442,13 @@ void MapManager::updateMapPointDescriptor(MapPoint &mp) { } } +void MapManager::updateAllMapPointDescriptors(){ + for(auto &mp : mappoints_){ + if(mp.observations.empty()) continue; + updateMapPointDescriptor(mp); + } +} + int MapManager::countGoodMapPoints() const { int count = 0; for(const auto &mp : mappoints_) { @@ -464,6 +470,7 @@ bool MapManager::save(const std::string &path) const { for(const auto &kf : keyframes_){ fs << "{"; fs << "id" << kf.id; + fs << "timestamp" << kf.timestamp; fs << "R" << kf.R_w; fs << "t" << kf.t_w; std::vector kps = kf.kps; @@ -509,13 +516,17 @@ bool MapManager::load(const std::string &path){ cv::FileNode kfNode = fs["keyframes"]; if(kfNode.type() == cv::FileNode::SEQ){ for(const auto &n : kfNode){ - int id = -1; Mat R, t, desc; std::vector kps; + int id = -1; double timestamp = 0.0; Mat R, t, desc; std::vector kps; n["id"] >> id; + // timestamp is optional for backward compatibility + if(n["timestamp"].type() != cv::FileNode::NONE){ + n["timestamp"] >> timestamp; + } n["R"] >> R; n["t"] >> t; read(n["kps"], kps); n["desc"] >> desc; - KeyFrame kf(id, Mat(), kps, desc, R, t); + KeyFrame kf(id, timestamp, Mat(), kps, desc, R, t); keyframes_.push_back(kf); } } diff --git a/modules/slam/src/optimizer.cpp b/modules/slam/src/optimizer.cpp index 9fdc2b95ef6..42e51dd57fd 100644 --- a/modules/slam/src/optimizer.cpp +++ b/modules/slam/src/optimizer.cpp @@ -239,19 +239,83 @@ void Optimizer::localBundleAdjustmentSFM( Mat K = (Mat_(3,3) << fx, 0, cx, 0, fy, cy, 0, 0, 1); + // Pose convention in this module: + // R_w = Rwc (camera->world), t_w = Cw (camera center in world) + // cv::sfm::bundleAdjust expects: + // R = Rcw (world->camera), t = tcw where Xc = Rcw*Xw + tcw std::vector Rs, Ts; Rs.reserve(V); Ts.reserve(V); for (size_t vi = 0; vi < V; ++vi) { - Rs.push_back(keyframes[views[vi]].R_w.clone()); - Ts.push_back(keyframes[views[vi]].t_w.clone()); + const Mat Rwc = keyframes[views[vi]].R_w; + const Mat Cw = keyframes[views[vi]].t_w; + Mat Rcw = Rwc.t(); + Mat tcw = -Rcw * Cw; + Rs.push_back(Rcw.clone()); + Ts.push_back(tcw.clone()); + } + + // Gauge fixing: remember prior pose of the first fixed keyframe (if any) + // so we can re-anchor the BA solution back to the original world frame. + Mat Rwc_fixed_prior, Cw_fixed_prior; + int fixedViewKfIdx = -1; + if (!fixedKfIndices.empty()) { + int fixedIdx = fixedKfIndices.front(); + if (fixedIdx >= 0 && fixedIdx < static_cast(keyframes.size())) { + fixedViewKfIdx = fixedIdx; + Rwc_fixed_prior = keyframes[fixedIdx].R_w.clone(); + Cw_fixed_prior = keyframes[fixedIdx].t_w.clone(); + } } cv::sfm::bundleAdjust(points2dMats, K, Rs, Ts, points3dMat); + // Convert optimized poses back to module convention (Rwc, Cw) + std::vector Rwc_opt(V), Cw_opt(V); + for (size_t vi = 0; vi < V; ++vi) { + Mat Rcw = Rs[vi]; + Mat tcw = Ts[vi]; + if (Rcw.type() != CV_64F) Rcw.convertTo(Rcw, CV_64F); + if (tcw.type() != CV_64F) tcw.convertTo(tcw, CV_64F); + Mat Rwc = Rcw.t(); + Mat Cw = -Rwc * tcw; + Rwc_opt[vi] = Rwc; + Cw_opt[vi] = Cw; + } + + // If we have a fixed prior pose, compute a world-frame transform that + // aligns the optimized fixed keyframe back to its prior (anchors gauge). + Mat Rg = Mat::eye(3, 3, CV_64F); + Mat tg = Mat::zeros(3, 1, CV_64F); + if (fixedViewKfIdx >= 0 && !Rwc_fixed_prior.empty() && !Cw_fixed_prior.empty()) { + // Find the view index that corresponds to fixedViewKfIdx + int fixedVi = -1; + for (size_t vi = 0; vi < V; ++vi) { + if (views[vi] == fixedViewKfIdx) { fixedVi = static_cast(vi); break; } + } + if (fixedVi >= 0 && !Rwc_opt[fixedVi].empty() && !Cw_opt[fixedVi].empty()) { + const Mat &R0 = Rwc_opt[fixedVi]; + const Mat &C0 = Cw_opt[fixedVi]; + Rg = Rwc_fixed_prior * R0.t(); + tg = Cw_fixed_prior - Rg * C0; + + // Apply gauge transform to all optimized cameras + for (size_t vi = 0; vi < V; ++vi) { + Rwc_opt[vi] = Rg * Rwc_opt[vi]; + Cw_opt[vi] = Rg * Cw_opt[vi] + tg; + } + // Apply gauge transform to optimized points + for (int j = 0; j < N; ++j) { + Mat p = points3dMat.col(j); + Mat p2 = Rg * p + tg; + p2.copyTo(points3dMat.col(j)); + } + } + } + // Write back optimized poses (only for the views we optimized) for (size_t vi = 0; vi < V; ++vi) { - keyframes[views[vi]].R_w = Rs[vi].clone(); - keyframes[views[vi]].t_w = Ts[vi].clone(); + keyframes[views[vi]].R_w = Rwc_opt[vi].clone(); + keyframes[views[vi]].t_w = Cw_opt[vi].clone(); } // Write back optimized points for (int j = 0; j < N; ++j) { @@ -417,8 +481,17 @@ bool Optimizer::optimizePose( if(!success) return false; Mat R; Rodrigues(rvec, R); - kf.R_w = R; - kf.t_w = tvec; + + // solvePnP returns Rcw/tcw (world->camera): Xc = Rcw*Xw + tcw. + // Module pose convention is Rwc (camera->world) and Cw (camera center in world). + Mat Rcw = R; + Mat tcw = tvec; + if(Rcw.type() != CV_64F) Rcw.convertTo(Rcw, CV_64F); + if(tcw.type() != CV_64F) tcw.convertTo(tcw, CV_64F); + Mat Rwc = Rcw.t(); + Mat Cw = -Rwc * tcw; + kf.R_w = Rwc; + kf.t_w = Cw; for(int i = 0; i < inliersMask.rows && i < static_cast(inliers.size()); ++i) inliers[i] = (inliersMask.at(i,0) != 0); @@ -432,15 +505,125 @@ void Optimizer::globalBundleAdjustmentSFM( double fx, double fy, double cx, double cy, int iterations) { - CV_LOG_INFO(NULL, "Optimizer: Global BA"); std::vector localKfIndices; for(size_t i = 1; i < keyframes.size(); ++i) localKfIndices.push_back(static_cast(i)); std::vector fixedKfIndices = {0}; localBundleAdjustmentSFM(keyframes, mappoints, localKfIndices, fixedKfIndices, fx, fy, cx, cy, iterations); - CV_LOG_INFO(NULL, "Optimizer: Global BA completed"); } #endif +void Optimizer::poseGraphOptimize( + std::vector &keyframes, + const std::vector &edges, + const std::vector &fixedKfIds, + int iterations, + double step) { + + if(keyframes.empty() || edges.empty()) return; + +#if defined(HAVE_G2O) + { + typedef g2o::BlockSolver< g2o::BlockSolverTraits<6,3> > Block; + auto linearSolver = std::make_unique>(); + auto blockSolver = std::make_unique(std::move(linearSolver)); + auto solver = new g2o::OptimizationAlgorithmLevenberg(std::move(blockSolver)); + + g2o::SparseOptimizer optimizer; + optimizer.setAlgorithm(solver); + optimizer.setVerbose(false); + + std::unordered_map id2vid; + for(const auto &kf : keyframes){ id2vid[kf.id] = kf.id; } + + for(const auto &kf : keyframes){ + auto *v = new g2o::VertexSE3Expmap(); + Mat Rwc = kf.R_w, Cw = kf.t_w; + if(Rwc.type() != CV_64F) Rwc.convertTo(Rwc, CV_64F); + if(Cw.type() != CV_64F) Cw.convertTo(Cw, CV_64F); + Mat Rcw = Rwc.t(); + Mat tcw = -Rcw * Cw; + Eigen::Matrix3d Re; Eigen::Vector3d te; + cv2eigen(Rcw, Re); cv2eigen(tcw, te); + v->setId(kf.id); + v->setEstimate(g2o::SE3Quat(Re, te)); + if(std::find(fixedKfIds.begin(), fixedKfIds.end(), kf.id) != fixedKfIds.end()) v->setFixed(true); + optimizer.addVertex(v); + } + + for(const auto &e : edges){ + if(id2vid.find(e.i)==id2vid.end() || id2vid.find(e.j)==id2vid.end()) continue; + Mat Rij = e.R_ij, tij = e.t_ij; + if(Rij.empty() || tij.empty()) continue; + if(Rij.type() != CV_64F) Rij.convertTo(Rij, CV_64F); + if(tij.type() != CV_64F) tij.convertTo(tij, CV_64F); + Mat Rji = Rij.t(); + Mat tji = -Rji * tij; + Eigen::Matrix3d Re; Eigen::Vector3d te; + cv2eigen(Rji, Re); cv2eigen(tji, te); + auto *edge = new g2o::EdgeSE3Expmap(); + edge->setVertex(0, optimizer.vertex(id2vid[e.i])); + edge->setVertex(1, optimizer.vertex(id2vid[e.j])); + edge->setMeasurement(g2o::SE3Quat(Re, te)); + edge->setInformation(Eigen::Matrix::Identity() * e.weight); + auto *rk = new g2o::RobustKernelHuber(); rk->setDelta(1.0); edge->setRobustKernel(rk); + optimizer.addEdge(edge); + } + + optimizer.initializeOptimization(); + optimizer.optimize(iterations); + + for(auto &kf : keyframes){ + auto *v = dynamic_cast(optimizer.vertex(id2vid[kf.id])); + if(!v) continue; + g2o::SE3Quat est = v->estimate(); + Eigen::Matrix3d Rcw = est.rotation().toRotationMatrix(); + Eigen::Vector3d tcw = est.translation(); + Mat Rcw_cv, tcw_cv; eigen2cv(Rcw, Rcw_cv); eigen2cv(tcw, tcw_cv); + Mat Rwc = Rcw_cv.t(); Mat Cw = -Rwc * tcw_cv; + kf.R_w = Rwc.clone(); kf.t_w = Cw.clone(); + } + return; + } +#endif + + // Fallback: simple iterative small-angle correction + std::unordered_map id2idx; id2idx.reserve(keyframes.size()); + for(size_t i=0;i(i); + std::unordered_set fixed(fixedKfIds.begin(), fixedKfIds.end()); + + for(int it=0; itsecond, jj = itJ->second; + KeyFrame &Ki = keyframes[ii]; + KeyFrame &Kj = keyframes[jj]; + Mat Ri = Ki.R_w.clone(), Rj = Kj.R_w.clone(); + Mat Ci = Ki.t_w.clone(), Cj = Kj.t_w.clone(); + if(Ri.empty()||Rj.empty()||Ci.empty()||Cj.empty()) continue; + Mat R_pred = Ri.t() * Rj; + Mat t_pred = Ri.t() * (Cj - Ci); + Mat R_err = e.R_ij.t() * R_pred; + Mat t_err = e.R_ij.t() * t_pred - e.t_ij; + Mat w; Rodrigues(R_err, w); + Mat dt = t_err; + + if(!fixed.count(Kj.id)){ + Mat dRj; Rodrigues(w * (-step*0.5), dRj); + Rj = Rj * dRj; + Cj = Cj + Rj * (dt * (-step*0.5)); + } + if(!fixed.count(Ki.id)){ + Mat dRi; Rodrigues(w * (step*0.5), dRi); + Ri = dRi * Ri; + Ci = Ci + Ri * (dt * (step*0.5)); + } + Ki.R_w = Ri; Ki.t_w = Ci; + Kj.R_w = Rj; Kj.t_w = Cj; + } + } +} + double Optimizer::computeReprojectionError( const Point3d &point3D, const Mat &R, const Mat &t, diff --git a/modules/slam/src/slam_system.cpp b/modules/slam/src/slam_system.cpp index ef3928b324e..5600a8af09f 100644 --- a/modules/slam/src/slam_system.cpp +++ b/modules/slam/src/slam_system.cpp @@ -5,12 +5,18 @@ #include #include #include +#include +#if defined(HAVE_DBOW3) +#include +#endif #include #include #include #include #include #include +#include +#include namespace cv { namespace vo { @@ -27,7 +33,9 @@ static cv::Vec4d rotationMatrixToQuaternion_(const cv::Mat& R_in) class SlamSystem::Impl { public: Impl(Ptr det, Ptr matcher) - : frontend_(std::move(det), std::move(matcher)) {} + : frontend_(std::move(det), std::move(matcher)) { + loopMatcher_ = BFMatcher::create(NORM_HAMMING); + } ~Impl(){ stopBackend(); } void setFrontendOptions(const VisualOdometryOptions& o){ feOpts_ = o; frontend_.setOptions(o); } @@ -89,6 +97,13 @@ class SlamSystem::Impl { map_.cullBadMapPoints(); } } +#if defined(HAVE_DBOW3) + if(res.keyframeInserted && bowReady_){ + std::lock_guard lk(mapMutex_); + const auto &kfs = map_.keyframes(); + if(!kfs.empty()) addKeyframeToBoW(kfs.back()); + } +#endif if(res.ok){ TrajEntry te; te.frameId = res.frameId; te.ts = ts; te.R = res.R_w; te.t = res.t_w; trajectory_.push_back(te); @@ -112,13 +127,287 @@ class SlamSystem::Impl { return true; } + bool saveOptimizedTrajectoryTUM(const std::string& path) const { + std::ofstream out(path, std::ios::out); + if(!out.is_open()) return false; + out << "timestamp,tx,ty,tz,qx,qy,qz,qw\n"; + + // Build optimized keyframe poses with timestamps (use KeyFrame::timestamp directly) + struct KFPose { double ts; int id; Mat R; Mat t; }; + std::vector kfPoses; + const auto& kfs = map_.keyframes(); + for(const auto &kf : kfs){ + if(kf.R_w.empty() || kf.t_w.empty()) continue; + // Use KeyFrame::timestamp if available, otherwise fallback to trajectory_ lookup + double ts = kf.timestamp; + if(ts <= 0.0){ + // Fallback: find timestamp from trajectory_ for backward compatibility + for(const auto &te : trajectory_){ + if(te.frameId == kf.id){ ts = te.ts; break; } + } + } + if(ts > 0.0) kfPoses.push_back({ts, kf.id, kf.R_w.clone(), kf.t_w.clone()}); + } + + // Sort by timestamp + std::sort(kfPoses.begin(), kfPoses.end(), [](const KFPose &a, const KFPose &b){ return a.ts < b.ts; }); + + if(kfPoses.empty()){ + // Fallback: output original trajectory + for(const auto &te : trajectory_){ + if(te.R.empty() || te.t.empty()) continue; + cv::Vec4d q = rotationMatrixToQuaternion_(te.R); + Mat t; te.t.convertTo(t, CV_64F); + if(t.rows < 3) continue; + out << std::fixed << std::setprecision(9) + << te.ts << "," << t.at(0) << "," << t.at(1) << "," << t.at(2) << "," + << q[0] << "," << q[1] << "," << q[2] << "," << q[3] << "\n"; + } + return true; + } + + // For each frame in trajectory_, find nearest optimized keyframe pose by timestamp + for(const auto &te : trajectory_){ + if(te.R.empty() || te.t.empty()) continue; + + // Find nearest keyframe by timestamp + size_t bestIdx = 0; + double bestDist = std::abs(te.ts - kfPoses[0].ts); + for(size_t i = 1; i < kfPoses.size(); ++i){ + double dist = std::abs(te.ts - kfPoses[i].ts); + if(dist < bestDist){ bestDist = dist; bestIdx = i; } + } + + // Use the nearest keyframe's optimized pose + const auto &bestKF = kfPoses[bestIdx]; + cv::Vec4d q = rotationMatrixToQuaternion_(bestKF.R); + Mat t; bestKF.t.convertTo(t, CV_64F); + if(t.rows < 3) continue; + + out << std::fixed << std::setprecision(9) + << te.ts << "," << t.at(0) << "," << t.at(1) << "," << t.at(2) << "," + << q[0] << "," << q[1] << "," << q[2] << "," << q[3] << "\n"; + } + return true; + } + bool saveMap(const std::string& path) const { return map_.save(path); } bool loadMap(const std::string& path){ bool ok = map_.load(path); return ok; } void reset(){ stopBackend(); map_.clear(); frontend_.reset(); trajectory_.clear(); } const MapManager& map() const { return map_; } MapManager& mapMutable() { return map_; } +#if defined(HAVE_DBOW3) + bool setLoopVocabulary(const std::string& path){ + try { + bowVocab_ = std::make_unique(path); + if(!bowVocab_ || bowVocab_->empty()){ + bowVocab_.reset(); bowDb_.reset(); bowEntryToKfId_.clear(); bowReady_ = false; + return false; + } + bowDb_ = std::make_unique(*bowVocab_, false, 0); + bowEntryToKfId_.clear(); + bowReady_ = true; + + bowKeyframesAdded_.store(0); + bowQueries_.store(0); + bowCandidatesTotal_.store(0); + loopAttempts_.store(0); + loopAccepted_.store(0); + + CV_LOG_INFO(NULL, "DBoW3 vocabulary loaded: '" << path << "' (words=" << bowVocab_->size() << ")"); + return true; + } catch(const std::exception &){ + bowVocab_.reset(); bowDb_.reset(); bowEntryToKfId_.clear(); bowReady_ = false; + return false; + } + } + + void addKeyframeToBoW(const KeyFrame& kf){ + if(!bowReady_ || !bowDb_ || kf.desc.empty()) return; + int entryId = bowDb_->add(kf.desc); + if(entryId >= static_cast(bowEntryToKfId_.size())) bowEntryToKfId_.resize(entryId + 1, -1); + bowEntryToKfId_[entryId] = kf.id; + + const int n = bowKeyframesAdded_.fetch_add(1) + 1; + if(n == 1 || (n % 50) == 0) { + CV_LOG_INFO(NULL, "DBoW3 DB entries: keyframes_added=" << n << ", last_kf_id=" << kf.id); + } + } + + std::vector queryLoopCandidates(const KeyFrame& kf, int minGap, int topK){ + std::vector ids; + if(!bowReady_ || !bowDb_ || kf.desc.empty()) return ids; + bowQueries_.fetch_add(1); + DBoW3::QueryResults ret; + // DBoW3 API: query(features, QueryResults&, maxResults, maxId) + // NOTE: max_id < 0 means "all" in DBoW3. Passing 0 would restrict to entryId <= 0 + // and effectively break loop-candidate retrieval once the DB has more entries. + bowDb_->query(kf.desc, ret, topK + 10, -1); // extra results for filtering + for(const auto &r : ret){ + if(static_cast(r.Id) < bowEntryToKfId_.size()){ + int candId = bowEntryToKfId_[static_cast(r.Id)]; + if(candId < 0) continue; + if(std::abs(kf.id - candId) < minGap) continue; + ids.push_back(candId); + if(static_cast(ids.size()) >= topK) break; + } + } + bowCandidatesTotal_.fetch_add(static_cast(ids.size())); + return ids; + } +#endif + +#if !defined(HAVE_DBOW3) + bool setLoopVocabulary(const std::string&){ return false; } +#endif + private: + // Mutual ratio test with cross-check for loop candidate scoring. + static std::vector mutualRatioMatches(const Mat& desc1, const Mat& desc2, DescriptorMatcher& matcher, float ratio){ + std::vector> knn12, knn21; + matcher.knnMatch(desc1, desc2, knn12, 2); + matcher.knnMatch(desc2, desc1, knn21, 2); + std::vector good; + good.reserve(knn12.size()); + for(size_t qi = 0; qi < knn12.size(); ++qi){ + if(knn12[qi].empty()) continue; + const DMatch &best = knn12[qi][0]; + if(knn12[qi].size() >= 2){ + const DMatch &second = knn12[qi][1]; + if(second.distance > 0.0f && best.distance / second.distance > ratio) continue; + } + int trainIdx = best.trainIdx; + if(trainIdx < 0 || trainIdx >= (int)knn21.size() || knn21[trainIdx].empty()) continue; + const DMatch &rbest = knn21[trainIdx][0]; + if(rbest.trainIdx == (int)qi) good.push_back(best); + } + return good; + } + + // Try to detect a loop with the latest keyframe and optimize poses if successful. + bool tryLoopClosureAndOptimize(std::vector &kfs, std::vector &mps){ + const int K = static_cast(kfs.size()); + if(K < 5 || fx_ <= 1e-6 || fy_ <= 1e-6) return false; + const int lastIdx = K - 1; + const int lastId = kfs[lastIdx].id; + const int minGap = 30; // do not loop with very recent keyframes + if(lastIdx < minGap) return false; + + const int minMatches = 80; + const int minInliers = 50; + const double minInlierRatio = 0.30; + + loopAttempts_.fetch_add(1); + + std::unordered_map idToIdx; idToIdx.reserve(kfs.size()); + for(int i = 0; i < K; ++i) idToIdx[kfs[i].id] = i; + + std::vector candidates; +#if defined(HAVE_DBOW3) + if(bowReady_){ + candidates = queryLoopCandidates(kfs[lastIdx], minGap, 5); + } +#endif + + if(candidates.empty()){ + int bestIdx = -1; + int bestMatches = 0; + std::vector bestDMatches; + for(int i = 0; i <= lastIdx - minGap; ++i){ + if(kfs[i].desc.empty() || kfs[lastIdx].desc.empty()) continue; + auto dm = mutualRatioMatches(kfs[i].desc, kfs[lastIdx].desc, *loopMatcher_, 0.75f); + if((int)dm.size() > bestMatches){ + bestMatches = static_cast(dm.size()); + bestIdx = i; + bestDMatches = std::move(dm); + } + } + if(bestIdx >= 0 && bestMatches >= minMatches) candidates.push_back(kfs[bestIdx].id); + else return false; + } + + for(int candId : candidates){ + auto it = idToIdx.find(candId); + if(it == idToIdx.end()) continue; + int candIdx = it->second; + if(kfs[candIdx].desc.empty() || kfs[lastIdx].desc.empty()) continue; + + auto dm = mutualRatioMatches(kfs[candIdx].desc, kfs[lastIdx].desc, *loopMatcher_, 0.75f); + if((int)dm.size() < minMatches) continue; + + std::vector pts1, pts2; + pts1.reserve(dm.size()); pts2.reserve(dm.size()); + for(const auto &m : dm){ + pts1.push_back(kfs[candIdx].kps[m.queryIdx].pt); + pts2.push_back(kfs[lastIdx].kps[m.trainIdx].pt); + } + + Mat mask; + Mat E = findEssentialMat(pts1, pts2, fx_, Point2d(cx_, cy_), RANSAC, 0.999, 1.0, mask); + if(E.empty()) continue; + int inliers = mask.empty() ? 0 : cv::countNonZero(mask); + if(inliers < minInliers || inliers < static_cast(pts1.size() * minInlierRatio)) continue; + + Mat Rlc, tlc; + int validPts = recoverPose(E, pts1, pts2, Rlc, tlc, fx_, Point2d(cx_, cy_), mask); + if(validPts < minInliers) continue; + + // Compute actual baseline between candidate and last keyframe for consistent scale + Mat Ri = kfs[candIdx].R_w, Ci = kfs[candIdx].t_w; + Mat Rj = kfs[lastIdx].R_w, Cj = kfs[lastIdx].t_w; + double actualBaseline = cv::norm(Cj - Ci); + + // Normalize tlc and apply actual baseline for consistent scale + Mat tlc_norm = tlc; + if(tlc_norm.type() != CV_64F) tlc_norm.convertTo(tlc_norm, CV_64F); + double tlcNorm = cv::norm(tlc_norm); + if(tlcNorm > 1e-9){ + tlc_norm = tlc_norm / tlcNorm * actualBaseline; + } else { + tlc_norm = Mat::zeros(3,1,CV_64F); + } + + std::vector allIdx(K); std::iota(allIdx.begin(), allIdx.end(), 0); + std::vector fixed{0}; if(K > 1) fixed.push_back(1); + Optimizer::localBundleAdjustmentSFM(kfs, mps, allIdx, fixed, fx_, fy_, cx_, cy_, sysOpts_.backendIterations * 2); + + // Add pose-graph edge with consistent scale + // Use actual relative translation (Ri^T * (Cj - Ci)) as the constraint + // This ensures scale consistency with the current map + PoseGraphEdge edge; + edge.i = kfs[candIdx].id; edge.j = kfs[lastIdx].id; + edge.R_ij = Rlc.clone(); + // Use actual relative translation in frame i (consistent scale) + edge.t_ij = Ri.t() * (Cj - Ci); + // Optionally blend with geometric direction from loop estimate (with consistent scale) + // This provides geometric constraint while maintaining scale consistency + Mat t_geometric = Ri * tlc_norm; + // Blend: 70% actual baseline, 30% geometric direction (weighted average) + edge.t_ij = edge.t_ij * 0.7 + t_geometric * 0.3; + edge.weight = 1.0; + poseGraphEdges_.push_back(edge); + + // Pose-graph optimization on all keyframes + Optimizer::poseGraphOptimize(kfs, poseGraphEdges_, fixed, std::max(5, sysOpts_.backendIterations), 0.5); + +#if defined(HAVE_SFM) + // Refine with global BA if available (uses updated poses as initial guess) + Optimizer::globalBundleAdjustmentSFM(kfs, mps, fx_, fy_, cx_, cy_, sysOpts_.backendIterations); +#endif + // Write back optimized poses/points + { + std::lock_guard lk(mapMutex_); + for(const auto &kf : kfs){ map_.applyOptimizedKeyframePose(kf.id, kf.R_w, kf.t_w); } + for(const auto &mp : mps){ map_.applyOptimizedMapPoint(mp.id, mp.p); } + } + + loopAccepted_.fetch_add(1); + CV_LOG_INFO(NULL, "[LoopClosure] kf" << lastId << "-kf" << kfs[candIdx].id << ": " << inliers << "/" << dm.size() << " inliers"); + return true; + } + return false; + } void ensureBackend(){ if(backendStarted_ || !sysOpts_.enableBackend) return; backendStop_.store(false); @@ -132,6 +421,17 @@ class SlamSystem::Impl { backendCv_.notify_one(); if(backendThread_.joinable()) backendThread_.join(); backendStarted_ = false; + +#if defined(HAVE_DBOW3) + if(bowReady_){ + CV_LOG_INFO(NULL, + "Loop/BoW stats: bow_kf_added=" << bowKeyframesAdded_.load() + << ", bow_queries=" << bowQueries_.load() + << ", bow_candidates_total=" << bowCandidatesTotal_.load() + << ", loop_attempts=" << loopAttempts_.load() + << ", loop_accepted=" << loopAccepted_.load()); + } +#endif } void backendLoop(){ @@ -150,6 +450,9 @@ class SlamSystem::Impl { kfs = map_.keyframes(); mps = map_.mappoints(); } + // Preserve pre-optimization state for sanity checks before writeback. + const std::vector kfsPrior = kfs; + const std::vector mpsPrior = mps; lk.unlock(); int K = static_cast(kfs.size()); @@ -165,19 +468,107 @@ class SlamSystem::Impl { for(const auto &o : mp.observations){ if(o.first == lastId) continue; auto it = idToIdx.find(o.first); if(it != idToIdx.end()) shared[it->second]++; } } std::vector localIdx; localIdx.reserve(sysOpts_.backendWindow); - auto pushU = [&](int idx){ if(idx<0||idx>=K) return; if(idx==0||idx==1) return; if(std::find(localIdx.begin(), localIdx.end(), idx)!=localIdx.end()) return; localIdx.push_back(idx); }; + auto pushU = [&](int idx){ if(idx<0||idx>=K) return; if(std::find(localIdx.begin(), localIdx.end(), idx)!=localIdx.end()) return; localIdx.push_back(idx); }; pushU(lastIdx); + // Always include the first two keyframes to anchor gauge (if present). + if(K > 0) pushU(0); + if(K > 1) pushU(1); std::vector> scored(shared.begin(), shared.end()); std::sort(scored.begin(), scored.end(), [](auto a, auto b){ return a.second > b.second; }); for(const auto &p : scored){ if((int)localIdx.size() >= sysOpts_.backendWindow) break; pushU(p.first); } + // Fill remaining slots with the most recent keyframes to enforce a sliding window. + const int maxRecent = std::max(2, lastIdx - sysOpts_.backendWindow + 1); + for(int idx = lastIdx - 1; idx >= maxRecent && (int)localIdx.size() < sysOpts_.backendWindow; --idx){ + pushU(idx); + } if(localIdx.empty()) localIdx.push_back(lastIdx); std::vector fixed{0}; if(K>1) fixed.push_back(1); + // Keep only map points observed by the local window to reduce BA size and outliers. + std::unordered_set localSet(localIdx.begin(), localIdx.end()); + std::vector mpsLocal; mpsLocal.reserve(mps.size()); + for(const auto &mp : mps){ + MapPoint mpFiltered = mp; + mpFiltered.observations.erase( + std::remove_if(mpFiltered.observations.begin(), mpFiltered.observations.end(), + [&](const std::pair &o){ return localSet.find(idToIdx[o.first]) == localSet.end(); }), + mpFiltered.observations.end()); + if(mpFiltered.observations.size() < 2) continue; // need at least two obs in window + mpsLocal.push_back(std::move(mpFiltered)); + } #if defined(HAVE_SFM) - Optimizer::localBundleAdjustmentSFM(kfs, mps, localIdx, fixed, fx_, fy_, cx_, cy_, sysOpts_.backendIterations); - { - std::lock_guard mapLk(mapMutex_); - for(const auto &kf : kfs) map_.applyOptimizedKeyframePose(kf.id, kf.R_w, kf.t_w); - for(const auto &mp : mps) if(mp.id>0) map_.applyOptimizedMapPoint(mp.id, mp.p); + if(localIdx.size() >= 1){ + // Step 1: First do local BA for local consistency + Optimizer::localBundleAdjustmentSFM(kfs, mpsLocal, localIdx, fixed, fx_, fy_, cx_, cy_, sysOpts_.backendIterations); + + // Step 2: Then check for loop closure (global consistency) + bool loopOptimized = false; + if(lastLoopCheckKfId_ != kfs[lastIdx].id) { + lastLoopCheckKfId_ = kfs[lastIdx].id; + loopOptimized = tryLoopClosureAndOptimize(kfs, mps); + } + + // Step 3: If loop closure succeeded, do a light local BA refinement + // This helps integrate the global correction into the local window + if(loopOptimized){ + Optimizer::localBundleAdjustmentSFM(kfs, mpsLocal, localIdx, fixed, fx_, fy_, cx_, cy_, std::max(5, sysOpts_.backendIterations / 2)); + } + + // Build lookup tables for validation against prior state. + struct PosePrior { Mat R; Mat t; }; + std::unordered_map posePrior; posePrior.reserve(kfsPrior.size()); + for(const auto &kf : kfsPrior) posePrior.emplace(kf.id, PosePrior{kf.R_w.clone(), kf.t_w.clone()}); + std::unordered_map pointPrior; pointPrior.reserve(mpsPrior.size()); + for(const auto &mp : mpsPrior) pointPrior.emplace(mp.id, mp.p); + + auto isFinitePose = [](const Mat &R, const Mat &t){ + return !R.empty() && !t.empty() && R.rows == 3 && R.cols == 3 && t.rows == 3 && t.cols == 1 + && cv::checkRange(R) && cv::checkRange(t); + }; + auto poseAcceptable = [&](int id, const Mat &R, const Mat &t){ + if(!isFinitePose(R, t)) return false; + auto it = posePrior.find(id); + if(it == posePrior.end()) return true; + Mat diff = t - it->second.t; + double jump = cv::norm(diff); + return jump < 1000.0 * scale_; + }; + auto isFinitePoint = [](const Point3d &p){ + return std::isfinite(p.x) && std::isfinite(p.y) && std::isfinite(p.z); + }; + auto pointAcceptable = [&](int id, const Point3d &p){ + if(!isFinitePoint(p)) return false; + double normp = std::sqrt(p.x*p.x + p.y*p.y + p.z*p.z); + if(normp > 1e6 * scale_) return false; + auto it = pointPrior.find(id); + if(it == pointPrior.end()) return true; + Point3d dp(p.x - it->second.x, p.y - it->second.y, p.z - it->second.z); + double step = std::sqrt(dp.x*dp.x + dp.y*dp.y + dp.z*dp.z); + return step < 2000.0 * scale_; + }; + + const std::vector* mpsWrite = loopOptimized ? &mps : &mpsLocal; + int poseApplied = 0, pointApplied = 0; + { + std::lock_guard mapLk(mapMutex_); + for(const auto &kf : kfs){ + if(!poseAcceptable(kf.id, kf.R_w, kf.t_w)){ + continue; + } + map_.applyOptimizedKeyframePose(kf.id, kf.R_w, kf.t_w); + poseApplied++; + } + for(const auto &mp : *mpsWrite){ + if(mp.id <= 0) continue; + if(!pointAcceptable(mp.id, mp.p)){ + continue; + } + map_.applyOptimizedMapPoint(mp.id, mp.p); + pointApplied++; + } + } + if(loopOptimized) { + CV_LOG_INFO(NULL, "[PoseGraph] Poses=" << poseApplied << " Points=" << pointApplied); + } } #endif } @@ -197,6 +588,24 @@ class SlamSystem::Impl { double fx_ = 0.0, fy_ = 0.0, cx_ = 0.0, cy_ = 0.0; double scale_ = 1.0; + Ptr loopMatcher_; + int lastLoopCheckKfId_ = -1; + + std::vector poseGraphEdges_; + +#if defined(HAVE_DBOW3) + std::unique_ptr bowVocab_; + std::unique_ptr bowDb_; + std::vector bowEntryToKfId_; + bool bowReady_ = false; + + std::atomic bowKeyframesAdded_{0}; + std::atomic bowQueries_{0}; + std::atomic bowCandidatesTotal_{0}; + std::atomic loopAttempts_{0}; + std::atomic loopAccepted_{0}; +#endif + std::vector trajectory_; std::thread backendThread_; @@ -228,8 +637,10 @@ void SlamSystem::setBackendWindow(int w){ auto opts = impl_->systemOptions(); op void SlamSystem::setBackendIterations(int it){ auto opts = impl_->systemOptions(); opts.backendIterations = std::max(1, it); impl_->setSystemOptions(opts); } TrackingResult SlamSystem::track(InputArray frame, double ts){ return impl_->track(frame, ts); } bool SlamSystem::saveTrajectoryTUM(const std::string& path) const { return impl_->saveTrajectoryTUM(path); } +bool SlamSystem::saveOptimizedTrajectoryTUM(const std::string& path) const { return impl_->saveOptimizedTrajectoryTUM(path); } bool SlamSystem::saveMap(const std::string& path) const { return impl_->saveMap(path); } bool SlamSystem::loadMap(const std::string& path){ return impl_->loadMap(path); } +bool SlamSystem::setLoopVocabulary(const std::string& path){ return impl_->setLoopVocabulary(path); } void SlamSystem::reset(){ impl_->reset(); } const MapManager& SlamSystem::getMap() const { return impl_->map(); } MapManager& SlamSystem::getMapMutable(){ return impl_->mapMutable(); } diff --git a/modules/slam/src/visual_odometry.cpp b/modules/slam/src/visual_odometry.cpp index 8ba05b1af2e..99ea5ccb22c 100644 --- a/modules/slam/src/visual_odometry.cpp +++ b/modules/slam/src/visual_odometry.cpp @@ -41,6 +41,32 @@ class VisualOdometry::Impl { if(t_out.needed()) t_w_.copyTo(t_out); } + // Sync pose from map if current frame corresponds to an optimized keyframe + void syncFromMap(const MapManager* map, int currentFrameId) { + if(!map || currentFrameId < 0) return; + const auto& kfs = map->keyframes(); + if(kfs.empty()) return; + + // Check if current frame is a keyframe that has been optimized + int kfIdx = map->keyframeIndex(currentFrameId); + if(kfIdx >= 0 && kfIdx < static_cast(kfs.size())){ + const auto& kf = kfs[kfIdx]; + // Only sync if the keyframe pose is valid and different from current + if(!kf.R_w.empty() && !kf.t_w.empty() && + kf.R_w.rows == 3 && kf.R_w.cols == 3 && + kf.t_w.rows == 3 && kf.t_w.cols == 1){ + // Check if poses are significantly different (avoid unnecessary updates) + Mat diff = kf.t_w - t_w_; + double translationDiff = cv::norm(diff); + // Only sync if difference is meaningful (avoid noise) + if(translationDiff > 1e-6){ + R_w_ = kf.R_w.clone(); + t_w_ = kf.t_w.clone(); + } + } + } + } + private: Ptr detector_; Ptr matcher_; @@ -93,6 +119,11 @@ TrackingResult VisualOdometry::Impl::track(InputArray frameIn, double timestamp, return res; } + // Sync pose from map if this frame corresponds to an optimized keyframe + if(map && allowMapping && state_ == TrackingState::TRACKING){ + syncFromMap(map, frameId_); + } + Mat frame = frameIn.getMat(); Mat color = frame.channels() == 1 ? Mat() : frame; if(color.empty()){ cvtColor(frame, color, COLOR_GRAY2BGR); } @@ -138,6 +169,10 @@ TrackingResult VisualOdometry::Impl::track(InputArray frameIn, double timestamp, auto tmp = flows; size_t mid = tmp.size()/2; std::nth_element(tmp.begin(), tmp.begin()+mid, tmp.end()); median_flow = tmp[mid]; } + // For visualization: store current-frame match locations and initialize inlier mask. + res.matchPoints = pts2; + res.inlierMask.assign(pts2.size(), 0); + // Localization mode: do not modify the map, use PnP against an existing map with quality checks. if(map && !allowMapping){ Mat R_pnp, t_pnp; int inliers_pnp = 0; bool ok_pnp = false; @@ -175,10 +210,12 @@ TrackingResult VisualOdometry::Impl::track(InputArray frameIn, double timestamp, bool okInit = initializer_.initialize(prevKps_, kps, matches, fx_, fy_, cx_, cy_, R_init, t_init, pts3D, isTri); if(okInit){ Mat prevImg = prevColor_.empty() ? prevGray_ : prevColor_; - KeyFrame kf0(frameId_ - 1, prevImg, prevKps_, prevDesc_, Mat::eye(3,3,CV_64F), Mat::zeros(3,1,CV_64F)); + // Use previous timestamp (approximate) for kf0, current timestamp for kf1 + double prevTimestamp = timestamp > 0.0 ? timestamp - 0.033 : 0.0; // Assume ~30fps if unknown + KeyFrame kf0(frameId_ - 1, prevTimestamp, prevImg, prevKps_, prevDesc_, Mat::eye(3,3,CV_64F), Mat::zeros(3,1,CV_64F)); Mat Rwc1 = R_init.t(); Mat Cw1 = (-Rwc1 * t_init) * scale_; - KeyFrame kf1(frameId_, color, kps, desc, Rwc1, Cw1); + KeyFrame kf1(frameId_, timestamp, color, kps, desc, Rwc1, Cw1); std::vector newMps; newMps.reserve(pts3D.size()); for(size_t i=0;iaddKeyFrame(kf0); map->addKeyFrame(kf1); if(!newMps.empty()) map->addMapPoints(newMps); + map->updateAllMapPointDescriptors(); R_w_ = kf1.R_w.clone(); t_w_ = kf1.t_w.clone(); @@ -211,6 +249,13 @@ TrackingResult VisualOdometry::Impl::track(InputArray frameIn, double timestamp, Mat R_est, t_est, mask_est; int inliers_est = 0; bool ok_est = false; if(pts1.size() >= static_cast(std::max(8, options_.minMatches))){ ok_est = poseEst_.estimate(pts1, pts2, fx_, fy_, cx_, cy_, R_est, t_est, mask_est, inliers_est); + if(ok_est && !mask_est.empty() && mask_est.total() == pts2.size()){ + // Copy inlier flags (mask is CV_8U 0/1 or 0/255) + for(size_t i = 0; i < pts2.size(); ++i){ + uchar v = mask_est.at(static_cast(i)); + res.inlierMask[i] = (v != 0) ? 1 : 0; + } + } } Mat R_pnp, t_pnp; int inliers_pnp = 0; bool ok_pnp = false; int pnpMatches = 0; double pnpMeanReproj = std::numeric_limits::infinity(); @@ -220,7 +265,7 @@ TrackingResult VisualOdometry::Impl::track(InputArray frameIn, double timestamp, nullptr, &pnpMatches, &pnpMeanReproj); if(ok_pnp){ if(pnpMatches > 0 && inliers_pnp < static_cast(pnpMatches * options_.minInlierRatio)) ok_pnp = false; - if(pnpMeanReproj > 5.0) ok_pnp = false; + if(pnpMeanReproj > options_.pnpMaxReprojError) ok_pnp = false; } } @@ -283,7 +328,22 @@ TrackingResult VisualOdometry::Impl::track(InputArray frameIn, double timestamp, if(matchCount > 0 && inliers_use < static_cast(matchCount * options_.minInlierRatio)) insertKf = false; if(insertKf){ - KeyFrame kf(frameId_, color, kps, desc, R_w_, t_w_); + // Before inserting keyframe, check if map has optimized pose for this frame + if(map){ + int kfIdx = map->keyframeIndex(frameId_); + if(kfIdx >= 0){ + const auto& kfs = map->keyframes(); + if(kfIdx < static_cast(kfs.size())){ + const auto& optimizedKf = kfs[kfIdx]; + // Use optimized pose if available + if(!optimizedKf.R_w.empty() && !optimizedKf.t_w.empty()){ + R_w_ = optimizedKf.R_w.clone(); + t_w_ = optimizedKf.t_w.clone(); + } + } + } + } + KeyFrame kf(frameId_, timestamp, color, kps, desc, R_w_, t_w_); map->addKeyFrame(kf); res.keyframeInserted = true; @@ -306,8 +366,11 @@ TrackingResult VisualOdometry::Impl::track(InputArray frameIn, double timestamp, idx1.push_back(m.queryIdx); idx2.push_back(m.trainIdx); } - auto newMps = map->triangulateBetweenLastTwo(pts1n, pts2n, idx1, idx2, lastKf, curKf, fx_, fy_, cx_, cy_); + double parallaxRad = options_.triMinParallaxDeg * CV_PI / 180.0; + auto newMps = map->triangulateBetweenLastTwo(pts1n, pts2n, idx1, idx2, lastKf, curKf, + fx_, fy_, cx_, cy_, options_.triMaxReprojErrorPx, parallaxRad); if(!newMps.empty()) map->addMapPoints(newMps); + map->updateAllMapPointDescriptors(); } } } diff --git a/modules/slam/src/visualizer.cpp b/modules/slam/src/visualizer.cpp index cc1f3947e9b..295824f6085 100644 --- a/modules/slam/src/visualizer.cpp +++ b/modules/slam/src/visualizer.cpp @@ -2,25 +2,53 @@ #include "opencv2/slam/visual_odometry.hpp" #include #include +#include namespace cv { namespace vo { Visualizer::Visualizer(int W, int H, double meters_per_pixel) - : meters_per_pixel_(meters_per_pixel), mapSize_(W,H) + : meters_per_pixel_(meters_per_pixel), dynamic_meters_per_pixel_(meters_per_pixel), world_center_(0.0, 0.0), mapSize_(W,H) { map_ = Mat::zeros(mapSize_, CV_8UC3); } +void Visualizer::updateViewport(){ + if(traj_.empty()){ + dynamic_meters_per_pixel_ = meters_per_pixel_; + world_center_ = Point2d(0.0, 0.0); + return; + } + + double minx = traj_[0].x, maxx = traj_[0].x; + double minz = traj_[0].y, maxz = traj_[0].y; + for(const auto &p : traj_){ + minx = std::min(minx, p.x); maxx = std::max(maxx, p.x); + minz = std::min(minz, p.y); maxz = std::max(maxz, p.y); + } + double rangeX = std::max(1e-6, maxx - minx); + double rangeZ = std::max(1e-6, maxz - minz); + world_center_ = Point2d(0.5 * (minx + maxx), 0.5 * (minz + maxz)); + + const double marginPx = 40.0; + double availX = std::max(1.0, mapSize_.width - 2.0 * marginPx); + double availZ = std::max(1.0, mapSize_.height - 2.0 * marginPx); + double required = std::max(rangeX / availX, rangeZ / availZ); + // If required is tiny (small trajectory), zoom in but cap by nominal scale to avoid extreme zoom. + dynamic_meters_per_pixel_ = std::max(required, meters_per_pixel_ * 0.1); +} + Point Visualizer::worldToPixel(const Point2d &p) const { Point2d origin(mapSize_.width/2.0, mapSize_.height/2.0); - int x = int(origin.x + p.x / meters_per_pixel_); - int y = int(origin.y - p.y / meters_per_pixel_); + // p.x is world x, p.y is world z (stored directly from addPose) + int x = int(origin.x + (p.x - world_center_.x) / dynamic_meters_per_pixel_); + int y = int(origin.y - (p.y - world_center_.y) / dynamic_meters_per_pixel_); // -p.y because image y increases downward return Point(x,y); } void Visualizer::addPose(double x, double z){ - traj_.emplace_back(x,-z); + // worldToPixel will handle the coordinate transformation + traj_.emplace_back(x, z); } void Visualizer::clearTrajectory(){ @@ -37,17 +65,36 @@ void Visualizer::showFrame(const Mat &frame){ imshow("frame", frame); } -void Visualizer::drawTrackingInfo(Mat &frame, const TrackingResult &res){ +void Visualizer::drawTrackingInfo(Mat &frame, const TrackingResult &res, int frame_id){ if(frame.empty()) return; - std::string info = res.ok ? "TRACKING" : "NOT TRACKING"; - info += " matches=" + std::to_string(res.numMatches); - info += " inliers=" + std::to_string(res.numInliers); - info += " state=" + std::to_string(res.state); + // Display frame index (1-based if provided), then status and simple stats. + std::ostringstream ss; + if(frame_id > 0) ss << "Frame=" << frame_id << " "; + ss << (res.ok ? "TRACKING" : "NOT TRACKING"); + ss << " matches=" << res.numMatches; + ss << " inliers=" << res.numInliers; + Scalar color = res.ok ? Scalar(0, 255, 0) : Scalar(0, 0, 255); - putText(frame, info, Point(10, 30), FONT_HERSHEY_SIMPLEX, 0.7, color, 2, LINE_AA); + putText(frame, ss.str(), Point(10, 30), FONT_HERSHEY_SIMPLEX, 0.7, color, 2, LINE_AA); + + // Overlay colors: pure green = matches, pure blue = inliers (inliers override matches) + const Scalar matchColor(0, 255, 0); // pure green (B,G,R) + const Scalar inlierColor(255, 0, 0); // pure blue (B,G,R) + const int radius = 2; + const int thickness = 1; + size_t N = res.matchPoints.size(); + if(N){ + for(size_t i = 0; i < N; ++i){ + bool isInlier = (i < res.inlierMask.size() && res.inlierMask[i]); + const Scalar &c = isInlier ? inlierColor : matchColor; + Point pt(cvRound(res.matchPoints[i].x), cvRound(res.matchPoints[i].y)); + circle(frame, pt, radius, c, thickness, LINE_AA); + } + } } void Visualizer::showTopdown(){ + updateViewport(); map_ = Mat::zeros(mapSize_, CV_8UC3); for (int gx = 0; gx < mapSize_.width; gx += 50) line(map_, Point(gx,0), Point(gx,mapSize_.height), Scalar(30,30,30), 1); for (int gy = 0; gy < mapSize_.height; gy += 50) line(map_, Point(0,gy), Point(mapSize_.width,gy), Scalar(30,30,30), 1); @@ -72,10 +119,10 @@ void Visualizer::showTopdown(){ double norm = std::hypot(dx, dz); if(norm > 1e-6){ dx /= norm; dz /= norm; - // arrow length in world meters - double arrow_m = 0.5; // 0.5 meters - // tail is behind the current position by arrow_m, head (tip) at current position - Point2d tail_world(traj_.back().x - dx * arrow_m, traj_.back().y - dz * arrow_m); + // Keep arrow length constant in screen pixels regardless of world scaling. + const double arrow_px = 25.0; // pixels + const double arrow_world = arrow_px * dynamic_meters_per_pixel_; + Point2d tail_world(traj_.back().x - dx * arrow_world, traj_.back().y - dz * arrow_world); Point tail_px = worldToPixel(tail_world); arrowedLine(map_, tail_px, p, Scalar(0,0,255), 2, LINE_AA, 0, 0.3); }