diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3613d42..49ada1e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,6 +33,8 @@ if(NOT HEADLESS)
     include_directories(${Pangolin_INCLUDE_DIRS})
 endif()
 
+# Executables
+# Test initial setup
 # Core libraries
 add_library(realsense_camera src/realsense_camera.cpp)
 target_link_libraries(realsense_camera ${OpenCV_LIBS} ${realsense2_LIBRARY})
@@ -83,6 +85,23 @@ set_target_properties(test_initial_setup PROPERTIES
     INSTALL_RPATH "/usr/local/lib"
 )
 
+# Basic Visual Odometry Tracker
+add_executable(basic_vo_tracker src/basic_vo_tracker.cpp)
+target_link_libraries(basic_vo_tracker
+    realsense_camera
+    orb_detector
+    initializer
+    ${OpenCV_LIBS}
+    ${realsense2_LIBRARY}
+    Eigen3::Eigen
+)
+
+# RPATH
+set_target_properties(basic_vo_tracker PROPERTIES
+    BUILD_WITH_INSTALL_RPATH TRUE
+    INSTALL_RPATH_USE_LINK_PATH TRUE
+    INSTALL_RPATH "/usr/local/lib"
+)
 # Enable testing
 enable_testing()
 add_subdirectory(tests)
diff --git a/include/initializer.hpp b/include/initializer.hpp
index 0bad997..bcef7e2 100644
--- a/include/initializer.hpp
+++ b/include/initializer.hpp
@@ -2,14 +2,42 @@
 #include "realsense_camera.hpp"
 #include "orb_detector.hpp"
 #include "map_point.hpp"
+#include <Eigen/Dense>
+#include <opencv2/core.hpp>
 #include <vector>
 
+// Structure to hold initial map data
+struct InitialMapData {
+    cv::Mat color;
+    cv::Mat depth;
+    std::vector<cv::KeyPoint> keypoints;
+    cv::Mat descriptors;
+    std::vector<MapPoint> map_points;
+    Eigen::Matrix4d initial_pose;
+    int num_frames_used;  // Track how many frames were used for initialization
+
+    InitialMapData() {
+        initial_pose = Eigen::Matrix4d::Identity();
+        num_frames_used = 1;
+    }
+};
+
 class Initializer {
 public:
+    // Single-frame initialization (backward compatibility)
     bool initialize(RealSenseCamera &camera);
+    bool initialize(RealSenseCamera &camera, InitialMapData &initial_data);
+
+    // Multi-frame initialization (recommended for better reliability)
+    bool initializeMultiFrame(RealSenseCamera &camera, InitialMapData &initial_data, int num_frames = 3);
+
     std::vector<MapPoint> backProjectKeypoints(
         const std::vector<cv::KeyPoint> &keypoints,
         const cv::Mat &depth,
         const rs2_intrinsics &intrinsics);
+
+private:
+    // Helper methods for multi-frame initialization
+    bool validateFrameQuality(const std::vector<cv::KeyPoint> &keypoints, const cv::Mat &depth);
+    bool validateMatchQuality(const std::vector<cv::DMatch> &matches, int min_matches = 50);
 };
diff --git a/src/basic_vo_tracker.cpp b/src/basic_vo_tracker.cpp
new file mode 100644
index 0000000..2c9f2f3
--- /dev/null
+++ b/src/basic_vo_tracker.cpp
@@ -0,0 +1,276 @@
+#include "realsense_camera.hpp"
+#include "orb_detector.hpp"
+#include "initializer.hpp"
+#include <opencv2/opencv.hpp>
+#include <Eigen/Dense>
+#include <iostream>
+#include <vector>
+#include <thread>
+#include <chrono>
+#include <cmath>
+#include <cstdlib>
+
+struct Frame {
+    cv::Mat color;
+    cv::Mat depth;
+    std::vector<cv::KeyPoint> keypoints;
+    cv::Mat descriptors;
+};
+
+// Convert pixel + depth to 3D point using camera intrinsics
+cv::Point3f deproject(const cv::Point2f& pixel, float depth_value, const rs2_intrinsics& intrinsics) {
+    float z = depth_value;
+    float x = (pixel.x - intrinsics.ppx) * z / intrinsics.fx;
+    float y = (pixel.y - intrinsics.ppy) * z / intrinsics.fy;
+    return cv::Point3f(x, y, z);
+}
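+
+// Back-projection above is the standard pinhole model:
+//   X = (u - ppx) * Z / fx,   Y = (v - ppy) * Z / fy,   Z = depth
+// librealsense ships an equivalent helper, rs2_deproject_pixel_to_point()
+// in <librealsense2/rsutil.h>, which also handles the camera's distortion
+// model; this hand-rolled version assumes distortion is negligible.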
+
+// Estimate pose using PnP RANSAC
+bool estimatePose(const std::vector<cv::Point3f>& points3d_prev,
+                  const std::vector<cv::Point2f>& points2d_curr,
+                  const rs2_intrinsics& intrinsics,
+                  cv::Mat& rvec, cv::Mat& tvec) {
+
+    if (points3d_prev.size() < 10) {
+        return false;
+    }
+
+    // Build camera intrinsic matrix from RealSense intrinsics
+    cv::Mat K = (cv::Mat_<double>(3, 3) << intrinsics.fx, 0, intrinsics.ppx,
+                                           0, intrinsics.fy, intrinsics.ppy,
+                                           0, 0, 1);
+
+    // Use PnP RANSAC to estimate pose
+    std::vector<int> inliers;
+    bool success = cv::solvePnPRansac(
+        points3d_prev,
+        points2d_curr,
+        K,
+        cv::Mat(),   // no distortion
+        rvec,
+        tvec,
+        false,       // no extrinsic guess
+        100,         // iterations
+        8.0,         // reprojection error threshold (pixels)
+        0.99,        // confidence
+        inliers
+    );
+
+    if (success && inliers.size() >= 10) {
+        std::cout << "  [PnP] Inliers: " << inliers.size()
+                  << "/" << points3d_prev.size() << std::endl;
+        return true;
+    }
+
+    return false;
+}
+
+// Convert rotation vector and translation to transformation matrix
+Eigen::Matrix4d toMatrix4d(const cv::Mat& rvec, const cv::Mat& tvec) {
+    cv::Mat R;
+    cv::Rodrigues(rvec, R);
+
+    Eigen::Matrix4d T = Eigen::Matrix4d::Identity();
+    T(0, 0) = R.at<double>(0, 0);
+    T(0, 1) = R.at<double>(0, 1);
+    T(0, 2) = R.at<double>(0, 2);
+    T(1, 0) = R.at<double>(1, 0);
+    T(1, 1) = R.at<double>(1, 1);
+    T(1, 2) = R.at<double>(1, 2);
+    T(2, 0) = R.at<double>(2, 0);
+    T(2, 1) = R.at<double>(2, 1);
+    T(2, 2) = R.at<double>(2, 2);
+    T(0, 3) = tvec.at<double>(0);
+    T(1, 3) = tvec.at<double>(1);
+    T(2, 3) = tvec.at<double>(2);
+
+    return T;
+}
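+
+// Note: the element-wise copy above could equally be done with
+// cv::cv2eigen() from <opencv2/core/eigen.hpp>, e.g.
+//   Eigen::Matrix3d R_eig; cv::cv2eigen(R, R_eig); T.block<3, 3>(0, 0) = R_eig;
+// The explicit form avoids the extra header dependency.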
+
+void printPose(const Eigen::Matrix4d& pose, int frame_num) {
+    Eigen::Vector3d translation = pose.block<3, 1>(0, 3);
+    Eigen::Matrix3d rotation = pose.block<3, 3>(0, 0);
+
+    // Convert rotation matrix to Euler angles (roll, pitch, yaw)
+    Eigen::Vector3d euler = rotation.eulerAngles(0, 1, 2);
+
+    std::cout << "\n=== Frame " << frame_num << " Camera Pose ===" << std::endl;
+    std::cout << "Translation (x, y, z): "
+              << translation.x() << ", "
+              << translation.y() << ", "
+              << translation.z() << " meters" << std::endl;
+    std::cout << "Rotation (roll, pitch, yaw): "
+              << euler.x() * 180.0 / M_PI << ", "
+              << euler.y() * 180.0 / M_PI << ", "
+              << euler.z() * 180.0 / M_PI << " degrees" << std::endl;
+    std::cout << "Transformation Matrix:\n" << pose << std::endl;
+}
+
+int main(int argc, char** argv) {
+    try {
+        std::cout << "=== Integrated Visual Odometry System ===" << std::endl;
+
+        // Initialize camera
+        std::cout << "\n[Step 1] Initializing camera..." << std::endl;
+        RealSenseCamera camera;
+        rs2_intrinsics intrinsics = camera.getIntrinsics();
+
+        // Initialize map using a multi-frame approach for better reliability
+        std::cout << "\n[Step 2] Initializing map with multi-frame approach..." << std::endl;
+        Initializer initializer;
+        InitialMapData initial_data;
+
+        // Use 3 frames for initialization (configurable)
+        const int NUM_INIT_FRAMES = 3;
+
+        if (!initializer.initializeMultiFrame(camera, initial_data, NUM_INIT_FRAMES)) {
+            std::cerr << "[ERROR] Multi-frame map initialization failed!" << std::endl;
+            std::cerr << "        Falling back to single-frame initialization..." << std::endl;
+
+            // Fall back to single-frame initialization
+            if (!initializer.initialize(camera, initial_data)) {
+                std::cerr << "[ERROR] Single-frame initialization also failed!" << std::endl;
+                return EXIT_FAILURE;
+            }
+        }
+
+        std::cout << "[Step 2] Map initialization successful!" << std::endl;
+        std::cout << "         - Frames used: " << initial_data.num_frames_used << std::endl;
+        std::cout << "         - Initial keypoints: " << initial_data.keypoints.size() << std::endl;
+        std::cout << "         - Initial 3D map points: " << initial_data.map_points.size() << std::endl;
+
+        // Create ORB feature detector for subsequent frames
+        ORBDetector orb_detector(1000);
+
+        // BFMatcher for feature matching
+        cv::BFMatcher matcher(cv::NORM_HAMMING);
+
+        // Initialize tracking with the first frame from the initializer
+        Frame prev_frame, curr_frame;
+        prev_frame.color = initial_data.color;
+        prev_frame.depth = initial_data.depth;
+        prev_frame.keypoints = initial_data.keypoints;
+        prev_frame.descriptors = initial_data.descriptors;
+
+        Eigen::Matrix4d cumulative_pose = initial_data.initial_pose;
+
+        int frame_count = 1;  // Start from 1 since frame 0 was used for initialization
+
+        std::cout << "\n[Step 3] Starting visual odometry tracking...\n" << std::endl;
+
+        // Print initial pose
+        printPose(cumulative_pose, 0);
+
+        // Main tracking loop
+        while (true) {
+            // 1. Grab synchronized RGB and depth frames
+            if (!camera.getFrame(curr_frame.color, curr_frame.depth)) {
+                std::cerr << "Failed to get frame, skipping..." << std::endl;
+                continue;
+            }
+
+            // 2. Extract ORB features from the RGB frame
+            orb_detector.detectAndCompute(curr_frame.color, curr_frame.keypoints, curr_frame.descriptors);
+
+            std::cout << "Frame " << frame_count << ": Detected "
+                      << curr_frame.keypoints.size() << " features" << std::endl;
+
+            // 3. Track features from the previous frame to the current frame
+            if (prev_frame.descriptors.empty() || curr_frame.descriptors.empty()) {
+                std::cout << "  [Warning] No descriptors in one of the frames, skipping..." << std::endl;
+                prev_frame = curr_frame;
+                frame_count++;
+                continue;
+            }
+
+            std::vector<cv::DMatch> matches;
+            matcher.match(prev_frame.descriptors, curr_frame.descriptors, matches);
+
+            // Filter matches by distance
+            double min_dist = 100.0;
+            for (const auto& match : matches) {
+                if (match.distance < min_dist) {
+                    min_dist = match.distance;
+                }
+            }
+
+            std::vector<cv::DMatch> good_matches;
+            for (const auto& match : matches) {
+                if (match.distance <= std::max(2.0 * min_dist, 30.0)) {
+                    good_matches.push_back(match);
+                }
+            }
+
+            std::cout << "  [Matching] Good matches: " << good_matches.size()
+                      << "/" << matches.size() << std::endl;
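+
+            // Filtering note: the 2 * min_dist rule with a 30.0 floor is a
+            // common ORB matching heuristic; a knnMatch() plus Lowe ratio test
+            // (keep a match when best.distance < 0.7 * second_best.distance)
+            // is a standard alternative when min_dist is unstable.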
+
+            if (good_matches.size() < 10) {
+                std::cout << "  [Warning] Not enough good matches, skipping pose estimation..." << std::endl;
+                prev_frame = curr_frame;
+                frame_count++;
+                continue;
+            }
+
+            // 4. Estimate the camera's new pose
+            // Build 3D-2D correspondences
+            std::vector<cv::Point3f> points3d_prev;
+            std::vector<cv::Point2f> points2d_curr;
+
+            for (const auto& match : good_matches) {
+                cv::Point2f pt_prev = prev_frame.keypoints[match.queryIdx].pt;
+                cv::Point2f pt_curr = curr_frame.keypoints[match.trainIdx].pt;
+
+                // Get depth at the previous frame's keypoint
+                int x = static_cast<int>(pt_prev.x);
+                int y = static_cast<int>(pt_prev.y);
+
+                if (x >= 0 && x < prev_frame.depth.cols &&
+                    y >= 0 && y < prev_frame.depth.rows) {
+
+                    float depth_val = prev_frame.depth.at<ushort>(y, x) / 1000.0f;  // Convert mm to meters
+
+                    if (depth_val > 0.1f && depth_val < 10.0f) {  // valid depth (0.1 m - 10 m)
+                        cv::Point3f point3d = deproject(pt_prev, depth_val, intrinsics);
+                        points3d_prev.push_back(point3d);
+                        points2d_curr.push_back(pt_curr);
+                    }
+                }
+            }
+
+            std::cout << "  [3D-2D] Valid correspondences: " << points3d_prev.size() << std::endl;
+
+            // Estimate pose using PnP
+            cv::Mat rvec, tvec;
+            if (estimatePose(points3d_prev, points2d_curr, intrinsics, rvec, tvec)) {
+                // Convert to transformation matrix
+                Eigen::Matrix4d relative_pose = toMatrix4d(rvec, tvec);
+
+                // Update cumulative pose (T_new = T_old * T_relative^-1)
+                cumulative_pose = cumulative_pose * relative_pose.inverse();
+
+                // 5. Print the estimated camera pose
+                printPose(cumulative_pose, frame_count);
+            } else {
+                std::cout << "  [Warning] Pose estimation failed, keeping previous pose..." << std::endl;
+            }
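+
+            // Pose composition: solvePnPRansac() returns (rvec, tvec) mapping
+            // points from the previous frame's coordinates into the current
+            // camera frame (T_curr_prev), so the world pose updates as
+            // T_w_curr = T_w_prev * T_curr_prev^-1 -- hence the inverse() above.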
+
+            // Update for the next iteration
+            prev_frame = curr_frame;
+            frame_count++;
+
+            // Optional: limit frame rate
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
+
+            // Optional: break after a certain number of frames for testing
+            // if (frame_count >= 50) break;
+        }
+
+        std::cout << "\nTracking finished." << std::endl;
+
+    } catch (const std::exception& e) {
+        std::cerr << "[EXCEPTION] " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/src/initializer.cpp b/src/initializer.cpp
index 004f93e..1c7bd1e 100644
--- a/src/initializer.cpp
+++ b/src/initializer.cpp
@@ -1,8 +1,15 @@
 #include "initializer.hpp"
 #include <iostream>
 #include <opencv2/opencv.hpp>
+#include <thread>
+#include <chrono>
 
 bool Initializer::initialize(RealSenseCamera &camera) {
+    InitialMapData dummy_data;
+    return initialize(camera, dummy_data);
+}
+
+bool Initializer::initialize(RealSenseCamera &camera, InitialMapData &initial_data) {
     std::cout << "[Initializer] Starting map initialization..." << std::endl;
 
     // Capture a single RGB-D frame
@@ -12,6 +19,10 @@ bool Initializer::initialize(RealSenseCamera &camera) {
         return false;
     }
 
+    // Store frame data
+    initial_data.color = color.clone();
+    initial_data.depth = depth.clone();
+
     std::cout << "[Initializer] Captured RGB-D frame: "
               << color.cols << "x" << color.rows << std::endl;
 
@@ -42,15 +53,22 @@ bool Initializer::initialize(RealSenseCamera &camera) {
         return false;
     }
 
+    // Store keypoints and descriptors
+    initial_data.keypoints = keypoints;
+    initial_data.descriptors = descriptors.clone();
+
     // Extract 3D coordinates using the depth map
     rs2_intrinsics intrinsics = camera.getIntrinsics();
     std::vector<MapPoint> map_points = backProjectKeypoints(keypoints, depth, intrinsics);
 
+    // Store map points
+    initial_data.map_points = map_points;
+
     std::cout << "[Initializer] Created " << map_points.size() << " 3D map points" << std::endl;
 
     // Set the first camera pose (identity matrix for the initial frame)
-    cv::Mat camera_pose = cv::Mat::eye(4, 4, CV_64F);
+    initial_data.initial_pose = Eigen::Matrix4d::Identity();
     std::cout << "[Initializer] Set initial camera pose (identity matrix)" << std::endl;
 
     // Print some statistics
@@ -106,3 +124,203 @@ std::vector<MapPoint> Initializer::backProjectKeypoints(
 
     return map_points;
 }
+
+// Multi-frame initialization for more reliable tracking
+bool Initializer::initializeMultiFrame(RealSenseCamera &camera, InitialMapData &initial_data, int num_frames) {
+    std::cout << "[Initializer] Starting multi-frame initialization (" << num_frames << " frames)..." << std::endl;
+
+    if (num_frames < 2) {
+        std::cerr << "[Initializer] Warning: num_frames < 2, falling back to single-frame initialization" << std::endl;
+        return initialize(camera, initial_data);
+    }
+
+    // Clamp num_frames to a reasonable range
+    if (num_frames > 5) {
+        std::cout << "[Initializer] Clamping num_frames from " << num_frames << " to 5" << std::endl;
+        num_frames = 5;
+    }
+
+    // Set up ORB detector and matcher
+    ORBDetector orb_detector(1000);
+    cv::BFMatcher matcher(cv::NORM_HAMMING);
+    rs2_intrinsics intrinsics = camera.getIntrinsics();
+
+    // Storage for all frames
+    struct FrameData {
+        cv::Mat color;
+        cv::Mat depth;
+        std::vector<cv::KeyPoint> keypoints;
+        cv::Mat descriptors;
+    };
+
+    std::vector<FrameData> frames;
+    frames.reserve(num_frames);
+
+    // Step 1: Capture multiple frames
+    std::cout << "[Initializer] Capturing " << num_frames << " frames..." << std::endl;
+    int retries = 0;
+    const int MAX_RETRIES = 10;  // bound the retries so a bad scene cannot hang initialization
+    for (int i = 0; i < num_frames; ++i) {
+        FrameData frame;
+
+        if (!camera.getFrame(frame.color, frame.depth)) {
+            std::cerr << "[Initializer] Failed to capture frame " << i << std::endl;
+            return false;
+        }
+
+        // Detect features
+        orb_detector.detectAndCompute(frame.color, frame.keypoints, frame.descriptors);
+
+        std::cout << "  Frame " << i << ": " << frame.keypoints.size() << " keypoints" << std::endl;
+
+        // Validate frame quality
+        if (!validateFrameQuality(frame.keypoints, frame.depth)) {
+            if (++retries > MAX_RETRIES) {
+                std::cerr << "[Initializer] Too many low-quality frames, aborting" << std::endl;
+                return false;
+            }
+            std::cerr << "[Initializer] Frame " << i << " quality check failed, retrying..." << std::endl;
+            --i;  // Retry this frame
+            continue;
+        }
+
+        frames.push_back(frame);
+
+        // Small delay between frames to allow camera motion
+        std::this_thread::sleep_for(std::chrono::milliseconds(200));
+    }
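+
+    // Note: the 200 ms pause between captures gives the operator time to move
+    // the camera slightly; the cross-frame match validation below then checks
+    // that features remain trackable under that motion.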
+
+    // Step 2: Match features across consecutive frames to validate motion
+    std::cout << "[Initializer] Validating feature tracking across frames..." << std::endl;
+    int total_good_matches = 0;
+
+    for (size_t i = 0; i < frames.size() - 1; ++i) {
+        std::vector<cv::DMatch> matches;
+        matcher.match(frames[i].descriptors, frames[i + 1].descriptors, matches);
+
+        // Filter matches by distance
+        double min_dist = 100.0;
+        for (const auto& match : matches) {
+            if (match.distance < min_dist) {
+                min_dist = match.distance;
+            }
+        }
+
+        std::vector<cv::DMatch> good_matches;
+        for (const auto& match : matches) {
+            if (match.distance <= std::max(2.0 * min_dist, 30.0)) {
+                good_matches.push_back(match);
+            }
+        }
+
+        std::cout << "  Frame " << i << " -> Frame " << (i + 1) << ": "
+                  << good_matches.size() << " good matches" << std::endl;
+
+        if (!validateMatchQuality(good_matches)) {
+            std::cerr << "[Initializer] Insufficient matches between frames " << i
+                      << " and " << (i + 1) << std::endl;
+            return false;
+        }
+
+        total_good_matches += static_cast<int>(good_matches.size());
+    }
+
+    std::cout << "[Initializer] Average matches per frame pair: "
+              << (total_good_matches / (frames.size() - 1)) << std::endl;
+
+    // Step 3: Use the middle frame as the reference (most stable)
+    int reference_idx = static_cast<int>(frames.size() / 2);
+    std::cout << "[Initializer] Using frame " << reference_idx << " as reference" << std::endl;
+
+    FrameData& ref_frame = frames[reference_idx];
+
+    // Step 4: Aggregate map points from all frames
+    std::cout << "[Initializer] Building initial map from all frames..." << std::endl;
+    std::vector<MapPoint> all_map_points;
+
+    for (size_t i = 0; i < frames.size(); ++i) {
+        std::vector<MapPoint> frame_points = backProjectKeypoints(
+            frames[i].keypoints,
+            frames[i].depth,
+            intrinsics
+        );
+
+        all_map_points.insert(all_map_points.end(), frame_points.begin(), frame_points.end());
+        std::cout << "  Frame " << i << ": " << frame_points.size() << " 3D points" << std::endl;
+    }
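+
+    // Aggregation caveat: each frame is back-projected independently, so the
+    // same physical landmark can appear up to num_frames times in
+    // all_map_points; a fuller initializer would fuse such duplicates using
+    // the cross-frame matches from Step 2.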
+
+    // Step 5: Populate initial_data with the reference frame and all map points
+    initial_data.color = ref_frame.color.clone();
+    initial_data.depth = ref_frame.depth.clone();
+    initial_data.keypoints = ref_frame.keypoints;
+    initial_data.descriptors = ref_frame.descriptors.clone();
+    initial_data.map_points = all_map_points;
+    initial_data.initial_pose = Eigen::Matrix4d::Identity();
+    initial_data.num_frames_used = num_frames;
+
+    // Save debug images from the reference frame
+    cv::imwrite("debug_captured_color.png", ref_frame.color);
+    cv::imwrite("debug_captured_depth.png", ref_frame.depth);
+
+    cv::Mat gray;
+    cv::cvtColor(ref_frame.color, gray, cv::COLOR_BGR2GRAY);
+    cv::imwrite("debug_captured_gray.png", gray);
+
+    cv::Mat enhanced;
+    cv::equalizeHist(gray, enhanced);
+    cv::imwrite("debug_captured_enhanced.png", enhanced);
+
+    // Step 6: Print statistics
+    int valid_3d_points = 0;
+    for (const auto &mp : all_map_points) {
+        if (mp.position.z > 0.1f && mp.position.z < 10.0f) {
+            valid_3d_points++;
+        }
+    }
+
+    std::cout << "[Initializer] Multi-frame initialization completed successfully!" << std::endl;
+    std::cout << "  - Frames used: " << num_frames << std::endl;
+    std::cout << "  - Reference frame keypoints: " << ref_frame.keypoints.size() << std::endl;
+    std::cout << "  - Total 3D map points: " << all_map_points.size() << std::endl;
+    std::cout << "  - Valid 3D points: " << valid_3d_points << std::endl;
+
+    return true;
+}
+
+// Validate frame quality based on keypoint count and depth coverage
+bool Initializer::validateFrameQuality(const std::vector<cv::KeyPoint> &keypoints, const cv::Mat &depth) {
+    const size_t MIN_KEYPOINTS = 100;
+
+    if (keypoints.size() < MIN_KEYPOINTS) {
+        std::cerr << "  [Quality Check] Too few keypoints: " << keypoints.size()
+                  << " < " << MIN_KEYPOINTS << std::endl;
+        return false;
+    }
+
+    // Check depth validity at the keypoint locations
+    int valid_depth_count = 0;
+    for (const auto &kp : keypoints) {
+        int u = static_cast<int>(kp.pt.x);
+        int v = static_cast<int>(kp.pt.y);
+
+        if (u >= 0 && u < depth.cols && v >= 0 && v < depth.rows) {
+            float depth_value = depth.at<ushort>(v, u) / 1000.0f;  // Convert mm to meters
+            if (depth_value > 0.1f && depth_value < 10.0f) {
+                valid_depth_count++;
+            }
+        }
+    }
+
+    float valid_ratio = static_cast<float>(valid_depth_count) / keypoints.size();
+    const float MIN_VALID_RATIO = 0.3f;
+
+    if (valid_ratio < MIN_VALID_RATIO) {
+        std::cerr << "  [Quality Check] Too few valid depth values: "
+                  << valid_ratio * 100.0f << "% < " << MIN_VALID_RATIO * 100.0f << "%" << std::endl;
+        return false;
+    }
+
+    return true;
+}
+
+// Validate match quality between frames
+bool Initializer::validateMatchQuality(const std::vector<cv::DMatch> &matches, int min_matches) {
+    if (matches.size() < static_cast<size_t>(min_matches)) {
+        return false;
+    }
+    return true;
+}