19 changes: 19 additions & 0 deletions CMakeLists.txt
@@ -33,6 +33,8 @@ if(NOT HEADLESS)
  include_directories(${Pangolin_INCLUDE_DIRS})
endif()

# Executables
# Test initial setup
# Core libraries
add_library(realsense_camera src/realsense_camera.cpp)
target_link_libraries(realsense_camera ${OpenCV_LIBS} ${realsense2_LIBRARY})
@@ -83,6 +85,23 @@ set_target_properties(test_initial_setup PROPERTIES
  INSTALL_RPATH "/usr/local/lib"
)

# Basic Visual Odometry Tracker
add_executable(basic_vo_tracker src/basic_vo_tracker.cpp)
target_link_libraries(basic_vo_tracker
  realsense_camera
  orb_detector
  initializer
  ${OpenCV_LIBS}
  ${realsense2_LIBRARY}
  Eigen3::Eigen
)

# RPATH so the installed binary finds its shared libraries at runtime
set_target_properties(basic_vo_tracker PROPERTIES
  BUILD_WITH_INSTALL_RPATH TRUE
  INSTALL_RPATH_USE_LINK_PATH TRUE
  INSTALL_RPATH "/usr/local/lib"
)
# Enable testing
enable_testing()
add_subdirectory(tests)
28 changes: 28 additions & 0 deletions include/initializer.hpp
@@ -2,14 +2,42 @@
#include "realsense_camera.hpp"
#include "orb_detector.hpp"
#include "map_point.hpp"
#include <opencv2/core.hpp>
#include <Eigen/Core>
#include <vector>

// Structure to hold initial map data
struct InitialMapData {
    cv::Mat color;
    cv::Mat depth;
    std::vector<cv::KeyPoint> keypoints;
    cv::Mat descriptors;
    std::vector<MapPoint> map_points;
    Eigen::Matrix4d initial_pose;
    int num_frames_used;  // track how many frames were used for initialization

    InitialMapData() {
        initial_pose = Eigen::Matrix4d::Identity();
        num_frames_used = 1;
    }
};

class Initializer {
public:
    // Single-frame initialization (backward compatibility)
    bool initialize(RealSenseCamera &camera);
    bool initialize(RealSenseCamera &camera, InitialMapData &initial_data);

    // Multi-frame initialization (recommended for better reliability)
    bool initializeMultiFrame(RealSenseCamera &camera, InitialMapData &initial_data, int num_frames = 3);

    std::vector<MapPoint> backProjectKeypoints(
        const std::vector<cv::KeyPoint> &keypoints,
        const cv::Mat &depth,
        const rs2_intrinsics &intrinsics);

private:
    // Helper methods for multi-frame initialization
    bool validateFrameQuality(const std::vector<cv::KeyPoint> &keypoints, const cv::Mat &depth);
    bool validateMatchQuality(const std::vector<cv::DMatch> &matches, int min_matches = 50);
};
276 changes: 276 additions & 0 deletions src/basic_vo_tracker.cpp
@@ -0,0 +1,276 @@
#include "realsense_camera.hpp"
#include "orb_detector.hpp"
#include "initializer.hpp"
#include <opencv2/opencv.hpp>
#include <opencv2/calib3d.hpp>
#include <Eigen/Core>
#include <Eigen/Geometry>
#include <cmath>     // for M_PI
#include <iostream>
#include <vector>
#include <thread>
#include <chrono>

struct Frame {
    cv::Mat color;
    cv::Mat depth;
    std::vector<cv::KeyPoint> keypoints;
    cv::Mat descriptors;
};

// Convert pixel + depth to a 3D point using the camera intrinsics
cv::Point3f deproject(const cv::Point2f& pixel, float depth_value, const rs2_intrinsics& intrinsics) {
    float z = depth_value;
    float x = (pixel.x - intrinsics.ppx) * z / intrinsics.fx;
    float y = (pixel.y - intrinsics.ppy) * z / intrinsics.fy;
    return cv::Point3f(x, y, z);
}

// Estimate pose using PnP RANSAC
bool estimatePose(const std::vector<cv::Point3f>& points3d_prev,
                  const std::vector<cv::Point2f>& points2d_curr,
                  const rs2_intrinsics& intrinsics,
                  cv::Mat& rvec, cv::Mat& tvec) {

    if (points3d_prev.size() < 10) {
        return false;
    }

    // Build the camera intrinsic matrix from the RealSense intrinsics
    cv::Mat K = (cv::Mat_<double>(3, 3) << intrinsics.fx, 0, intrinsics.ppx,
                                           0, intrinsics.fy, intrinsics.ppy,
                                           0, 0, 1);

    // Use PnP RANSAC to estimate pose
    std::vector<int> inliers;
    bool success = cv::solvePnPRansac(
        points3d_prev,
        points2d_curr,
        K,
        cv::Mat(),  // no distortion
        rvec,
        tvec,
        false,      // no extrinsic guess
        100,        // iterations
        8.0,        // reprojection error threshold (pixels)
        0.99,       // confidence
        inliers
    );

    if (success && inliers.size() >= 10) {
        std::cout << "  [PnP] Inliers: " << inliers.size()
                  << "/" << points3d_prev.size() << std::endl;
        return true;
    }

    return false;
}

// Convert rotation vector and translation to a 4x4 transformation matrix
Eigen::Matrix4d toMatrix4d(const cv::Mat& rvec, const cv::Mat& tvec) {
    cv::Mat R;
    cv::Rodrigues(rvec, R);

    Eigen::Matrix4d T = Eigen::Matrix4d::Identity();
    // Copy the 3x3 rotation and the 3x1 translation into the homogeneous matrix
    for (int r = 0; r < 3; ++r) {
        for (int c = 0; c < 3; ++c) {
            T(r, c) = R.at<double>(r, c);
        }
        T(r, 3) = tvec.at<double>(r);
    }

    return T;
}

void printPose(const Eigen::Matrix4d& pose, int frame_num) {
    Eigen::Vector3d translation = pose.block<3, 1>(0, 3);
    Eigen::Matrix3d rotation = pose.block<3, 3>(0, 0);

    // Convert rotation matrix to Euler angles (roll, pitch, yaw)
    Eigen::Vector3d euler = rotation.eulerAngles(0, 1, 2);

    std::cout << "\n=== Frame " << frame_num << " Camera Pose ===" << std::endl;
    std::cout << "Translation (x, y, z): "
              << translation.x() << ", "
              << translation.y() << ", "
              << translation.z() << " meters" << std::endl;
    std::cout << "Rotation (roll, pitch, yaw): "
              << euler.x() * 180.0 / M_PI << ", "
              << euler.y() * 180.0 / M_PI << ", "
              << euler.z() * 180.0 / M_PI << " degrees" << std::endl;
    std::cout << "Transformation Matrix:\n" << pose << std::endl;
}

int main(int argc, char** argv) {
    try {
        std::cout << "=== Integrated Visual Odometry System ===" << std::endl;

        // Initialize camera
        std::cout << "\n[Step 1] Initializing camera..." << std::endl;
        RealSenseCamera camera;
        rs2_intrinsics intrinsics = camera.getIntrinsics();

        // Initialize the map using the multi-frame approach (2-3 frames) for better reliability
        std::cout << "\n[Step 2] Initializing map with multi-frame approach..." << std::endl;
        Initializer initializer;
        InitialMapData initial_data;

        // Use 3 frames for initialization (configurable)
        const int NUM_INIT_FRAMES = 3;

        if (!initializer.initializeMultiFrame(camera, initial_data, NUM_INIT_FRAMES)) {
            std::cerr << "[ERROR] Multi-frame map initialization failed!" << std::endl;
            std::cerr << "        Falling back to single-frame initialization..." << std::endl;

            // Fallback to single-frame initialization
            if (!initializer.initialize(camera, initial_data)) {
                std::cerr << "[ERROR] Single-frame initialization also failed!" << std::endl;
                return EXIT_FAILURE;
            }
        }

Review comment:
It seems the current initialization only captures a single frame. For more reliable tracking, we could consider multi-frame initialization (e.g. 2–3 frames with triangulated points) to reduce failures in low-texture areas.

Author reply:
I've implemented multi-frame initialisation (2–3 frames) for more robust tracking.
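
The src/initializer.cpp changes are not visible in this view, so the following is a sketch only, not the code under review: one plausible shape for initializeMultiFrame, using nothing beyond the interfaces declared in the header above (getFrame, detectAndCompute, the two validate helpers, and backProjectKeypoints), would be to gate initialization on several consecutive frames matching each other well:

// Sketch only -- assumes the RealSenseCamera, ORBDetector, and Initializer
// interfaces declared elsewhere in this PR.
bool Initializer::initializeMultiFrame(RealSenseCamera &camera,
                                       InitialMapData &initial_data,
                                       int num_frames) {
    ORBDetector detector(1000);
    cv::BFMatcher matcher(cv::NORM_HAMMING);

    // Reference frame: must pass the per-frame quality gate.
    if (!camera.getFrame(initial_data.color, initial_data.depth)) return false;
    detector.detectAndCompute(initial_data.color, initial_data.keypoints,
                              initial_data.descriptors);
    if (!validateFrameQuality(initial_data.keypoints, initial_data.depth))
        return false;

    // Each additional frame must match the reference well enough; this
    // rejects low-texture scenes and fast motion during startup.
    for (int i = 1; i < num_frames; ++i) {
        cv::Mat color, depth, descriptors;
        std::vector<cv::KeyPoint> keypoints;
        if (!camera.getFrame(color, depth)) return false;
        detector.detectAndCompute(color, keypoints, descriptors);

        std::vector<cv::DMatch> matches;
        matcher.match(initial_data.descriptors, descriptors, matches);
        if (!validateMatchQuality(matches)) return false;
    }

    // Back-project the reference frame's keypoints into 3D map points.
    initial_data.map_points = backProjectKeypoints(
        initial_data.keypoints, initial_data.depth, camera.getIntrinsics());
    initial_data.num_frames_used = num_frames;
    return !initial_data.map_points.empty();
}

Since the sensor is RGB-D, depth back-projection stands in here for the triangulation a monocular initializer would need.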


        std::cout << "[Step 2] Map initialization successful!" << std::endl;
        std::cout << "  - Frames used: " << initial_data.num_frames_used << std::endl;
        std::cout << "  - Initial keypoints: " << initial_data.keypoints.size() << std::endl;
        std::cout << "  - Initial 3D map points: " << initial_data.map_points.size() << std::endl;

        // Create ORB feature detector for subsequent frames
        ORBDetector orb_detector(1000);

        // BFMatcher with Hamming distance for binary ORB descriptors
        cv::BFMatcher matcher(cv::NORM_HAMMING);

        // Initialize tracking with the first frame from the initializer
        Frame prev_frame, curr_frame;
        prev_frame.color = initial_data.color;
        prev_frame.depth = initial_data.depth;
        prev_frame.keypoints = initial_data.keypoints;
        prev_frame.descriptors = initial_data.descriptors;

        Eigen::Matrix4d cumulative_pose = initial_data.initial_pose;

        int frame_count = 1;  // start from 1 since frame 0 was used for initialization

        std::cout << "\n[Step 3] Starting visual odometry tracking...\n" << std::endl;

        // Print the initial pose
        printPose(cumulative_pose, 0);

        // Main tracking loop
        while (true) {
            // 1. Grab synchronized RGB and depth frames
            if (!camera.getFrame(curr_frame.color, curr_frame.depth)) {
                std::cerr << "Failed to get frame, skipping..." << std::endl;
                continue;
            }

            // 2. Extract ORB features from the RGB frame
            orb_detector.detectAndCompute(curr_frame.color, curr_frame.keypoints, curr_frame.descriptors);

            std::cout << "Frame " << frame_count << ": Detected "
                      << curr_frame.keypoints.size() << " features" << std::endl;

            // 3. Track features from the previous frame to the current frame
            if (prev_frame.descriptors.empty() || curr_frame.descriptors.empty()) {
                std::cout << "  [Warning] No descriptors in one of the frames, skipping..." << std::endl;
                prev_frame = curr_frame;
                frame_count++;
                continue;
            }

            std::vector<cv::DMatch> matches;
            matcher.match(prev_frame.descriptors, curr_frame.descriptors, matches);

            // Filter matches by distance relative to the best match
            double min_dist = 100.0;
            for (const auto& match : matches) {
                if (match.distance < min_dist) {
                    min_dist = match.distance;
                }
            }

            std::vector<cv::DMatch> good_matches;
            for (const auto& match : matches) {
                if (match.distance <= std::max(2.0 * min_dist, 30.0)) {
                    good_matches.push_back(match);
                }
            }

            std::cout << "  [Matching] Good matches: " << good_matches.size()
                      << "/" << matches.size() << std::endl;

            if (good_matches.size() < 10) {
                std::cout << "  [Warning] Not enough good matches, skipping pose estimation..." << std::endl;
                prev_frame = curr_frame;
                frame_count++;
                continue;
            }

            // 4. Estimate the camera's new pose
            // Build 3D-2D correspondences
            std::vector<cv::Point3f> points3d_prev;
            std::vector<cv::Point2f> points2d_curr;

Review comment:
Tracking currently only uses points from the previous frame. We could consider using a local map built from multiple keyframes, instead of just the last frame, to improve stability.

Author reply:
Do you still need me to implement this, or is Damien implementing it?
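
This local-map idea is not implemented in this PR; a rough sketch of the correspondence-gathering side, reusing the Frame struct, the deproject helper, and the BFMatcher from this file (the Keyframe struct and function name below are hypothetical), could look like:

#include <deque>  // in addition to this file's existing includes

// Sketch only. A keyframe stores a tracked frame plus the camera-to-world
// pose the tracker had when the keyframe was inserted.
struct Keyframe {
    Frame frame;
    Eigen::Matrix4d pose_world;
};

// Gather 3D-2D correspondences against every keyframe in the local map,
// not just the previous frame. 3D points are lifted into world coordinates
// so points from different keyframes share one frame of reference.
void gatherLocalMapCorrespondences(const std::deque<Keyframe>& local_map,
                                   const Frame& curr,
                                   const rs2_intrinsics& intrinsics,
                                   cv::BFMatcher& matcher,
                                   std::vector<cv::Point3f>& points3d_world,
                                   std::vector<cv::Point2f>& points2d_curr) {
    for (const auto& kf : local_map) {
        std::vector<cv::DMatch> matches;
        matcher.match(kf.frame.descriptors, curr.descriptors, matches);
        for (const auto& m : matches) {
            const cv::Point2f pt = kf.frame.keypoints[m.queryIdx].pt;
            const int x = static_cast<int>(pt.x);
            const int y = static_cast<int>(pt.y);
            if (x < 0 || x >= kf.frame.depth.cols ||
                y < 0 || y >= kf.frame.depth.rows) continue;

            const float d = kf.frame.depth.at<uint16_t>(y, x) / 1000.0f;
            if (d <= 0.1f || d >= 10.0f) continue;  // same depth gate as the tracker

            // Lift: keyframe camera frame -> world frame.
            const cv::Point3f p_cam = deproject(pt, d, intrinsics);
            const Eigen::Vector4d p_w =
                kf.pose_world * Eigen::Vector4d(p_cam.x, p_cam.y, p_cam.z, 1.0);
            points3d_world.emplace_back(p_w.x(), p_w.y(), p_w.z());
            points2d_curr.push_back(curr.keypoints[m.trainIdx].pt);
        }
    }
}

With world-frame 3D points, solvePnPRansac returns the current frame's world-to-camera pose directly, so no per-frame inverse chaining is needed; the keyframe insertion policy (e.g. every N frames, capped deque size) is left out of the sketch.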


            for (const auto& match : good_matches) {
                cv::Point2f pt_prev = prev_frame.keypoints[match.queryIdx].pt;
                cv::Point2f pt_curr = curr_frame.keypoints[match.trainIdx].pt;

                // Get depth at the previous frame's keypoint
                int x = static_cast<int>(pt_prev.x);
                int y = static_cast<int>(pt_prev.y);

                if (x >= 0 && x < prev_frame.depth.cols &&
                    y >= 0 && y < prev_frame.depth.rows) {

                    float depth_val = prev_frame.depth.at<uint16_t>(y, x) / 1000.0f;  // convert mm to meters

                    if (depth_val > 0.1f && depth_val < 10.0f) {  // valid depth (0.1 m - 10 m)
                        cv::Point3f point3d = deproject(pt_prev, depth_val, intrinsics);
                        points3d_prev.push_back(point3d);
                        points2d_curr.push_back(pt_curr);
                    }
                }
            }

            std::cout << "  [3D-2D] Valid correspondences: " << points3d_prev.size() << std::endl;

            // Estimate pose using PnP
            cv::Mat rvec, tvec;
            if (estimatePose(points3d_prev, points2d_curr, intrinsics, rvec, tvec)) {
                // Convert to a transformation matrix
                Eigen::Matrix4d relative_pose = toMatrix4d(rvec, tvec);

                // Update cumulative pose: T_w_curr = T_w_prev * (T_curr_prev)^-1
                cumulative_pose = cumulative_pose * relative_pose.inverse();

Review comment:

Why do we use .inverse()?

Author reply:
The .inverse() is needed because of the direction solvePnPRansac works in: given 3D points expressed in the previous camera's frame and their 2D observations in the current image, it returns the transform that maps points from the previous camera frame into the current camera frame. The cumulative pose maps camera coordinates to world coordinates, so to chain the camera's motion onto it we have to compose with the opposite direction, i.e. the inverse of what PnP returns.
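
In symbols (notation added here for clarity: $T^a_b$ maps coordinates expressed in frame $b$ into frame $a$), the update in the code is

    T^{w}_{\mathrm{curr}} = T^{w}_{\mathrm{prev}} \left( T^{\mathrm{curr}}_{\mathrm{prev}} \right)^{-1}

where $T^{\mathrm{curr}}_{\mathrm{prev}}$ is the relative_pose returned by solvePnPRansac and $T^{w}_{\mathrm{prev}}$ is cumulative_pose; the right-hand side is exactly cumulative_pose * relative_pose.inverse() in the code.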


                // 5. Print the estimated camera pose
                printPose(cumulative_pose, frame_count);
            } else {
                std::cout << "  [Warning] Pose estimation failed, keeping previous pose..." << std::endl;
            }

            // Update for the next iteration
            prev_frame = curr_frame;
            frame_count++;

            // Optional: limit the frame rate
            std::this_thread::sleep_for(std::chrono::milliseconds(100));

            // Optional: break after a certain number of frames for testing
            // if (frame_count >= 50) break;
        }

        std::cout << "\nTracking finished." << std::endl;

    } catch (const std::exception& e) {
        std::cerr << "[EXCEPTION] " << e.what() << std::endl;
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}