HomeProjectsContactGitHub

Cord Path

Repository

This is a CLI tool that finds the optimal path over a set of 2D coordinates. I created this project to learn more about the Rust programming language and how to use CUDA C++ for high performance computing.

Nearest Neighbor TSP

This is a simple implementation of the Nearest Neighbor algorithm for the Traveling Salesman Problem. It starts at a given node and always visits the nearest unvisited node.

1fn nearest_neighbor_tsp(dist_matrix: &[f32], n: usize, start: usize) -> Vec<usize> {
2    let mut visited = vec![false; n];
3    let mut path = Vec::with_capacity(n);
4    let mut current = start;
5
6    path.push(current);
7    visited[current] = true;
8
9    for _ in 1..n {
10        let mut next = None;
11        let mut min_dist = f32::MAX;
12
13        for candidate in 0..n {
14            if !visited[candidate] && dist_matrix[current * n + candidate] < min_dist {
15                min_dist = dist_matrix[current * n + candidate];
16                next = Some(candidate);
17            }
18        }
19
20        if let Some(next_node) = next {
21            path.push(next_node);
22            visited[next_node] = true;
23            current = next_node;
24        } else {
25            break;
26        }
27    }
28
29    path
30}

2-opt Optimization

This is a local search optimization algorithm. It removes two edges and replaces them with two different edges to shorten the total path length.

1// 2-opt local optimization
2fn two_opt(dist_matrix: &[f32], n: usize, path: &mut Vec<usize>) {
3    let mut improved = true;
4
5    while improved {
6        improved = false;
7        for i in 1..(n - 1) {
8            for j in (i + 1)..n {
9                let a = path[i - 1];
10                let b = path[i];
11                let c = path[j - 1];
12                let d = path[j];
13
14                let old_dist = dist_matrix[a * n + b] + dist_matrix[c * n + d];
15                let new_dist = dist_matrix[a * n + c] + dist_matrix[b * n + d];
16
17                if new_dist < old_dist {
18                    path[i..j].reverse();
19                    improved = true;
20                }
21            }
22        }
23    }
24}

CUDA Distance Matrix Kernel

This is the CUDA kernel that computes the distance matrix on the GPU. This significantly improves the performance of the algorithm for a large number of coordinates.

1#include <cuda_runtime.h>
2#include <math.h>
3#include <stdio.h>
4
5#define CHECK_CUDA_ERROR(val) check((val), #val, __FILE__, __LINE__)
6void check(cudaError_t err, const char* func, const char* file, int line) {
7    if (err != cudaSuccess) {
8        fprintf(stderr, "CUDA Error at %s:%d: %s failed with error %s\n", file, line, func, cudaGetErrorString(err));
9        exit(1);
10    }
11}
12
13// Kernel computes dist(i,j) = sqrt((x_i - x_j)^2 + (y_i - y_j)^2)
14__global__ void compute_distance_matrix_kernel(const int* xs, const int* ys, float* dist_matrix, int n) {
15    int i = blockIdx.y * blockDim.y + threadIdx.y; // row
16    int j = blockIdx.x * blockDim.x + threadIdx.x; // col
17
18    if (i < n && j < n) {
19        int dx = xs[i] - xs[j];
20        int dy = ys[i] - ys[j];
21        dist_matrix[i * n + j] = sqrtf(float(dx * dx + dy * dy));
22    }
23}
24
25extern "C" void compute_distance_matrix(const int* h_xs, const int* h_ys, float* h_dist_matrix, int n) {
26    int* d_xs = nullptr;
27    int* d_ys = nullptr;
28    float* d_dist_matrix = nullptr;
29
30    size_t int_size = n * sizeof(int);
31    size_t matrix_size = n * n * sizeof(float);
32
33    CHECK_CUDA_ERROR(cudaMalloc((void**)&d_xs, int_size));
34    CHECK_CUDA_ERROR(cudaMalloc((void**)&d_ys, int_size));
35    CHECK_CUDA_ERROR(cudaMalloc((void**)&d_dist_matrix, matrix_size));
36
37    CHECK_CUDA_ERROR(cudaMemcpy(d_xs, h_xs, int_size, cudaMemcpyHostToDevice));
38    CHECK_CUDA_ERROR(cudaMemcpy(d_ys, h_ys, int_size, cudaMemcpyHostToDevice));
39
40    dim3 threadsPerBlock(16, 16);
41    dim3 blocks((n + threadsPerBlock.x - 1) / threadsPerBlock.x,
42                (n + threadsPerBlock.y - 1) / threadsPerBlock.y);
43
44    compute_distance_matrix_kernel<<<blocks, threadsPerBlock>>>(d_xs, d_ys, d_dist_matrix, n);
45    CHECK_CUDA_ERROR(cudaDeviceSynchronize());
46
47    CHECK_CUDA_ERROR(cudaMemcpy(h_dist_matrix, d_dist_matrix, matrix_size, cudaMemcpyDeviceToHost));
48
49    cudaFree(d_xs);
50    cudaFree(d_ys);
51    cudaFree(d_dist_matrix);
52}