Cord Path
RepositoryThis is a CLI tool that finds the optimal path over a set of 2D coordinates. I created this project to learn more about the Rust programming language and how to use CUDA C++ for high performance computing.
Nearest Neighbor TSP
This is a simple implementation of the Nearest Neighbor algorithm for the Traveling Salesman Problem. It starts at a given node and always visits the nearest unvisited node.
1fn nearest_neighbor_tsp(dist_matrix: &[f32], n: usize, start: usize) -> Vec<usize> {
2 let mut visited = vec![false; n];
3 let mut path = Vec::with_capacity(n);
4 let mut current = start;
5
6 path.push(current);
7 visited[current] = true;
8
9 for _ in 1..n {
10 let mut next = None;
11 let mut min_dist = f32::MAX;
12
13 for candidate in 0..n {
14 if !visited[candidate] && dist_matrix[current * n + candidate] < min_dist {
15 min_dist = dist_matrix[current * n + candidate];
16 next = Some(candidate);
17 }
18 }
19
20 if let Some(next_node) = next {
21 path.push(next_node);
22 visited[next_node] = true;
23 current = next_node;
24 } else {
25 break;
26 }
27 }
28
29 path
30}
2-opt Optimization
This is a local search optimization algorithm. It removes two edges and replaces them with two different edges to shorten the total path length.
1// 2-opt local optimization
2fn two_opt(dist_matrix: &[f32], n: usize, path: &mut Vec<usize>) {
3 let mut improved = true;
4
5 while improved {
6 improved = false;
7 for i in 1..(n - 1) {
8 for j in (i + 1)..n {
9 let a = path[i - 1];
10 let b = path[i];
11 let c = path[j - 1];
12 let d = path[j];
13
14 let old_dist = dist_matrix[a * n + b] + dist_matrix[c * n + d];
15 let new_dist = dist_matrix[a * n + c] + dist_matrix[b * n + d];
16
17 if new_dist < old_dist {
18 path[i..j].reverse();
19 improved = true;
20 }
21 }
22 }
23 }
24}
CUDA Distance Matrix Kernel
This is the CUDA kernel that computes the distance matrix on the GPU. This significantly improves the performance of the algorithm for a large number of coordinates.
1#include <cuda_runtime.h>
2#include <math.h>
3#include <stdio.h>
4
5#define CHECK_CUDA_ERROR(val) check((val), #val, __FILE__, __LINE__)
6void check(cudaError_t err, const char* func, const char* file, int line) {
7 if (err != cudaSuccess) {
8 fprintf(stderr, "CUDA Error at %s:%d: %s failed with error %s\n", file, line, func, cudaGetErrorString(err));
9 exit(1);
10 }
11}
12
13// Kernel computes dist(i,j) = sqrt((x_i - x_j)^2 + (y_i - y_j)^2)
14__global__ void compute_distance_matrix_kernel(const int* xs, const int* ys, float* dist_matrix, int n) {
15 int i = blockIdx.y * blockDim.y + threadIdx.y; // row
16 int j = blockIdx.x * blockDim.x + threadIdx.x; // col
17
18 if (i < n && j < n) {
19 int dx = xs[i] - xs[j];
20 int dy = ys[i] - ys[j];
21 dist_matrix[i * n + j] = sqrtf(float(dx * dx + dy * dy));
22 }
23}
24
25extern "C" void compute_distance_matrix(const int* h_xs, const int* h_ys, float* h_dist_matrix, int n) {
26 int* d_xs = nullptr;
27 int* d_ys = nullptr;
28 float* d_dist_matrix = nullptr;
29
30 size_t int_size = n * sizeof(int);
31 size_t matrix_size = n * n * sizeof(float);
32
33 CHECK_CUDA_ERROR(cudaMalloc((void**)&d_xs, int_size));
34 CHECK_CUDA_ERROR(cudaMalloc((void**)&d_ys, int_size));
35 CHECK_CUDA_ERROR(cudaMalloc((void**)&d_dist_matrix, matrix_size));
36
37 CHECK_CUDA_ERROR(cudaMemcpy(d_xs, h_xs, int_size, cudaMemcpyHostToDevice));
38 CHECK_CUDA_ERROR(cudaMemcpy(d_ys, h_ys, int_size, cudaMemcpyHostToDevice));
39
40 dim3 threadsPerBlock(16, 16);
41 dim3 blocks((n + threadsPerBlock.x - 1) / threadsPerBlock.x,
42 (n + threadsPerBlock.y - 1) / threadsPerBlock.y);
43
44 compute_distance_matrix_kernel<<<blocks, threadsPerBlock>>>(d_xs, d_ys, d_dist_matrix, n);
45 CHECK_CUDA_ERROR(cudaDeviceSynchronize());
46
47 CHECK_CUDA_ERROR(cudaMemcpy(h_dist_matrix, d_dist_matrix, matrix_size, cudaMemcpyDeviceToHost));
48
49 cudaFree(d_xs);
50 cudaFree(d_ys);
51 cudaFree(d_dist_matrix);
52}