Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
9929b80
add C code to test kernels
BiradarSiddhant02 Jul 9, 2024
870ad96
change compiling and linking process
BiradarSiddhant02 Jul 10, 2024
48d2e3a
implement most basic column major matrix multiplication kernel in c-lisp
BiradarSiddhant02 Jul 10, 2024
d6114c3
change format
BiradarSiddhant02 Jul 10, 2024
3e2d36b
add performance testing functions
BiradarSiddhant02 Jul 10, 2024
c51a0ae
include more error handling
BiradarSiddhant02 Jul 10, 2024
093922f
abstract away functions
BiradarSiddhant02 Jul 10, 2024
3e60c5b
add makefile
BiradarSiddhant02 Jul 10, 2024
33eb715
add all_close function
BiradarSiddhant02 Jul 10, 2024
6389398
remove reinitialization of functions in c-lisp file
BiradarSiddhant02 Jul 10, 2024
470decb
change filename : runtime.c to main.c
BiradarSiddhant02 Jul 10, 2024
091ebb2
.ll files for analysis and benchmarks
BiradarSiddhant02 Jul 11, 2024
c8706ae
delete : .ll files
BiradarSiddhant02 Jul 11, 2024
b295aab
change compiling and linking routine
BiradarSiddhant02 Jul 11, 2024
2c12e50
fix loop variables in MMult1.sexp
BiradarSiddhant02 Jul 11, 2024
48d4426
returns 0 is NaN
BiradarSiddhant02 Jul 12, 2024
76f61b2
kernel file MMult2.sexp
BiradarSiddhant02 Jul 12, 2024
b42616f
make indexing consistent with reference code
BiradarSiddhant02 Jul 12, 2024
f9c693b
kernel passes tests
BiradarSiddhant02 Jul 13, 2024
b854e1d
supress warnings
BiradarSiddhant02 Jul 13, 2024
7cdf36f
fix bug where C was not initialized to 0
BiradarSiddhant02 Jul 13, 2024
d012b35
compile and link each kernel seperately
BiradarSiddhant02 Jul 15, 2024
b7aa98f
Add kernel MMult1_1x4_3.sexp
BiradarSiddhant02 Jul 15, 2024
2dd2232
C code equivalent to MMult_1x4_4
BiradarSiddhant02 Jul 15, 2024
5c7ad18
add kernel MMult_1x4_4.sexp
BiradarSiddhant02 Jul 15, 2024
de3a3dd
Merge branch 'chsasank:main' into kernel_branch
BiradarSiddhant02 Jul 16, 2024
09df759
change compiling and linking routine
BiradarSiddhant02 Jul 16, 2024
f55909b
make kernel function interfaces consistent
BiradarSiddhant02 Jul 16, 2024
6d2d59a
ignore assembly files
BiradarSiddhant02 Jul 16, 2024
1dc6e21
abstract away each kernels
BiradarSiddhant02 Jul 16, 2024
8926ddc
change how then benchmark routine is run
BiradarSiddhant02 Jul 16, 2024
043f9ec
change function names to differentiate between C and C-lisp
BiradarSiddhant02 Jul 16, 2024
8539a38
bash script to support new abstraction with kernels
BiradarSiddhant02 Jul 16, 2024
5a0b043
header file containing parameters for benchmarking
BiradarSiddhant02 Jul 16, 2024
21b80a6
change filename : draw.py -> bench.py
BiradarSiddhant02 Jul 16, 2024
9082661
add folder to be ignored
BiradarSiddhant02 Jul 16, 2024
8902bdd
fix loops
BiradarSiddhant02 Jul 18, 2024
f81177d
add kernel MMult_1x4_5.sexp
BiradarSiddhant02 Jul 18, 2024
27ef255
add new argument for mode of benchmarking
BiradarSiddhant02 Jul 18, 2024
8e59b25
make sure y-axis is fixed
BiradarSiddhant02 Jul 18, 2024
1798117
include all kernels
BiradarSiddhant02 Jul 18, 2024
4dfaa6f
add kernel MMult_4x4_3
BiradarSiddhant02 Jul 18, 2024
911797d
add kernel MMult_4x4_4
BiradarSiddhant02 Jul 18, 2024
d627a67
add kernel MMult_4x4_5
BiradarSiddhant02 Jul 18, 2024
5b93eb7
clean up
BiradarSiddhant02 Jul 18, 2024
ccec401
bug fixes
BiradarSiddhant02 Jul 20, 2024
2da5da0
add option to print the matrix and plot benchmark results if desired
BiradarSiddhant02 Jul 20, 2024
7c5c389
add kernel MMult_4x4_6
BiradarSiddhant02 Jul 20, 2024
ea7aa6f
fix code mistake : for loops and array indices
BiradarSiddhant02 Jul 24, 2024
9c15864
Merge branch 'chsasank:main' into kernel_branch
BiradarSiddhant02 Jul 26, 2024
4690708
move c-lisp kernels
BiradarSiddhant02 Jul 26, 2024
2dfd964
move C kernels
BiradarSiddhant02 Jul 26, 2024
a5bd5ac
change compiling. linking, executing and benchmarking routine
BiradarSiddhant02 Jul 26, 2024
d850086
MMult1 : use macros
BiradarSiddhant02 Jul 27, 2024
82ac370
MMult2 : use macros
BiradarSiddhant02 Jul 27, 2024
4068bf7
fix bug where IR of the c-lisp code was not being generated
BiradarSiddhant02 Jul 27, 2024
7fa58fd
MMult_1x4_3 : use macros
BiradarSiddhant02 Jul 28, 2024
1c1f94f
MMult_1x4_4 : use macros
BiradarSiddhant02 Jul 28, 2024
780862d
delete file : MMult_4x4_6
BiradarSiddhant02 Jul 28, 2024
5c15f43
MMult_1x4_5 : add macros
BiradarSiddhant02 Jul 28, 2024
148442f
MMult_4x4_3 : add macros
BiradarSiddhant02 Jul 28, 2024
99dd3aa
MMult_4x4_4 : add macros
BiradarSiddhant02 Jul 28, 2024
e83f384
MMult_4x4_5 : add macros
BiradarSiddhant02 Jul 28, 2024
7455843
clean up and formatting
BiradarSiddhant02 Jul 28, 2024
719deb7
formatting
BiradarSiddhant02 Jul 28, 2024
51b0d24
change benchmarking parameters
BiradarSiddhant02 Jul 28, 2024
a39da9f
fix macros in MMult_4x4_4 and MMult_4x4_5
BiradarSiddhant02 Jul 31, 2024
7f3e42b
potential error fix
BiradarSiddhant02 Jul 31, 2024
1756af8
add inline docs
BiradarSiddhant02 Sep 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/backend/tests/c-lisp/kernel/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
build
tmp
plots
*.s
168 changes: 168 additions & 0 deletions src/backend/tests/c-lisp/kernel/bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns # type: ignore
import os
import argparse
from datetime import datetime
import subprocess

"""
Kernel Benchmarking Script
This script benchmarks various kernels written in C and C-Lisp.
It compiles them to LLVM IR, generates object files, executes the benchmarks,
and records performance metrics. The results are visualized using matplotlib and seaborn.

Requirements:
- Python 3.x
- pandas: For handling CSV data.
- matplotlib: For plotting graphs.
- seaborn: For improved plotting aesthetics.
- Guile: For processing .sexp files.
- Clang: For compiling C code.
- Linux/MacOS: Designed to run in a Unix-like environment.

Usage:
To run the benchmarking script, use the command line to specify the kernel you want to benchmark
along with any desired options:
python bench.py -k <kernel_name> -m <mode> -p <print_flag>

Command-Line Arguments:
-k (string): Name of the kernel to benchmark. Default is "MMult1". Refer to kernels.txt for options.
-m (string): Mode of execution. Default is "once".
-p (int): Print resultant matrix flag. Default is 0.
"""

# Command-line options as (flag, type, default, help) rows. Every option has
# a default, so the script can be started without any arguments.
_CLI_OPTIONS = (
    ("-k", str, "MMult1", "kernel name"),
    ("-m", str, "once", "Mode"),
    ("-p", int, 0, "Print resultant matrix"),
)

parser = argparse.ArgumentParser(
    description=" -k, type of kernel to be benchmarked. cat ./kernels.txt for more info"
)
for _flag, _type, _default, _help in _CLI_OPTIONS:
    parser.add_argument(_flag, type=_type, default=_default, help=_help)

# Parsed values are read below as args.k, args.m and args.p.
args = parser.parse_args()

# Directory layout, relative to the directory the script is run from.
BUILD_DIRECTORY = "build"
C_KERNELS_DIRECTORY = "kernels_C"
C_LISP_KERNELS_DIRECTORY = "kernels_c-lisp"
RUNTIME_DIRECTORY = "runtime"
TMP = "tmp"
PLOTS = "plots"

# Compiler settings shared by every compile and link command.
CC = "clang"
OPTIMIZATION = "-O1"
FLAGS = "-Wno-implicit-function-declaration -Wno-override-module"

# Create directories if they don't exist
for _directory in (
    BUILD_DIRECTORY,
    f"{BUILD_DIRECTORY}/ir",
    f"{BUILD_DIRECTORY}/obj",
    TMP,
    PLOTS,
):
    os.makedirs(_directory, exist_ok=True)

# Clean the obj directory, then the ir directory: regular files are removed,
# anything that is not a regular file is left in place.
for stale_dir in (f"{BUILD_DIRECTORY}/obj", f"{BUILD_DIRECTORY}/ir"):
    for entry in os.listdir(stale_dir):
        entry_path = os.path.join(stale_dir, entry)
        if os.path.isfile(entry_path):
            os.unlink(entry_path)

# Compile runtime and both kernels
# All paths below are derived from the kernel name given with -k.
C_kernel = f"{C_KERNELS_DIRECTORY}/{args.k}.c"
c_lisp_kernel = f"{C_LISP_KERNELS_DIRECTORY}/{args.k}.sexp"
# NOTE(review): directory and kernel name are joined with "." here, not "/";
# confirm this is the form prelisp.py expects for its macro argument.
macro_file = f"{C_LISP_KERNELS_DIRECTORY}.{args.k}.py"
ir_file = f"{BUILD_DIRECTORY}/ir/{args.k}.ll"
print(ir_file)
C_obj = f"{BUILD_DIRECTORY}/obj/{args.k}_c.o"
c_lisp_obj = f"{BUILD_DIRECTORY}/obj/{args.k}_c-lisp.o"
executable = f"{BUILD_DIRECTORY}/kernel_bench"

# Compile the .sexp kernel: s-expression -> JSON -> prelisp -> c-lisp ->
# brilisp -> LLVM IR, written to ir_file by the final redirect.
cmd = (
    f"guile ../../../utils/sexp-json.scm < {c_lisp_kernel} | "
    f"python ../../../prelisp.py {macro_file} |"
    f"python ../../../c-lisp.py | "
    f"python ../../../brilisp.py | "
    f"python ../../../llvm.py > {ir_file}"
)
print(f"Executing command to generate LLVM IR:\n{cmd}")
# check=True only sees the exit status of the last pipeline stage.
subprocess.run(cmd, shell=True, check=True)

# The shell redirect creates ir_file even when an earlier stage produced
# nothing, so existence alone proves nothing; also require some content.
if os.path.exists(ir_file) and os.path.getsize(ir_file) > 0:
    print(f"Successfully created {ir_file}")
    if not os.path.exists(c_lisp_obj):
        compile_cmd = f"{CC} {OPTIMIZATION} {FLAGS} -c -o {c_lisp_obj} {ir_file}"
        print(f"Compiling LLVM IR to object file:\n{compile_cmd}")
        subprocess.run(compile_cmd, shell=True, check=True)
else:
    print(f"Failed to create {ir_file}")

# Compile the C kernel (skipped when its object file is already present)
if not os.path.exists(C_obj):
    compile_cmd = f"{CC} {OPTIMIZATION} {FLAGS} -c -o {C_obj} {C_kernel}"
    print(f"Compiling C kernel to object file:\n{compile_cmd}")
    subprocess.run(compile_cmd, shell=True, check=True)

# Compile main.c and matrix.c
main_file = f"{RUNTIME_DIRECTORY}/main.c"
main_object = f"{BUILD_DIRECTORY}/obj/main.o"
matrix_file = f"{RUNTIME_DIRECTORY}/matrix.c"
matrix_object = f"{BUILD_DIRECTORY}/obj/matrix.o"

# The runtime sources are always rebuilt, in this order.
for source_label, source_path, object_path in (
    ("main.c", main_file, main_object),
    ("matrix.c", matrix_file, matrix_object),
):
    compile_cmd = f"{CC} {OPTIMIZATION} {FLAGS} -c -o {object_path} {source_path}"
    print(f"Compiling {source_label} to object file:\n{compile_cmd}")
    subprocess.run(compile_cmd, shell=True, check=True)

# Link all objects: every entry currently in the obj directory goes on the
# linker command line.
obj_dir = f"{BUILD_DIRECTORY}/obj"
objects = " ".join(f"{obj_dir}/{entry}" for entry in os.listdir(obj_dir))
link_cmd = f"{CC} {OPTIMIZATION} {FLAGS} -o {executable} {objects}"
print(f"Linking object files to create executable:\n{link_cmd}")
subprocess.run(link_cmd, shell=True, check=True)

# Execute the executable and store output
now = datetime.now()
formatted_time = now.strftime("%Y:%m:%d:%H:%M:%S")
output_csv = f"{TMP}/{args.k}_{formatted_time}.csv"
# tee shows the benchmark output on the terminal and writes it to output_csv.
execution_cmd = f"{executable} {args.m} {args.p} | tee {output_csv}"
print(f"Executing kernel benchmark:\n{execution_cmd}")


def _discard_output(path):
    """Remove the captured CSV if it exists, without spawning a shell."""
    if os.path.exists(path):
        os.remove(path)


try:
    subprocess.run(execution_cmd, shell=True, check=True)

    if args.m == "many":
        # The executable's output is read as header-less CSV with these columns.
        names = ["size", "allclose", "ref_gflops", "kernel_gflops"]
        print(f"Reading output CSV file: {output_csv}")
        df = pd.read_csv(output_csv, names=names, header=None)

        # Plotting the results
        sns.set_style("darkgrid")
        sns.lineplot(x="size", y="ref_gflops", data=df, label="Reference GFLOPS")
        sns.lineplot(x="size", y="kernel_gflops", data=df, label="Kernel GFLOPS")
        plt.legend(title="GFLOPS")
        plt.xlabel("Size")
        plt.ylabel("GFLOPS")
        # Fixed y-axis range so that plots share the same scale.
        plt.ylim(0, 10)
        plt.title(f"Performance of Kernel: {args.k}")
        save_path = f"{PLOTS}/{args.k}_{formatted_time}.png"
        print(f"Saving plot to: {save_path}")
        plt.savefig(save_path)
        plt.close()

    else:
        # Any mode other than "many" keeps neither the CSV nor a plot.
        _discard_output(output_csv)

except KeyboardInterrupt:
    print("\nBenchmarking interrupted")
    _discard_output(output_csv)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit also does not store data or plot when the benchmark is run only once or the process is interrupted.

exit()
8 changes: 8 additions & 0 deletions src/backend/tests/c-lisp/kernel/kernels.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
MMult1
MMult2
MMult_1x4_3
MMult_1x4_4
MMult_1x4_5
MMult_4x4_3
MMult_4x4_4
MMult_4x4_5
12 changes: 12 additions & 0 deletions src/backend/tests/c-lisp/kernel/kernels_C/MMult1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
 * Reference matrix multiply, C = A * B, on column-major arrays.
 *
 * Element (i, p) of A is read from A[p * m + i], element (p, j) of B from
 * B[j * k + p], and element (i, j) of C is written to C[j * m + i].
 * Each output element is computed in a local accumulator and stored once,
 * so the previous contents of C are overwritten.
 *
 * Always returns 1.
 */
int kernel(float* A, float* B, float* C, int m, int n, int k) {
    for (int row = 0; row < m; row++) {
        for (int col = 0; col < n; col++) {
            /* Offset of the first element of column `col` of B. */
            int b_base = col * k;
            float acc = 0.0f;
            for (int p = 0; p < k; p++) {
                acc += A[p * m + row] * B[b_base + p];
            }
            C[col * m + row] = acc;
        }
    }
    return 1;
}
17 changes: 17 additions & 0 deletions src/backend/tests/c-lisp/kernel/kernels_C/MMult2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "add_dot.c"

/*
 * Column-major matrix multiply that handles four columns of C per pass of
 * the outer loop, issuing one add_dot() call per element of C.
 *
 * a, b, c are indexed as column-major arrays with m, k and m rows
 * respectively (the same layout the MMult1 reference kernel uses).
 * add_dot() is not defined in this file; it comes from the included
 * add_dot.c and is presumably the per-element dot product -- confirm there.
 *
 * Precondition: n must be a multiple of 4. Columns j..j+3 are accessed
 * without a bounds check.
 */
void kernel(float* a, float* b, float* c, int m, int n, int k){

    /* Leading dimension = row count of each column-major matrix. */
    int lda = m;
    int ldb = k; /* was n: B has k rows, so n was only right when n == k */
    int ldc = m; /* was k: C has m rows, so k was only right when k == m */

    for(int j = 0; j < n; j += 4) {
        for(int i = 0; i < m; i++) {
            /* Row i of A against columns j .. j+3 of B. */
            add_dot(k, &a[0 * lda + i], lda, &b[(j + 0) * ldb], &c[(j + 0) * ldc + i]);
            add_dot(k, &a[0 * lda + i], lda, &b[(j + 1) * ldb], &c[(j + 1) * ldc + i]);
            add_dot(k, &a[0 * lda + i], lda, &b[(j + 2) * ldb], &c[(j + 2) * ldc + i]);
            add_dot(k, &a[0 * lda + i], lda, &b[(j + 3) * ldb], &c[(j + 3) * ldc + i]);
        }
    }
}
28 changes: 28 additions & 0 deletions src/backend/tests/c-lisp/kernel/kernels_C/MMult_1x4_3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "add_dot.c"

/*
 * Issue four add_dot() calls for one row of A: the same a/lda is paired
 * with four consecutive columns of b (ldb apart), and the results go to
 * four elements of c that are ldc apart. Calls are made in column order
 * 0, 1, 2, 3. add_dot() itself comes from the included add_dot.c.
 */
void add_dot1x4(int k, float* a, int lda, float* b, int ldb, float* c, int ldc) {
    for (int col = 0; col < 4; col++) {
        add_dot(k, a, lda, &b[col * ldb], &c[col * ldc]);
    }
}

/*
 * Column-major matrix multiply built on add_dot1x4(): for every row i and
 * every group of four columns starting at j, one add_dot1x4() call handles
 * C(i, j) .. C(i, j+3).
 *
 * a, b, c are indexed as column-major arrays with m, k and m rows
 * respectively (the same layout the MMult1 reference kernel uses).
 *
 * Precondition: n must be a multiple of 4. add_dot1x4() always touches
 * four columns.
 */
void kernel(float* a, float* b, float* c, int m, int n, int k) {

    /* Leading dimension = row count of each column-major matrix. */
    int lda = m;
    int ldb = k; /* was n: B has k rows, so n was only right when n == k */
    int ldc = m; /* was k: C has m rows, so k was only right when k == m */

    for(int j = 0; j < n; j += 4) {
        for(int i = 0; i < m; i++) {
            add_dot1x4(
                k,
                &a[i],           /* start of row i of A */
                lda,
                &b[j * ldb],     /* start of column j of B */
                ldb,
                &c[j * ldc + i], /* C(i, j) */
                ldc
            );
        }
    }

}
32 changes: 32 additions & 0 deletions src/backend/tests/c-lisp/kernel/kernels_C/MMult_1x4_4.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
 * Compute four elements of one row of C, with the dot products written out
 * inline as four separate loops.
 *
 * a points at the first element of a row of A (elements lda apart),
 * b at the top of the first of four consecutive columns of B (ldb apart),
 * c at the first of four outputs (ldc apart). The outputs are zeroed and
 * then accumulated in place, so their previous contents are discarded.
 */
void add_dot1x4(int k, float* a, int lda, float* b, int ldb, float* c, int ldc) {
    c[0 * ldc] = 0.;
    c[1 * ldc] = 0.;
    c[2 * ldc] = 0.;
    c[3 * ldc] = 0.;

    for (int p = 0; p < k; p++) {
        c[0 * ldc] += a[p * lda] * b[p + 0 * ldb];
    }

    for (int p = 0; p < k; p++) {
        c[1 * ldc] += a[p * lda] * b[p + 1 * ldb];
    }

    for (int p = 0; p < k; p++) {
        c[2 * ldc] += a[p * lda] * b[p + 2 * ldb];
    }

    for (int p = 0; p < k; p++) {
        c[3 * ldc] += a[p * lda] * b[p + 3 * ldb];
    }
}

/*
 * Column-major matrix multiply C = A * B using add_dot1x4().
 *
 * A is m x k, B is k x n, C is m x n, each stored column-major with no
 * padding between columns.
 *
 * Precondition: n must be a multiple of 4. add_dot1x4() always writes
 * four columns of C.
 */
void kernel(float* a, float* b, float*c, int m, int n, int k) {

    /* Leading dimension = row count of each column-major matrix. */
    int lda = m;
    int ldb = k; /* was n: B has k rows, so n was only right when n == k */
    int ldc = m; /* was k: C has m rows, so k was only right when k == m */

    for (int j = 0; j < n; j += 4) {
        for (int i = 0; i < m; i++) {
            add_dot1x4(k, &a[i], lda, &b[j * ldb], ldb, &c[j * ldc + i], ldc);
        }
    }
}
26 changes: 26 additions & 0 deletions src/backend/tests/c-lisp/kernel/kernels_C/MMult_1x4_5.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
 * Compute four elements of one row of C in a single fused loop: each
 * iteration reads a[p * lda] and updates all four outputs.
 *
 * a points at the first element of a row of A (elements lda apart),
 * b at the top of the first of four consecutive columns of B (ldb apart),
 * c at the first of four outputs (ldc apart). The outputs are zeroed and
 * then accumulated in place, so their previous contents are discarded.
 */
void add_dot1x4(int k, float* a, int lda, float* b, int ldb, float* c, int ldc) {
    c[0 * ldc] = 0.;
    c[1 * ldc] = 0.;
    c[2 * ldc] = 0.;
    c[3 * ldc] = 0.;

    for (int p = 0; p < k; p++) {
        c[0 * ldc] += a[p * lda] * b[p + ldb * 0];
        c[1 * ldc] += a[p * lda] * b[p + ldb * 1];
        c[2 * ldc] += a[p * lda] * b[p + ldb * 2];
        c[3 * ldc] += a[p * lda] * b[p + ldb * 3];
    }
}

/*
 * Column-major matrix multiply C = A * B using add_dot1x4().
 *
 * A is m x k, B is k x n, C is m x n, each stored column-major with no
 * padding between columns.
 *
 * Precondition: n must be a multiple of 4. add_dot1x4() always writes
 * four columns of C.
 */
void kernel(float* a, float* b, float*c, int m, int n, int k) {

    /* Leading dimension = row count of each column-major matrix. */
    int lda = m;
    int ldb = k; /* was n: B has k rows, so n was only right when n == k */
    int ldc = m; /* was k: C has m rows, so k was only right when k == m */

    for (int j = 0; j < n; j += 4) {
        for (int i = 0; i < m; i++) {
            add_dot1x4(k, &a[i], lda, &b[j * ldb], ldb, &c[j * ldc + i], ldc);
        }
    }
}
39 changes: 39 additions & 0 deletions src/backend/tests/c-lisp/kernel/kernels_C/MMult_4x4_3.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#include "add_dot.c"

/*
 * Issue sixteen add_dot() calls covering a 4x4 tile: rows 0..3 starting at
 * a, columns 0..3 starting at b (ldb apart), results addressed as
 * c[row + col * ldc]. Calls are made row by row, and within a row in
 * column order 0, 1, 2, 3. add_dot() itself comes from the included
 * add_dot.c.
 */
void add_dot4x4(int k, float* a, int lda, float* b, int ldb, float* c, int ldc) {
    for (int row = 0; row < 4; row++) {
        for (int col = 0; col < 4; col++) {
            add_dot(k, &a[row], lda, &b[col * ldb], &c[row + col * ldc]);
        }
    }
}

/*
 * Column-major matrix multiply built on add_dot4x4(): C is processed in
 * 4x4 tiles, one add_dot4x4() call per tile whose top-left element is
 * C(i, j).
 *
 * a, b, c are indexed as column-major arrays with m, k and m rows
 * respectively (the same layout the MMult1 reference kernel uses).
 *
 * Precondition: m and n must both be multiples of 4. add_dot4x4() always
 * touches a full 4x4 tile.
 */
void kernel(float* a, float* b, float* c, int m, int n, int k) {

    /* Leading dimension = row count of each column-major matrix. */
    int lda = m;
    int ldb = k; /* was n: B has k rows, so n was only right when n == k */
    int ldc = m; /* was k: C has m rows, so k was only right when k == m */

    for (int j = 0; j < n; j += 4) {
        for (int i = 0; i < m; i += 4) {
            add_dot4x4(k, &a[i], lda, &b[j * ldb], ldb, &c[j * ldc + i], ldc);
        }
    }
}
Loading