forked from antirez/ds4
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakefile
More file actions
131 lines (98 loc) · 4.14 KB
/
Makefile
File metadata and controls
131 lines (98 loc) · 4.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
CC ?= cc
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
NATIVE_CPU_FLAG ?= -mcpu=native
else
NATIVE_CPU_FLAG ?= -march=native
endif
CFLAGS ?= -O3 -ffast-math $(NATIVE_CPU_FLAG) -Wall -Wextra -std=c99
OBJCFLAGS ?= -O3 -ffast-math $(NATIVE_CPU_FLAG) -Wall -Wextra -fobjc-arc
LDLIBS ?= -lm -pthread
METAL_SRCS := $(wildcard metal/*.metal)
ifeq ($(UNAME_S),Darwin)
METAL_LDLIBS := $(LDLIBS) -framework Foundation -framework Metal
CORE_OBJS = ds4.o ds4_metal.o
CPU_CORE_OBJS = ds4_cpu.o
else
CFLAGS += -D_GNU_SOURCE -fno-finite-math-only
CUDA_HOME ?= /usr/local/cuda
NVCC ?= $(CUDA_HOME)/bin/nvcc
CUDA_ARCH ?= native
ifneq ($(strip $(CUDA_ARCH)),)
NVCC_ARCH_FLAGS := -arch=$(CUDA_ARCH)
endif
NVCCFLAGS ?= -O3 --use_fast_math $(NVCC_ARCH_FLAGS) -Xcompiler $(NATIVE_CPU_FLAG) -Xcompiler -pthread
CUDA_LDLIBS ?= -lm -Xcompiler -pthread -L$(CUDA_HOME)/targets/sbsa-linux/lib -L$(CUDA_HOME)/lib64 -lcudart -lcublas
CORE_OBJS = ds4.o ds4_cuda.o
CPU_CORE_OBJS = ds4_cpu.o
METAL_LDLIBS := $(LDLIBS)
endif
.PHONY: all clean test cpu cuda-regression
all: ds4 ds4-server ds4-bench
ifeq ($(UNAME_S),Darwin)
ds4: ds4_cli.o linenoise.o $(CORE_OBJS)
$(CC) $(CFLAGS) -o $@ ds4_cli.o linenoise.o $(CORE_OBJS) $(METAL_LDLIBS)
ds4-server: ds4_server.o rax.o $(CORE_OBJS)
$(CC) $(CFLAGS) -o $@ ds4_server.o rax.o $(CORE_OBJS) $(METAL_LDLIBS)
ds4-bench: ds4_bench.o $(CORE_OBJS)
$(CC) $(CFLAGS) -o $@ ds4_bench.o $(CORE_OBJS) $(METAL_LDLIBS)
cpu: ds4_cli_cpu.o ds4_server_cpu.o ds4_bench_cpu.o linenoise.o rax.o $(CPU_CORE_OBJS)
$(CC) $(CFLAGS) -o ds4 ds4_cli_cpu.o linenoise.o $(CPU_CORE_OBJS) $(LDLIBS)
$(CC) $(CFLAGS) -o ds4-server ds4_server_cpu.o rax.o $(CPU_CORE_OBJS) $(LDLIBS)
$(CC) $(CFLAGS) -o ds4-bench ds4_bench_cpu.o $(CPU_CORE_OBJS) $(LDLIBS)
cuda-regression:
@echo "cuda-regression requires a CUDA build"
else
ds4: ds4_cli.o linenoise.o $(CORE_OBJS)
$(NVCC) $(NVCCFLAGS) -o $@ $^ $(CUDA_LDLIBS)
ds4-server: ds4_server.o rax.o $(CORE_OBJS)
$(NVCC) $(NVCCFLAGS) -o $@ $^ $(CUDA_LDLIBS)
ds4-bench: ds4_bench.o $(CORE_OBJS)
$(NVCC) $(NVCCFLAGS) -o $@ $^ $(CUDA_LDLIBS)
cpu: ds4_cli_cpu.o ds4_server_cpu.o ds4_bench_cpu.o linenoise.o rax.o $(CPU_CORE_OBJS)
$(CC) $(CFLAGS) -o ds4 ds4_cli_cpu.o linenoise.o $(CPU_CORE_OBJS) $(LDLIBS)
$(CC) $(CFLAGS) -o ds4-server ds4_server_cpu.o rax.o $(CPU_CORE_OBJS) $(LDLIBS)
$(CC) $(CFLAGS) -o ds4-bench ds4_bench_cpu.o $(CPU_CORE_OBJS) $(LDLIBS)
cuda-regression: tests/cuda_long_context_smoke
./tests/cuda_long_context_smoke
endif
ds4.o: ds4.c ds4.h ds4_gpu.h
$(CC) $(CFLAGS) -c -o $@ ds4.c
ds4_cli.o: ds4_cli.c ds4.h linenoise.h
$(CC) $(CFLAGS) -c -o $@ ds4_cli.c
ds4_server.o: ds4_server.c ds4.h rax.h
$(CC) $(CFLAGS) -c -o $@ ds4_server.c
ds4_bench.o: ds4_bench.c ds4.h
$(CC) $(CFLAGS) -c -o $@ ds4_bench.c
ds4_test.o: tests/ds4_test.c ds4_server.c ds4.h rax.h
$(CC) $(CFLAGS) -Wno-unused-function -c -o $@ tests/ds4_test.c
tests/cuda_long_context_smoke.o: tests/cuda_long_context_smoke.c ds4_gpu.h
$(CC) $(CFLAGS) -I. -c -o $@ tests/cuda_long_context_smoke.c
rax.o: rax.c rax.h rax_malloc.h
$(CC) $(CFLAGS) -c -o $@ rax.c
linenoise.o: linenoise.c linenoise.h
$(CC) $(CFLAGS) -c -o $@ linenoise.c
ds4_cpu.o: ds4.c ds4.h ds4_gpu.h
$(CC) $(CFLAGS) -DDS4_NO_GPU -c -o $@ ds4.c
ds4_cli_cpu.o: ds4_cli.c ds4.h linenoise.h
$(CC) $(CFLAGS) -DDS4_NO_GPU -c -o $@ ds4_cli.c
ds4_server_cpu.o: ds4_server.c ds4.h rax.h
$(CC) $(CFLAGS) -DDS4_NO_GPU -c -o $@ ds4_server.c
ds4_bench_cpu.o: ds4_bench.c ds4.h
$(CC) $(CFLAGS) -DDS4_NO_GPU -c -o $@ ds4_bench.c
ds4_metal.o: ds4_metal.m ds4_gpu.h $(METAL_SRCS)
$(CC) $(OBJCFLAGS) -c -o $@ ds4_metal.m
ds4_cuda.o: ds4_cuda.cu ds4_gpu.h ds4_iq2_tables_cuda.inc
$(NVCC) $(NVCCFLAGS) -c -o $@ ds4_cuda.cu
tests/cuda_long_context_smoke: tests/cuda_long_context_smoke.o ds4_cuda.o
$(NVCC) $(NVCCFLAGS) -o $@ $^ $(CUDA_LDLIBS)
ds4_test: ds4_test.o rax.o $(CORE_OBJS)
ifeq ($(UNAME_S),Darwin)
$(CC) $(CFLAGS) -o $@ ds4_test.o rax.o $(CORE_OBJS) $(METAL_LDLIBS)
else
$(NVCC) $(NVCCFLAGS) -o $@ ds4_test.o rax.o $(CORE_OBJS) $(CUDA_LDLIBS)
endif
test: ds4_test
./ds4_test
clean:
rm -f ds4 ds4-server ds4-bench ds4_cpu ds4_native ds4_server_test ds4_test *.o tests/cuda_long_context_smoke tests/cuda_long_context_smoke.o