Skip to content

Commit

Permalink
Introduce avx512 optimization mode and FAISS_OPT_LEVEL env variable (f…
Browse files Browse the repository at this point in the history
…acebookresearch#3150)

Summary:
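Enables AVX512-optimized code (AVX512 subsets F, CD, VL, DQ and BW, which are available on Intel Skylake+ and all AMD Zen4 CPUs).
Also introduces the `FAISS_OPT_LEVEL` environment variable: set it to `AVX2`, `AVX512`, or an empty string to choose which x86_64 build the Python loader imports (an empty value selects the generic build). When the variable is not set, the loader picks a build according to the current CPU.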

Built with the following commands:
```
cmake -B build -DCMAKE_BUILD_TYPE=Release -DFAISS_ENABLE_GPU=OFF -DFAISS_OPT_LEVEL=avx512 -DBUILD_TESTING=ON .
make -C build -j 8 faiss_test
make -C build -j 8 swigfaiss
make -C build -j 8 swigfaiss_avx2
make -C build -j 8 swigfaiss_avx512
cd build/faiss/python
python3 setup.py build
python3 setup.py install --force
```

Now, running the following script `1.py`
```
import logging
logging.basicConfig(level=logging.DEBUG)

import faiss
```
produces the following:
```
root@6179abeef23c:~/faiss# LOGLEVEL=DEBUG FAISS_OPT_LEVEL= python3 1.py
DEBUG:faiss.loader:Using  as an instruction set.
INFO:faiss.loader:Loading faiss.
INFO:faiss.loader:Successfully loaded faiss.
root@6179abeef23c:~/faiss# LOGLEVEL=DEBUG FAISS_OPT_LEVEL=AVX2 python3 1.py
DEBUG:faiss.loader:Using AVX2 as an instruction set.
INFO:faiss.loader:Loading faiss with AVX2 support.
INFO:faiss.loader:Successfully loaded faiss with AVX2 support.
root@6179abeef23c:~/faiss# LOGLEVEL=DEBUG FAISS_OPT_LEVEL=AVX512 python3 1.py
DEBUG:faiss.loader:Using AVX512 as an instruction set.
INFO:faiss.loader:Loading faiss with AVX512 support.
INFO:faiss.loader:Successfully loaded faiss with AVX512 support.
root@6179abeef23c:~/faiss# LOGLEVEL=DEBUG python3 1.py
DEBUG:faiss.loader:Environment variable FAISS_OPT_LEVEL is not set, so let's pick the instruction set according to the current CPU
INFO:faiss.loader:Loading faiss with AVX512 support.
INFO:faiss.loader:Successfully loaded faiss with AVX512 support.
```

Pull Request resolved: facebookresearch#3150

Reviewed By: algoriddle

Differential Revision: D51701077

Pulled By: mdouze

fbshipit-source-id: 4db05a287e763ff1ce1f676df7f7402532bf1e9e
  • Loading branch information
alexanderguzhva authored and facebook-github-bot committed Dec 5, 2023
1 parent 4c83965 commit eefa391
Show file tree
Hide file tree
Showing 11 changed files with 131 additions and 16 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ set(CMAKE_CXX_STANDARD 17)

list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")

# SIMD optimization level of the build.
# Valid values are "generic", "avx2", "avx512".
# Declared as a STRING cache entry: option() only models booleans and would
# store the "generic" default as OFF. A value passed with -DFAISS_OPT_LEVEL=...
# on the command line is kept as-is.
set(FAISS_OPT_LEVEL "generic" CACHE STRING
    "SIMD optimization level: generic, avx2 or avx512.")
# Offer the valid values as a drop-down in cmake-gui / ccmake.
set_property(CACHE FAISS_OPT_LEVEL PROPERTY STRINGS generic avx2 avx512)
option(FAISS_ENABLE_GPU "Enable support for GPU indexes." ON)
option(FAISS_ENABLE_RAFT "Enable RAFT for GPU indexes." OFF)
Expand Down
4 changes: 2 additions & 2 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,8 @@ Several options can be passed to CMake, among which:
- `-DCMAKE_BUILD_TYPE=Release` in order to enable generic compiler
optimization options (enables `-O3` on gcc for instance),
- `-DFAISS_OPT_LEVEL=avx2` in order to enable the required compiler flags to
generate code using optimized SIMD instructions (possible values are `generic`
and `avx2`, by increasing order of optimization),
generate code using optimized SIMD instructions (possible values are `generic`,
`avx2` and `avx512`, by increasing order of optimization),
- BLAS-related options:
- `-DBLA_VENDOR=Intel10_64_dyn -DMKL_LIBRARIES=/path/to/mkl/libs` to use the
Intel MKL BLAS implementation, which is significantly faster than OpenBLAS
Expand Down
34 changes: 34 additions & 0 deletions faiss/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,12 +244,29 @@ else()
add_compile_options(/bigobj)
endif()

# AVX512 flavour of the library: the same sources as the other flavours,
# compiled with the AVX512 F, CD, VL, DQ and BW subsets enabled.
add_library(faiss_avx512 ${FAISS_SRC})

# Keep the target out of the default build unless AVX512 was requested.
if(NOT "${FAISS_OPT_LEVEL}" STREQUAL "avx512")
  set_target_properties(faiss_avx512 PROPERTIES EXCLUDE_FROM_ALL TRUE)
endif()

if(NOT WIN32)
  # The resulting binary requires a CPU with the F, CD, VL, DQ and BW subsets.
  # Ref: https://en.wikipedia.org/wiki/AVX512
  target_compile_options(faiss_avx512 PRIVATE
    "$<$<COMPILE_LANGUAGE:CXX>:-mavx2;-mfma;-mf16c;-mavx512f;-mavx512cd;-mavx512vl;-mavx512dq;-mavx512bw;-mpopcnt>")
else()
  # /bigobj is attached to this target instead of the whole directory: a
  # directory-level add_compile_options() issued after add_library() would not
  # reach faiss_avx512 and would leak into every target defined afterwards.
  target_compile_options(faiss_avx512 PRIVATE
    "$<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512;/bigobj>")
endif()

# Handle `#include <faiss/foo.h>` for every flavour of the library by exposing
# the project root as a build-tree include directory.
foreach(faiss_variant IN ITEMS faiss faiss_avx2 faiss_avx512)
  target_include_directories(${faiss_variant} PUBLIC
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>)
endforeach()

set_target_properties(faiss PROPERTIES
POSITION_INDEPENDENT_CODE ON
Expand All @@ -259,31 +276,41 @@ set_target_properties(faiss_avx2 PROPERTIES
POSITION_INDEPENDENT_CODE ON
WINDOWS_EXPORT_ALL_SYMBOLS ON
)
# Same target properties as the other library flavours: position-independent
# code, and all symbols exported on Windows.
set_property(TARGET faiss_avx512 PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET faiss_avx512 PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON)

# Private preprocessor definitions shared by every flavour of the library.
foreach(faiss_variant IN ITEMS faiss faiss_avx2 faiss_avx512)
  if(WIN32)
    # Defined only for Windows builds of the library itself.
    target_compile_definitions(${faiss_variant} PRIVATE FAISS_MAIN_LIB)
  endif()
  target_compile_definitions(${faiss_variant} PRIVATE FINTEGER=int)
endforeach()

# OpenMP is mandatory for every flavour of the library.
find_package(OpenMP REQUIRED)
foreach(faiss_variant IN ITEMS faiss faiss_avx2 faiss_avx512)
  target_link_libraries(${faiss_variant} PRIVATE OpenMP::OpenMP_CXX)
endforeach()

# Linear algebra backend: use MKL when it is found, otherwise require a
# generic BLAS + LAPACK pair.
find_package(MKL)
if(MKL_FOUND)
  foreach(faiss_variant IN ITEMS faiss faiss_avx2 faiss_avx512)
    target_link_libraries(${faiss_variant} PRIVATE ${MKL_LIBRARIES})
  endforeach()
else()
  find_package(BLAS REQUIRED)
  find_package(LAPACK REQUIRED)
  foreach(faiss_variant IN ITEMS faiss faiss_avx2 faiss_avx512)
    target_link_libraries(${faiss_variant} PRIVATE ${BLAS_LIBRARIES})
    target_link_libraries(${faiss_variant} PRIVATE ${LAPACK_LIBRARIES})
  endforeach()
endif()

install(TARGETS faiss
Expand All @@ -300,6 +327,13 @@ if(FAISS_OPT_LEVEL STREQUAL "avx2")
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
endif()
# Install the AVX512 library and add it to the faiss-targets export set, but
# only when that optimization level was requested.
if(FAISS_OPT_LEVEL STREQUAL "avx512")
  install(
    TARGETS faiss_avx512
    EXPORT faiss-targets
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()

foreach(header ${FAISS_HEADERS})
get_filename_component(dir ${header} DIRECTORY )
Expand Down
28 changes: 28 additions & 0 deletions faiss/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,20 +44,24 @@ endmacro()
# CMake's SWIG wrappers only allow tweaking certain settings at source level, so
# we duplicate the source in order to override the module name.
# Each call makes a verbatim (COPYONLY) copy of swigfaiss.swig, one per
# optimized flavour of the Python module.
# NOTE(review): the copies are written to CMAKE_CURRENT_SOURCE_DIR, i.e. into
# the source tree rather than the build tree — confirm this is intentional.
configure_file(swigfaiss.swig ${CMAKE_CURRENT_SOURCE_DIR}/swigfaiss_avx2.swig COPYONLY)
configure_file(swigfaiss.swig ${CMAKE_CURRENT_SOURCE_DIR}/swigfaiss_avx512.swig COPYONLY)

# Apply configure_swigfaiss (defined outside this excerpt) to the generic
# interface file and to both copies.
configure_swigfaiss(swigfaiss.swig)
configure_swigfaiss(swigfaiss_avx2.swig)
configure_swigfaiss(swigfaiss_avx512.swig)

if(TARGET faiss)
# Manually add headers as extra dependencies of swigfaiss.
set(SWIG_MODULE_swigfaiss_EXTRA_DEPS)
foreach(h ${FAISS_HEADERS})
list(APPEND SWIG_MODULE_swigfaiss_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/${h}")
list(APPEND SWIG_MODULE_swigfaiss_avx2_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/${h}")
list(APPEND SWIG_MODULE_swigfaiss_avx512_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/${h}")
endforeach()
foreach(h ${FAISS_GPU_HEADERS})
list(APPEND SWIG_MODULE_swigfaiss_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/gpu/${h}")
list(APPEND SWIG_MODULE_swigfaiss_avx2_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/gpu/${h}")
list(APPEND SWIG_MODULE_swigfaiss_avx512_EXTRA_DEPS "${faiss_SOURCE_DIR}/faiss/gpu/${h}")
endforeach()
else()
find_package(faiss REQUIRED)
Expand All @@ -82,14 +86,28 @@ if(NOT FAISS_OPT_LEVEL STREQUAL "avx2")
set_target_properties(swigfaiss_avx2 PROPERTIES EXCLUDE_FROM_ALL TRUE)
endif()

# Python extension module generated by SWIG from the AVX512 copy of the
# interface file. The module name is overridden on the source file so the
# generated module is called swigfaiss_avx512.
set_source_files_properties(swigfaiss_avx512.swig PROPERTIES
  SWIG_MODULE_NAME swigfaiss_avx512)
swig_add_library(swigfaiss_avx512
  TYPE SHARED
  LANGUAGE python
  SOURCES swigfaiss_avx512.swig)
set_target_properties(swigfaiss_avx512 PROPERTIES
  SWIG_COMPILE_OPTIONS -doxygen)

# Keep the module out of the default build unless AVX512 was requested.
if(NOT FAISS_OPT_LEVEL STREQUAL "avx512")
  set_property(TARGET swigfaiss_avx512 PROPERTY EXCLUDE_FROM_ALL TRUE)
endif()

# Platform-specific settings applied to each Python extension module.
foreach(swig_module IN ITEMS swigfaiss swigfaiss_avx2 swigfaiss_avx512)
  if(WIN32)
    # we need bigobj for the swig wrapper
    target_compile_options(${swig_module} PRIVATE /bigobj)
  else()
    # NOTE: Python does not recognize the dylib extension.
    set_target_properties(${swig_module} PROPERTIES SUFFIX .so)
  endif()
endforeach()

if(FAISS_ENABLE_GPU)
Expand All @@ -99,6 +117,7 @@ if(FAISS_ENABLE_GPU)
endif()
target_link_libraries(swigfaiss PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
target_link_libraries(swigfaiss_avx2 PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
target_link_libraries(swigfaiss_avx512 PRIVATE CUDA::cudart $<$<BOOL:${FAISS_ENABLE_RAFT}>:raft::raft> $<$<BOOL:${FAISS_ENABLE_RAFT}>:nvidia::cutlass::cutlass>)
endif()

find_package(OpenMP REQUIRED)
Expand All @@ -117,10 +136,18 @@ target_link_libraries(swigfaiss_avx2 PRIVATE
OpenMP::OpenMP_CXX
)

# The AVX512 Python module links the AVX512 core library, the Python and NumPy
# imported targets, and OpenMP.
target_link_libraries(swigfaiss_avx512
  PRIVATE faiss_avx512 Python::Module Python::NumPy OpenMP::OpenMP_CXX)

# Hack so that python_callbacks.h can be included as
# `#include <faiss/python/python_callbacks.h>` from every Python module.
foreach(swig_module IN ITEMS swigfaiss swigfaiss_avx2 swigfaiss_avx512)
  target_include_directories(${swig_module} PRIVATE ${PROJECT_SOURCE_DIR}/../..)
endforeach()

find_package(Python REQUIRED
COMPONENTS Development NumPy
Expand All @@ -140,6 +167,7 @@ target_include_directories(faiss_python_callbacks PRIVATE ${Python_INCLUDE_DIRS}

# Every Python module links the faiss_python_callbacks library.
foreach(swig_module IN ITEMS swigfaiss swigfaiss_avx2 swigfaiss_avx512)
  target_link_libraries(${swig_module} PRIVATE faiss_python_callbacks)
endforeach()

configure_file(setup.py setup.py COPYONLY)
configure_file(__init__.py __init__.py COPYONLY)
Expand Down
45 changes: 39 additions & 6 deletions faiss/python/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def supported_instruction_sets():
Example:
>>> supported_instruction_sets() # for x86
{"SSE2", "AVX2", ...}
{"SSE2", "AVX2", "AVX512", ...}
>>> supported_instruction_sets() # for PPC
{"VSX", "VSX2", ...}
>>> supported_instruction_sets() # for ARM
Expand All @@ -41,25 +41,58 @@ def supported_instruction_sets():
return {"AVX2"}
elif platform.system() == "Linux":
import numpy.distutils.cpuinfo
result = set()
if "avx2" in numpy.distutils.cpuinfo.cpu.info[0].get('flags', ""):
return {"AVX2"}
result.add("AVX2")
if "avx512" in numpy.distutils.cpuinfo.cpu.info[0].get('flags', ""):
result.add("AVX512")
return result
return set()


logger = logging.getLogger(__name__)

# Decide which compiled flavour of the extension module to import.
# The FAISS_OPT_LEVEL environment variable, when set (even to an empty string),
# overrides CPU detection; otherwise the instruction sets reported by
# supported_instruction_sets() are used.
opt_env_variable_name = "FAISS_OPT_LEVEL"
opt_level = os.environ.get(opt_env_variable_name, None)

if opt_level is None:
    logger.debug(f"Environment variable {opt_env_variable_name} is not set, "
                 "so let's pick the instruction set according to the current CPU")
    instruction_sets = supported_instruction_sets()
else:
    logger.debug(f"Using {opt_level} as an instruction set.")
    # Normalize the case so that "avx2" and "AVX2" select the same build; the
    # AVX512 test below was already case-insensitive, the AVX2 one was not.
    instruction_sets = {opt_level.upper()}

# Try the most specialized build first and fall back step by step.
loaded = False
has_AVX512 = any("AVX512" in x.upper() for x in instruction_sets)
if has_AVX512:
    try:
        logger.info("Loading faiss with AVX512 support.")
        from .swigfaiss_avx512 import *
        logger.info("Successfully loaded faiss with AVX512 support.")
        loaded = True
    except ImportError as e:
        logger.info(f"Could not load library with AVX512 support due to:\n{e!r}")
        # reset so that we load without AVX512 below
        loaded = False

has_AVX2 = "AVX2" in instruction_sets
if has_AVX2 and not loaded:
    try:
        logger.info("Loading faiss with AVX2 support.")
        from .swigfaiss_avx2 import *
        logger.info("Successfully loaded faiss with AVX2 support.")
        loaded = True
    except ImportError as e:
        logger.info(f"Could not load library with AVX2 support due to:\n{e!r}")
        # reset so that we load without AVX2 below
        loaded = False

if not loaded:
    # we import * so that the symbol X can be accessed as faiss.X
    logger.info("Loading faiss.")
    from .swigfaiss import *
Expand Down
11 changes: 9 additions & 2 deletions faiss/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@

swigfaiss_generic_lib = f"{prefix}_swigfaiss{ext}"
swigfaiss_avx2_lib = f"{prefix}_swigfaiss_avx2{ext}"
swigfaiss_avx512_lib = f"{prefix}_swigfaiss_avx512{ext}"

found_swigfaiss_generic = os.path.exists(swigfaiss_generic_lib)
found_swigfaiss_avx2 = os.path.exists(swigfaiss_avx2_lib)
found_swigfaiss_avx512 = os.path.exists(swigfaiss_avx512_lib)

assert (found_swigfaiss_generic or found_swigfaiss_avx2), \
assert (found_swigfaiss_generic or found_swigfaiss_avx2 or found_swigfaiss_avx512), \
f"Could not find {swigfaiss_generic_lib} or " \
f"{swigfaiss_avx2_lib}. Faiss may not be compiled yet."
f"{swigfaiss_avx2_lib} or {swigfaiss_avx512_lib}. Faiss may not be compiled yet."

if found_swigfaiss_generic:
print(f"Copying {swigfaiss_generic_lib}")
Expand All @@ -43,6 +45,11 @@
shutil.copyfile("swigfaiss_avx2.py", "faiss/swigfaiss_avx2.py")
shutil.copyfile(swigfaiss_avx2_lib, f"faiss/_swigfaiss_avx2{ext}")

if found_swigfaiss_avx512:
print(f"Copying {swigfaiss_avx512_lib}")
shutil.copyfile("swigfaiss_avx512.py", "faiss/swigfaiss_avx512.py")
shutil.copyfile(swigfaiss_avx512_lib, f"faiss/_swigfaiss_avx512{ext}")

long_description="""
Faiss is a library for efficient similarity search and clustering of dense
vectors. It contains algorithms that search in sets of vectors of any size,
Expand Down
2 changes: 1 addition & 1 deletion faiss/utils/distances_fused/avx512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include <faiss/utils/distances_fused/avx512.h>

#ifdef __AVX512__
#ifdef __AVX512F__

#include <immintrin.h>

Expand Down
2 changes: 1 addition & 1 deletion faiss/utils/distances_fused/avx512.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#include <faiss/utils/Heap.h>

#ifdef __AVX512__
#ifdef __AVX512F__

namespace faiss {

Expand Down
2 changes: 1 addition & 1 deletion faiss/utils/distances_fused/distances_fused.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ bool exhaustive_L2sqr_fused_cmax(
return true;
}

#ifdef __AVX512__
#ifdef __AVX512F__
// avx512 kernel
return exhaustive_L2sqr_fused_cmax_AVX512(x, y, d, nx, ny, res, y_norms);
#elif defined(__AVX2__) || defined(__aarch64__)
Expand Down
2 changes: 2 additions & 0 deletions faiss/utils/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ std::string get_compile_options() {

// Report the widest x86 SIMD level first: the AVX512 build is also compiled
// with AVX2 enabled, so testing __AVX2__ first made the AVX512 branch
// unreachable. Each token ends with a space, like the other entries.
#ifdef __AVX512F__
    options += "AVX512 ";
#elif defined(__AVX2__)
    options += "AVX2 ";
#elif defined(__aarch64__)
    options += "NEON ";
#else
Expand Down
15 changes: 13 additions & 2 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,26 @@ set(FAISS_TEST_SRC

add_executable(faiss_test ${FAISS_TEST_SRC})

# Link faiss_test against exactly one flavour of the library, selected by
# FAISS_OPT_LEVEL, and compile the tests with the matching SIMD flags.
# A single if/elseif/else chain guarantees that the generic library is linked
# only when no optimized flavour was requested.
if("${FAISS_OPT_LEVEL}" STREQUAL "avx2")
  if(NOT WIN32)
    target_compile_options(faiss_test PRIVATE
      "$<$<COMPILE_LANGUAGE:CXX>:-mavx2;-mfma>")
  else()
    target_compile_options(faiss_test PRIVATE
      $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>)
  endif()
  target_link_libraries(faiss_test PRIVATE faiss_avx2)
elseif("${FAISS_OPT_LEVEL}" STREQUAL "avx512")
  if(NOT WIN32)
    # -mavx512f was previously listed twice; each flag is now given once.
    target_compile_options(faiss_test PRIVATE
      "$<$<COMPILE_LANGUAGE:CXX>:-mavx2;-mfma;-mavx512f;-mavx512cd;-mavx512vl;-mavx512dq;-mavx512bw>")
  else()
    target_compile_options(faiss_test PRIVATE
      $<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
  endif()
  target_link_libraries(faiss_test PRIVATE faiss_avx512)
else()
  target_link_libraries(faiss_test PRIVATE faiss)
endif()

include(FetchContent)
Expand Down

0 comments on commit eefa391

Please sign in to comment.