Commit 3040be47 authored by Roel Aaij
Browse files

Move to vectorclass v2

parent e20cd172
...@@ -63,23 +63,38 @@ else() ...@@ -63,23 +63,38 @@ else()
endif() endif()
# vectorclass # vectorclass
set(vectorclass_VERSION "1.30") set(vectorclass_VERSION "2.00.01")
set(vectorclass_SRC_URI "https://www.agner.org/optimize/vectorclass.zip") set(vectorclass_SRC_URI "https://github.com/vectorclass/version2/archive/v${vectorclass_VERSION}.tar.gz")
set(vectorclass_DESTDIR "${CMAKE_BINARY_DIR}/include") set(vectorclass_DESTDIR "${CMAKE_BINARY_DIR}/include")
set(vectorclass_ROOTDIR "${vectorclass_DESTDIR}/vectorclass") set(vectorclass_ROOTDIR "${vectorclass_DESTDIR}/vectorclass")
ExternalProject_Add(vectorclass ExternalProject_Add(vectorclass
URL ${vectorclass_SRC_URI} URL ${vectorclass_SRC_URI}
URL_HASH SHA256=f9cb70a3e865dd019b58f449d11f90147ce8ba5f2c60410389ec0ead92944b97 URL_HASH SHA256=73b239876ff3453abbc644a367e7cf2af6738099c60bd4bd32b6985f7ff23aef
SOURCE_DIR vectorclass SOURCE_DIR vectorclass
INSTALL_DIR ${vectorclass_ROOTDIR} INSTALL_DIR ${vectorclass_ROOTDIR}
LOG_DOWNLOAD 1 LOG_BUILD 1 LOG_INSTALL 1 LOG_DOWNLOAD 1 LOG_BUILD 1 LOG_INSTALL 1
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
BUILD_COMMAND unzip -d <SOURCE_DIR> -o <SOURCE_DIR>/special.zip BUILD_COMMAND ""
INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory <SOURCE_DIR> <INSTALL_DIR> INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory <SOURCE_DIR> <INSTALL_DIR>
STEP_TARGETS install STEP_TARGETS install
) )
set(vectorclass_addon_TARBALL "20415ff928ef0586f1574d8598bf4a52aed8d706.tar.gz")
ExternalProject_Add(vectorclass_addon
URL "https://github.com/vectorclass/add-on/archive/${vectorclass_addon_TARBALL}"
URL_HASH SHA256=007e9e99b4561d511bf8a77ea3f350b251d7af00a2fb92fafa6fc402b7e5bf06
DOWNLOAD_NAME "vectorclass_addon.tar.gz"
DOWNLOAD_NO_EXTRACT TRUE
INSTALL_DIR ${vectorclass_ROOTDIR}
LOG_DOWNLOAD 1 LOG_BUILD 1 LOG_INSTALL 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND tar -C <INSTALL_DIR> -xvf <DOWNLOADED_FILE> --strip-components 1
STEP_TARGETS install
)
ExternalProject_Add_StepDependencies(vectorclass_addon configure vectorclass-install)
# FIXME: This is a workaround to let ROOT find the headers at runtime if # FIXME: This is a workaround to let ROOT find the headers at runtime if
# they are in the build directory. This is necessary until we decide how to # they are in the build directory. This is necessary until we decide how to
# treat externals with headers used by ROOT # treat externals with headers used by ROOT
...@@ -92,8 +107,6 @@ if(NOT EXISTS ${CMAKE_BINARY_DIR}/include/Vc) ...@@ -92,8 +107,6 @@ if(NOT EXISTS ${CMAKE_BINARY_DIR}/include/Vc)
endif() endif()
# end of workaround # end of workaround
# install(DIRECTORY ${Vc_DESTDIR}/ DESTINATION ".")
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD 17)
...@@ -138,7 +151,7 @@ if (USE_AVX2) ...@@ -138,7 +151,7 @@ if (USE_AVX2)
target_link_libraries(generate PUBLIC ${Vc_LIBRARIES}) target_link_libraries(generate PUBLIC ${Vc_LIBRARIES})
target_include_directories(generate target_include_directories(generate
PUBLIC ${CMAKE_BINARY_DIR}/include/vectorclass) PUBLIC ${CMAKE_BINARY_DIR}/include/vectorclass)
add_dependencies(generate vectorclass-install) add_dependencies(generate vectorclass_addon-install)
target_compile_definitions(generate PUBLIC "-DVc_IMPL=AVX2") target_compile_definitions(generate PUBLIC "-DVc_IMPL=AVX2")
if (Vc_BUILTIN) if (Vc_BUILTIN)
add_dependencies(generate VC-install) add_dependencies(generate VC-install)
......
...@@ -18,9 +18,8 @@ ...@@ -18,9 +18,8 @@
#include <functional> #include <functional>
#include <Vc/Vc> #include <Vc/Vc>
#include <instrset.h> #include <vectorclass.h>
#include <vectori256.h> #include <random/ranvec1.h>
#include <ranvec1.h>
#include <storage.h> #include <storage.h>
#include <generate_common.h> #include <generate_common.h>
...@@ -45,17 +44,17 @@ float GenAVX2::dot_product(const Vc::SimdArray<float, 3>& left, const Vc::SimdAr ...@@ -45,17 +44,17 @@ float GenAVX2::dot_product(const Vc::SimdArray<float, 3>& left, const Vc::SimdAr
inline int_v scan_AVX(int_v x) { inline int_v scan_AVX(int_v x) {
// first shift then add // first shift then add
auto t0 = permute8i<3, 0, 1, 2, 7, 4, 5, 6>(x.data()); auto t0 = permute8<3, 0, 1, 2, 7, 4, 5, 6>(Vec8i{x.data()});
auto t1 = permute8i<-1, -1, -1, -1, 0, 1, 2, 3>(t0); auto t1 = permute8<-1, -1, -1, -1, 0, 1, 2, 3>(t0);
x += _mm256_blend_epi32(t0, t1, 0x11); x += _mm256_blend_epi32(t0, t1, 0x11);
// second shift then add // second shift then add
t0 = permute8i<2, 3, 0, 1, 6, 7, 4, 5>(x.data()); t0 = permute8<2, 3, 0, 1, 6, 7, 4, 5>(Vec8i{x.data()});
t1 = permute8i<-1, -1, -1, -1, 0, 1, 2, 3>(t0); t1 = permute8<-1, -1, -1, -1, 0, 1, 2, 3>(t0);
x += _mm256_blend_epi32(t0, t1, 0x33); x += _mm256_blend_epi32(t0, t1, 0x33);
// final shift and add // final shift and add
x += int_v{permute8i<-1, -1, -1, -1, 0, 1, 2, 3>(x.data())}; x += int_v{permute8<-1, -1, -1, -1, 0, 1, 2, 3>(Vec8i{x.data()})};
return x; return x;
} }
...@@ -148,7 +147,7 @@ std::tuple<storage_t, storage_t> generate_avx2(const long time_start, const long ...@@ -148,7 +147,7 @@ std::tuple<storage_t, storage_t> generate_avx2(const long time_start, const long
last = second[long_v::size() - 1]; last = second[long_v::size() - 1];
//broadcast last element //broadcast last element
offset.data() = permute4q<3, 3, 3, 3>(second.data()); offset.data() = permute4<3, 3, 3, 3>(Vec4q{second.data()});
// Generate ToT as a gauss and pmt flat. // Generate ToT as a gauss and pmt flat.
// Only do it every other pass to make use of the double // Only do it every other pass to make use of the double
......
#include <ranvec1.cpp> #include <random/ranvec1.cpp>
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment