Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Roel Aaij
k40gen
Commits
3040be47
Commit
3040be47
authored
Oct 23, 2019
by
Roel Aaij
Browse files
Move to vectorclass v2
parent
e20cd172
Changes
3
Hide whitespace changes
Inline
Side-by-side
CMakeLists.txt
View file @
3040be47
...
...
@@ -63,23 +63,38 @@ else()
endif
()
# vectorclass
set
(
vectorclass_VERSION
"
1.30
"
)
set
(
vectorclass_SRC_URI
"https://
www.agner.org/optimize/vectorclass.zip
"
)
set
(
vectorclass_VERSION
"
2.00.01
"
)
set
(
vectorclass_SRC_URI
"https://
github.com/vectorclass/version2/archive/v
${
vectorclass_VERSION
}
.tar.gz
"
)
set
(
vectorclass_DESTDIR
"
${
CMAKE_BINARY_DIR
}
/include"
)
set
(
vectorclass_ROOTDIR
"
${
vectorclass_DESTDIR
}
/vectorclass"
)
ExternalProject_Add
(
vectorclass
URL
${
vectorclass_SRC_URI
}
URL_HASH SHA256=
f9cb70a3e865dd019b58f449d11f90147ce8ba5f2c60410389ec0ead92944b97
URL_HASH SHA256=
73b239876ff3453abbc644a367e7cf2af6738099c60bd4bd32b6985f7ff23aef
SOURCE_DIR vectorclass
INSTALL_DIR
${
vectorclass_ROOTDIR
}
LOG_DOWNLOAD 1 LOG_BUILD 1 LOG_INSTALL 1
CONFIGURE_COMMAND
""
BUILD_COMMAND
unzip -d <SOURCE_DIR> -o <SOURCE_DIR>/special.zip
BUILD_COMMAND
""
INSTALL_COMMAND
${
CMAKE_COMMAND
}
-E copy_directory <SOURCE_DIR> <INSTALL_DIR>
STEP_TARGETS install
)
set
(
vectorclass_addon_TARBALL
"20415ff928ef0586f1574d8598bf4a52aed8d706.tar.gz"
)
ExternalProject_Add
(
vectorclass_addon
URL
"https://github.com/vectorclass/add-on/archive/
${
vectorclass_addon_TARBALL
}
"
URL_HASH SHA256=007e9e99b4561d511bf8a77ea3f350b251d7af00a2fb92fafa6fc402b7e5bf06
DOWNLOAD_NAME
"vectorclass_addon.tar.gz"
DOWNLOAD_NO_EXTRACT TRUE
INSTALL_DIR
${
vectorclass_ROOTDIR
}
LOG_DOWNLOAD 1 LOG_BUILD 1 LOG_INSTALL 1
CONFIGURE_COMMAND
""
BUILD_COMMAND
""
INSTALL_COMMAND tar -C <INSTALL_DIR> -xvf <DOWNLOADED_FILE> --strip-components 1
STEP_TARGETS install
)
ExternalProject_Add_StepDependencies
(
vectorclass_addon configure vectorclass-install
)
# FIXME: This is a workaround to let ROOT find the headers at runtime if
# they are in the build directory. This is necessary until we decide how to
# treat externals with headers used by ROOT
...
...
@@ -92,8 +107,6 @@ if(NOT EXISTS ${CMAKE_BINARY_DIR}/include/Vc)
endif
()
# end of workaround
# install(DIRECTORY ${Vc_DESTDIR}/ DESTINATION ".")
list
(
APPEND CMAKE_MODULE_PATH
"
${
CMAKE_SOURCE_DIR
}
/cmake"
)
set
(
CMAKE_CXX_STANDARD 17
)
...
...
@@ -138,7 +151,7 @@ if (USE_AVX2)
target_link_libraries
(
generate PUBLIC
${
Vc_LIBRARIES
}
)
target_include_directories
(
generate
PUBLIC
${
CMAKE_BINARY_DIR
}
/include/vectorclass
)
add_dependencies
(
generate vectorclass-install
)
add_dependencies
(
generate vectorclass
_addon
-install
)
target_compile_definitions
(
generate PUBLIC
"-DVc_IMPL=AVX2"
)
if
(
Vc_BUILTIN
)
add_dependencies
(
generate VC-install
)
...
...
src/generate/generate_avx2.cpp
View file @
3040be47
...
...
@@ -18,9 +18,8 @@
#include
<functional>
#include
<Vc/Vc>
#include
<instrset.h>
#include
<vectori256.h>
#include
<ranvec1.h>
#include
<vectorclass.h>
#include
<random/ranvec1.h>
#include
<storage.h>
#include
<generate_common.h>
...
...
@@ -45,17 +44,17 @@ float GenAVX2::dot_product(const Vc::SimdArray<float, 3>& left, const Vc::SimdAr
inline
int_v
scan_AVX
(
int_v
x
)
{
// first shift then add
auto
t0
=
permute8
i
<
3
,
0
,
1
,
2
,
7
,
4
,
5
,
6
>
(
x
.
data
());
auto
t1
=
permute8
i
<-
1
,
-
1
,
-
1
,
-
1
,
0
,
1
,
2
,
3
>
(
t0
);
auto
t0
=
permute8
<
3
,
0
,
1
,
2
,
7
,
4
,
5
,
6
>
(
Vec8i
{
x
.
data
()
}
);
auto
t1
=
permute8
<-
1
,
-
1
,
-
1
,
-
1
,
0
,
1
,
2
,
3
>
(
t0
);
x
+=
_mm256_blend_epi32
(
t0
,
t1
,
0x11
);
// second shift then add
t0
=
permute8
i
<
2
,
3
,
0
,
1
,
6
,
7
,
4
,
5
>
(
x
.
data
());
t1
=
permute8
i
<-
1
,
-
1
,
-
1
,
-
1
,
0
,
1
,
2
,
3
>
(
t0
);
t0
=
permute8
<
2
,
3
,
0
,
1
,
6
,
7
,
4
,
5
>
(
Vec8i
{
x
.
data
()
}
);
t1
=
permute8
<-
1
,
-
1
,
-
1
,
-
1
,
0
,
1
,
2
,
3
>
(
t0
);
x
+=
_mm256_blend_epi32
(
t0
,
t1
,
0x33
);
// final shift and add
x
+=
int_v
{
permute8
i
<-
1
,
-
1
,
-
1
,
-
1
,
0
,
1
,
2
,
3
>
(
x
.
data
())};
x
+=
int_v
{
permute8
<-
1
,
-
1
,
-
1
,
-
1
,
0
,
1
,
2
,
3
>
(
Vec8i
{
x
.
data
()
}
)};
return
x
;
}
...
...
@@ -148,7 +147,7 @@ std::tuple<storage_t, storage_t> generate_avx2(const long time_start, const long
last
=
second
[
long_v
::
size
()
-
1
];
//broadcast last element
offset
.
data
()
=
permute4
q
<
3
,
3
,
3
,
3
>
(
second
.
data
());
offset
.
data
()
=
permute4
<
3
,
3
,
3
,
3
>
(
Vec4q
{
second
.
data
()
}
);
// Generate ToT as a gauss and pmt flat.
// Only do it every other pass to make use of the double
...
...
src/generate/vectorclass_ranvec1.cpp
View file @
3040be47
#include
<ranvec1.cpp>
#include
<
random/
ranvec1.cpp>
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment