Commit 6c54ce72 authored by Roel Aaij's avatar Roel Aaij
Browse files

Fix generation of values and coincidences. Tests work.

parent c9b7f6f5
...@@ -113,7 +113,6 @@ endif() ...@@ -113,7 +113,6 @@ endif()
set(generate_SOURCES set(generate_SOURCES
src/generate/generate.cpp src/generate/generate.cpp
src/generate/storage.cpp
src/generate/generate_scalar.cpp) src/generate/generate_scalar.cpp)
if (USE_AVX2) if (USE_AVX2)
......
...@@ -74,14 +74,17 @@ public: ...@@ -74,14 +74,17 @@ public:
return 1.0e9f / m_rates[0]; return 1.0e9f / m_rates[0];
} }
size_t n_expect(const long interval) const { size_t n_expect(const long interval, bool include_l0) const {
float total_rate = std::accumulate(begin(m_rates), end(m_rates), 0.f); using namespace Constants;
float l0_rate = include_l0 ? m_rates[0] : 0.f;
float l1_rate = std::accumulate(std::next(begin(m_rates)), end(m_rates), 0.f);
// Fudge with 30% extra space to avoid reallocating // Fudge with 30% extra space to avoid reallocating
double n_per_pmt = 1.3 * total_rate * (double)(interval) / 1e9; double n_per_pmt = 1.3 * (l0_rate + 3.f * l1_rate) * (double)(interval) / 1e9;
if (n_per_pmt > std::numeric_limits<float>::max()) { auto n_pmts = n_dom * n_mod * n_pmt;
if (n_per_pmt > std::numeric_limits<float>::max() / n_pmts) {
throw std::domain_error{"rate of " + std::to_string(n_per_pmt) + " is too large"}; throw std::domain_error{"rate of " + std::to_string(n_per_pmt) + " is too large"};
} }
return std::lround(Constants::n_dom * Constants::n_mod * (float)n_per_pmt); return std::lround(n_pmts * (float)n_per_pmt);
} }
}; };
...@@ -112,8 +115,8 @@ unsigned int random_index(const Container& buffer, const double random) { ...@@ -112,8 +115,8 @@ unsigned int random_index(const Container& buffer, const double random) {
} }
} }
std::tuple<std::array<unsigned int, 4>, size_t> std::tuple<size_t, size_t>
fill_coincidences(storage_t& times, size_t idx, fill_coincidences(storage_t& times, pmts_t& pmts, size_t idx,
const long time_start, const long time_end, const long time_start, const long time_end,
Generators& gens); Generators& gens);
......
...@@ -15,23 +15,15 @@ ...@@ -15,23 +15,15 @@
*/ */
#pragma once #pragma once
#ifdef HAVE_CUDA
#include <thrust/host_vector.h>
#include <thrust/system/cuda/experimental/pinned_allocator.h>
#else
#include <vector> #include <vector>
#endif
#include "aligned_allocator.h"
namespace storage {
extern int n_per_mod;
}
#ifdef HAVE_CUDA #ifdef USE_AVX2
using storage_t = thrust::host_vector<long, thrust::cuda::experimental::pinned_allocator<int>>; #include <Vc/Allocator>
using storage_t = std::vector<long, Vc::Allocator<long>>;
using pmts_t = std::vector<int, Vc::Allocator<int>>;
#else #else
using storage_t = std::vector<long, aligned_allocator<long>>; using storage_t = std::vector<long>;
using pmts_t = std::vector<int>;
#endif #endif
using queue_t = std::vector<std::tuple<storage_t, storage_t>>; using queue_t = std::vector<std::tuple<storage_t, storage_t>>;
...@@ -27,7 +27,6 @@ ...@@ -27,7 +27,6 @@
namespace { namespace {
using std::cout; using std::cout;
using std::endl;
using std::pair; using std::pair;
using std::vector; using std::vector;
using std::array; using std::array;
...@@ -75,8 +74,8 @@ float cross_prob(const float ct) { ...@@ -75,8 +74,8 @@ float cross_prob(const float ct) {
return std::exp(ct * (Constants::p2 + ct * (Constants::p3 + ct * Constants::p4))); return std::exp(ct * (Constants::p2 + ct * (Constants::p3 + ct * Constants::p4)));
} }
std::tuple<array<unsigned int, 4>, size_t> std::tuple<size_t, size_t>
fill_coincidences(storage_t& times, size_t idx, fill_coincidences(storage_t& times, pmts_t& pmts, size_t idx,
const long time_start, const long time_end, const long time_start, const long time_end,
Generators& gen) { Generators& gen) {
const auto& prob1D = gen.prob1D; const auto& prob1D = gen.prob1D;
...@@ -85,21 +84,23 @@ fill_coincidences(storage_t& times, size_t idx, ...@@ -85,21 +84,23 @@ fill_coincidences(storage_t& times, size_t idx,
auto& mt = gen.mt; auto& mt = gen.mt;
auto& flat = gen.flat; auto& flat = gen.flat;
array<unsigned int, 4> pmts; std::fill(begin(pmts), end(pmts), 0);
if (gen.coincidence_rate < 0.001) { if (gen.coincidence_rate < 0.001) {
return {pmts, 0}; return {0, 0};
} }
pmts.clear();
// Fill coincidences // Fill coincidences
size_t n = 0; size_t n = 0;
for (long t1 = time_start ; t1 < time_end; t1 += gen.coincidence(mt)) { for (long t1 = time_start ; t1 < time_end; t1 += gen.coincidence(mt)) {
++n;
// generate two-fold coincidence // generate two-fold coincidence
const unsigned int pmt1 = random_index(prob1D, flat(mt)); const unsigned int pmt1 = random_index(prob1D, flat(mt));
const unsigned int pmt2 = random_index(prob2D[pmt1], flat(mt)); const unsigned int pmt2 = random_index(prob2D[pmt1], flat(mt));
pmts[n++] = pmt1; pmts.emplace_back(pmt1);
pmts[n++] = pmt2; pmts.emplace_back(pmt2);
std::normal_distribution<double> gauss(t1, 0.5); std::normal_distribution<double> gauss(t1, 0.5);
times[++idx] = std::lround(gauss(mt)); times[++idx] = std::lround(gauss(mt));
...@@ -117,28 +118,28 @@ fill_coincidences(storage_t& times, size_t idx, ...@@ -117,28 +118,28 @@ fill_coincidences(storage_t& times, size_t idx,
probND[pmtN] = 0.0; probND[pmtN] = 0.0;
pmtN = random_index(probND, flat(mt)); pmtN = random_index(probND, flat(mt));
pmts[n++] = pmtN; pmts.emplace_back(pmtN);
times[++idx] = std::lround(gauss(mt)); times[++idx] = std::lround(gauss(mt));
} }
} }
} }
catch (const std::domain_error&) {} catch (const std::domain_error&) {}
} }
return {pmts, n}; return {pmts.size(), n};
} }
std::tuple<storage_t, storage_t> generate(const long start, const long end, std::tuple<storage_t, storage_t> generate(const long start, const long end,
Generators& gens, bool use_avx2) { Generators& gens, bool use_avx2) {
#ifdef USE_AVX2 #ifdef USE_AVX2
if (use_avx2) { if (use_avx2) {
std::cout << "Generating AVX2" << std::endl; std::cout << "Generating AVX2\n";
return generate_avx2(start, end, gens); return generate_avx2(start, end, gens);
} else { } else {
std::cout << "Generating scalar" << std::endl; std::cout << "Generating scalar\n";
return generate_scalar(start, end, gens); return generate_scalar(start, end, gens);
} }
#else #else
std::cout << "Generating scalar" << std::endl; std::cout << "Generating scalar\n";
return generate_scalar(start, end, gens); return generate_scalar(start, end, gens);
#endif #endif
} }
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
*/ */
#include <cassert> #include <cassert>
#include <tuple> #include <tuple>
#include <functional>
#include <Vc/Vc> #include <Vc/Vc>
#include <instrset.h> #include <instrset.h>
...@@ -58,6 +59,45 @@ inline int_v scan_AVX(int_v x) { ...@@ -58,6 +59,45 @@ inline int_v scan_AVX(int_v x) {
return x; return x;
} }
auto int_v_to_long_v(const int_v& in) -> pair<long_v, long_v>
{
return {Vc::AvxIntrinsics::cvtepi32_epi64(Vc::AVX::lo128(in.data())),
Vc::AvxIntrinsics::cvtepi32_epi64(Vc::AVX::hi128(in.data()))};
};
void fill_values_avx2(long idx_start, long idx_end, storage_t& values, Ranvec1& random,
int dom, int mod,
std::function<int_v(size_t)> pmt_fun) {
// fill values
size_t n = 0;
const long value_end = idx_end + (2 * long_v::size() - (idx_end % 2 * long_v::size()));
for (long vidx = idx_start; vidx < value_end; vidx += 2 * long_v::size()) {
int_v pmt1 = pmt_fun(n++);
int_v pmt2 = pmt_fun(n++);
auto u1 = float_v{random.random8f()};
auto u2 = float_v{random.random8f()} * Constants::two_pi;
auto fact = sqrt(-2.0f * log(u1));
float_v z0, z1;
sincos(u2, &z0, &z1);
z0 = fact * tot_sigma * z0 + tot_mean;
z1 = fact * tot_sigma * z1 + tot_mean;
auto val0 = simd_cast<int_v>(z0) | (pmt1 << 8) | ((100 * (dom + 1) + mod + 1) << 13);
auto [val0_first, val0_second] = int_v_to_long_v(val0);
val0_first.store(&values[vidx]);
val0_second.store(&values[vidx + long_v::size()]);
auto val1 = simd_cast<int_v>(z1) | (pmt2 << 8) | ((100 * (dom + 1) + mod + 1) << 13);
auto [val1_first, val1_second] = int_v_to_long_v(val1);
val1_first.store(&values[vidx + 2 * long_v::size()]);
val1_second.store(&values[vidx + 3 *long_v::size()]);
}
}
std::tuple<storage_t, storage_t> generate_avx2(const long time_start, const long time_end, std::tuple<storage_t, storage_t> generate_avx2(const long time_start, const long time_end,
Generators& gens) { Generators& gens) {
...@@ -67,27 +107,22 @@ std::tuple<storage_t, storage_t> generate_avx2(const long time_start, const long ...@@ -67,27 +107,22 @@ std::tuple<storage_t, storage_t> generate_avx2(const long time_start, const long
// Assume times.size() is multiple of 8 here // Assume times.size() is multiple of 8 here
// assert(storage::n_hits % 16 == 0); // assert(storage::n_hits % 16 == 0);
const size_t n_expect = gens.n_expect(time_end - time_start); const size_t n_expect = gens.n_expect(time_end - time_start, true);
const size_t n_expect_pmts = gens.n_expect(time_end - time_start, false);
const float tau_l0 = gens.tau_l0(); const float tau_l0 = gens.tau_l0();
size_t storage_size = n_expect + long_v::size() - n_expect % long_v::size(); size_t storage_size = n_expect + long_v::size() - n_expect % long_v::size();
storage_t times; times.resize(storage_size); storage_t times; times.resize(storage_size);
storage_t values; values.resize(storage_size + 2 * long_v::size()); storage_t values; values.resize(storage_size + 2 * long_v::size());
pmts_t pmts(n_expect_pmts + long_v::size() - n_expect_pmts % long_v::size(), 0);
auto int_v_to_long_v = [] (const int_v& in) -> pair<long_v, long_v>
{
return {Vc::AvxIntrinsics::cvtepi32_epi64(Vc::AVX::lo128(in.data())),
Vc::AvxIntrinsics::cvtepi32_epi64(Vc::AVX::hi128(in.data()))};
};
size_t idx = 0; size_t idx = 0;
// First generate some data // First generate some data
for (int dom = 0; dom < Constants::n_dom; ++dom) { for (int dom = 0; dom < Constants::n_dom; ++dom) {
for (int mod = 0; mod < Constants::n_mod; ++mod) { for (int mod = 0; mod < Constants::n_mod; ++mod) {
size_t mod_start = idx;
for (int pmt = 0; pmt < Constants::n_pmt; ++pmt) { for (int pmt = 0; pmt < Constants::n_pmt; ++pmt) {
size_t pmt_start = idx;
long_v offset; long_v offset;
offset.data() = _mm256_set1_epi64x(time_start); offset.data() = _mm256_set1_epi64x(time_start);
long last = time_start; long last = time_start;
...@@ -121,38 +156,20 @@ std::tuple<storage_t, storage_t> generate_avx2(const long time_start, const long ...@@ -121,38 +156,20 @@ std::tuple<storage_t, storage_t> generate_avx2(const long time_start, const long
// When filling, fill the past and current indices // When filling, fill the past and current indices
idx += 2 * long_v::size(); idx += 2 * long_v::size();
} }
}
// Coincidences
auto [pmts, n_coincidence] = fill_coincidences(times, idx, time_start, time_end, gens);
idx += n_coincidence;
// fill values fill_values_avx2(pmt_start, idx, values, random, dom, mod,
const size_t value_end = idx + (2 * long_v::size() - (idx % 2 * long_v::size())); [pmt](size_t) { return int_v(pmt); });
for (size_t vidx = mod_start; vidx < value_end; vidx += 2 * long_v::size()) {
int_v pmt1{random.random8i(0, 31)};
int_v pmt2{random.random8i(0, 31)};
auto u1 = float_v{random.random8f()}; }
auto u2 = float_v{random.random8f()} * Constants::two_pi;
auto fact = sqrt(-2.0f * log(u1));
float_v z0, z1;
sincos(u2, &z0, &z1);
z0 = fact * tot_sigma * z0 + tot_mean;
z1 = fact * tot_sigma * z1 + tot_mean;
auto val0 = simd_cast<int_v>(z0) | (pmt1 << 8) | ((100 * (dom + 1) + mod + 1) << 13);
auto [val0_first, val0_second] = int_v_to_long_v(val0);
val0_first.store(&values[vidx]); // Coincidences
val0_second.store(&values[vidx + long_v::size()]); auto [n_times, _] = fill_coincidences(times, pmts, idx, time_start, time_end, gens);
fill_values_avx2(idx, idx + n_times, values, random, dom, mod,
[&pmts](size_t n) {
return int_v(pmts.data() + n * int_v::size());
});
idx += n_times;
auto val1 = simd_cast<int_v>(z1) | (pmt2 << 8) | ((100 * (dom + 1) + mod + 1) << 13);
auto [val1_first, val1_second] = int_v_to_long_v(val1);
val1_first.store(&values[vidx + 2 * long_v::size()]);
val1_second.store(&values[vidx + 3 *long_v::size()]);
}
} }
} }
times.resize(idx); times.resize(idx);
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
#include <cassert> #include <cassert>
#include <stdexcept> #include <stdexcept>
#include <vector> #include <vector>
#include <functional>
#include <iostream>
#include <tuple> #include <tuple>
#include <optional> #include <optional>
...@@ -28,9 +30,7 @@ namespace { ...@@ -28,9 +30,7 @@ namespace {
const float tot_mean = Constants::tot_mean; const float tot_mean = Constants::tot_mean;
const float tot_sigma = Constants::tot_sigma; const float tot_sigma = Constants::tot_sigma;
using namespace storage;
using std::cout; using std::cout;
using std::endl;
using std::array; using std::array;
} }
...@@ -43,26 +43,15 @@ float GenScalar::dot_product(const std::array<float, 3>& left, const std::array< ...@@ -43,26 +43,15 @@ float GenScalar::dot_product(const std::array<float, 3>& left, const std::array<
} }
void fill_values_scalar(long idx_start, long idx_end, storage_t& values, Generators& gens, int dom, int mod, void fill_values_scalar(long idx_start, long idx_end, storage_t& values, Generators& gens, int dom, int mod,
const std::optional<array<unsigned int, 4>>& pmts) { std::function<unsigned int(size_t)> pmt_fun) {
// fill values // fill values
std::uniform_int_distribution<long> flat_pmt(0, 31);
auto& mt = gens.mt; auto& mt = gens.mt;
auto& flat = gens.flat; auto& flat = gens.flat;
if (pmts) {
assert((idx_end - idx_start) < pmts->size());
}
size_t n = 0; size_t n = 0;
for (long vidx = idx_start; vidx < idx_end; vidx += 2) { for (long vidx = idx_start; vidx < idx_end; vidx += 2) {
unsigned int pmt1 = 0, pmt2 = 0; auto pmt1 = pmt_fun(n++);
if (pmts) { auto pmt2 = pmt_fun(n++);
pmt1 = (*pmts)[n++];
pmt2 = (*pmts)[n++];
} else {
pmt1 = flat_pmt(mt);
pmt2 = flat_pmt(mt);
}
auto u1 = flat(mt); auto u1 = flat(mt);
auto u2 = flat(mt) * Constants::two_pi; auto u2 = flat(mt) * Constants::two_pi;
...@@ -88,36 +77,39 @@ std::tuple<storage_t, storage_t> generate_scalar(const long time_start, const lo ...@@ -88,36 +77,39 @@ std::tuple<storage_t, storage_t> generate_scalar(const long time_start, const lo
auto& mt = gens.mt; auto& mt = gens.mt;
auto& flat = gens.flat; auto& flat = gens.flat;
const size_t n_expect = gens.n_expect(time_end - time_start); const size_t n_expect = gens.n_expect(time_end - time_start, true);
const float tau_l0 = gens.tau_l0(); const float tau_l0 = gens.tau_l0();
storage_t times; times.resize(n_expect); storage_t times; times.resize(n_expect);
storage_t values; values.resize(n_expect + 1); storage_t values; values.resize(n_expect + 1);
const size_t n_expect_pmts = gens.n_expect(time_end - time_start, false);
pmts_t pmts(n_expect_pmts, 0);
size_t idx = 0; size_t idx = 0;
// First generate some data // First generate some data
for (int dom = 0; dom < Constants::n_dom; ++dom) { for (int dom = 0; dom < Constants::n_dom; ++dom) {
for (int mod = 0; mod < Constants::n_mod; ++mod) { for (int mod = 0; mod < Constants::n_mod; ++mod) {
size_t mod_start = idx;
for (int pmt = 0; pmt < Constants::n_pmt; ++pmt) { for (int pmt = 0; pmt < Constants::n_pmt; ++pmt) {
long last = time_start; size_t pmt_start = idx;
while(last < time_end && idx < times.size() - 2) { long last = time_start;
// Generate times while(last < time_end && idx < times.size() - 2) {
float r = -1.f * tau_l0 * log(flat(mt)); // Generate times
last += static_cast<long>(r + 0.5); float r = -1.f * tau_l0 * log(flat(mt));
times[idx++] = last; last += static_cast<long>(r + 0.5);
} times[idx++] = last;
}
fill_values_scalar(pmt_start, idx, values, gens, dom, mod,
[pmt](size_t) { return pmt; });
} }
fill_values_scalar(mod_start, idx, values, gens, dom, mod, {});
// Coincidences // Coincidences
auto [pmts, n_coincidence] = fill_coincidences(times, idx, time_start, time_end, gens); auto [n_times, _] = fill_coincidences(times, pmts, idx, time_start, time_end, gens);
idx += n_coincidence; fill_values_scalar(idx, idx + n_times, values, gens, dom, mod,
[&pmts](size_t n) {
fill_values_scalar(mod_start, idx, values, gens, dom, mod, pmts); assert(n < pmts.size());
return pmts[n];
});
idx += n_times;
} }
} }
times.resize(idx); times.resize(idx);
......
/*
* Copyright 2018-2019 NWO-I
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "storage.h"
int storage::n_per_mod = 21760;
...@@ -37,10 +37,10 @@ generate_k40(const long time_start, const long time_end, Generators& gens, bool ...@@ -37,10 +37,10 @@ generate_k40(const long time_start, const long time_end, Generators& gens, bool
// factory for lambda's to shift and mask the values back into their // factory for lambda's to shift and mask the values back into their
// components. // components.
auto shift_mask_fact = [&values] (const size_t shift, const long mask) { auto shift_mask_fact = [] (const size_t shift, const long mask) {
return [&values, shift, mask] (const auto val) { return [shift, mask] (const auto val) {
return (val >> shift) & mask; return (val >> shift) & mask;
}; };
}; };
// column in the output array // column in the output array
......
...@@ -59,6 +59,8 @@ if (ROOT_FOUND) ...@@ -59,6 +59,8 @@ if (ROOT_FOUND)
generate generate
test_functions test_functions
${ROOT_LIBRARIES}) ${ROOT_LIBRARIES})
add_test(TestK40ROOT test_k40gen_root)
endif() endif()
find_package(Python3 COMPONENTS Interpreter) find_package(Python3 COMPONENTS Interpreter)
......
#pragma once #pragma once
#include <array> #include <array>
#include <unordered_map>
#include <tuple> #include <tuple>
template <std::size_t N> template <std::size_t N>
...@@ -15,5 +14,4 @@ struct get_n { ...@@ -15,5 +14,4 @@ struct get_n {
std::pair<double, double> generate_l0(float l0_rate, long dt, bool use_avx2); std::pair<double, double> generate_l0(float l0_rate, long dt, bool use_avx2);
std::tuple<double, double, std::unordered_map<size_t, double>> std::tuple<double, double> coincidence_rate(std::array<float, 4> rates);
coincidence_rate(std::array<float, 4> rates);