595390497c
Ensuring that: * it still works as before on C++98 and C++03 * C++11 "strict" constexpr is used where possible - requires replacing { R x; return f(x); } with { return f(R()); } * C++14 "relaxed" constexpr is used only where otherwise impossible - assignment operators - functions who's implementations require more than a single return statement
396 lines
13 KiB
C++
396 lines
13 KiB
C++
// Boost.Units - A C++ library for zero-overhead dimensional analysis and
|
|
// unit/quantity manipulation and conversion
|
|
//
|
|
// Copyright (C) 2003-2008 Matthias Christian Schabel
|
|
// Copyright (C) 2008 Steven Watanabe
|
|
//
|
|
// Distributed under the Boost Software License, Version 1.0. (See
|
|
// accompanying file LICENSE_1_0.txt or copy at
|
|
// http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
/**
|
|
\file
|
|
|
|
\brief performance.cpp
|
|
|
|
\details
|
|
Test runtime performance.
|
|
|
|
Output:
|
|
@verbatim
|
|
|
|
multiplying ublas::matrix<double>(1000, 1000) : 25.03 seconds
|
|
multiplying ublas::matrix<quantity>(1000, 1000) : 24.49 seconds
|
|
tiled_matrix_multiply<double>(1000, 1000) : 1.12 seconds
|
|
tiled_matrix_multiply<quantity>(1000, 1000) : 1.16 seconds
|
|
solving y' = 1 - x + 4 * y with double: 1.97 seconds
|
|
solving y' = 1 - x + 4 * y with quantity: 1.84 seconds
|
|
|
|
@endverbatim
|
|
**/
|
|
|
|
#define _SCL_SECURE_NO_WARNINGS
|
|
|
|
#include <cstdlib>
|
|
#include <ctime>
|
|
#include <algorithm>
|
|
#include <iostream>
|
|
#include <iomanip>
|
|
|
|
#include <boost/config.hpp>
|
|
#include <boost/timer.hpp>
|
|
#include <boost/utility/result_of.hpp>
|
|
|
|
#ifdef BOOST_MSVC
|
|
#pragma warning(push)
|
|
#pragma warning(disable:4267; disable:4127; disable:4244; disable:4100)
|
|
#endif
|
|
|
|
#include <boost/numeric/ublas/matrix.hpp>
|
|
|
|
#ifdef BOOST_MSVC
|
|
#pragma warning(pop)
|
|
#endif
|
|
|
|
#include <boost/units/quantity.hpp>
|
|
#include <boost/units/systems/si.hpp>
|
|
#include <boost/units/cmath.hpp>
|
|
#include <boost/units/io.hpp>
|
|
|
|
enum {
|
|
tile_block_size = 24
|
|
};
|
|
|
|
template<class T0, class T1, class Out>
|
|
void tiled_multiply_carray_inner(T0* first,
|
|
T1* second,
|
|
Out* out,
|
|
int totalwidth,
|
|
int width2,
|
|
int height1,
|
|
int common) {
|
|
for(int j = 0; j < height1; ++j) {
|
|
for(int i = 0; i < width2; ++i) {
|
|
Out value = out[j * totalwidth + i];
|
|
for(int k = 0; k < common; ++k) {
|
|
value += first[k + totalwidth * j] * second[k * totalwidth + i];
|
|
}
|
|
out[j * totalwidth + i] = value;
|
|
}
|
|
}
|
|
}
|
|
|
|
template<class T0, class T1, class Out>
|
|
void tiled_multiply_carray_outer(T0* first,
|
|
T1* second,
|
|
Out* out,
|
|
int width2,
|
|
int height1,
|
|
int common) {
|
|
std::fill_n(out, width2 * height1, Out());
|
|
int j = 0;
|
|
for(; j < height1 - tile_block_size; j += tile_block_size) {
|
|
int i = 0;
|
|
for(; i < width2 - tile_block_size; i += tile_block_size) {
|
|
int k = 0;
|
|
for(; k < common - tile_block_size; k += tile_block_size) {
|
|
tiled_multiply_carray_inner(
|
|
&first[k + width2 * j],
|
|
&second[k * width2 + i],
|
|
&out[j * width2 + i],
|
|
width2,
|
|
tile_block_size,
|
|
tile_block_size,
|
|
tile_block_size);
|
|
}
|
|
tiled_multiply_carray_inner(
|
|
&first[k + width2 * j],
|
|
&second[k * width2 + i],
|
|
&out[j * width2 + i],
|
|
width2,
|
|
tile_block_size,
|
|
tile_block_size,
|
|
common - k);
|
|
}
|
|
int k = 0;
|
|
for(; k < common - tile_block_size; k += tile_block_size) {
|
|
tiled_multiply_carray_inner(
|
|
&first[k + width2 * j],
|
|
&second[k * width2 + i],
|
|
&out[j * width2 + i],
|
|
width2, width2 - i,
|
|
tile_block_size,
|
|
tile_block_size);
|
|
}
|
|
tiled_multiply_carray_inner(
|
|
&first[k + width2 * j],
|
|
&second[k * width2 + i],
|
|
&out[j * width2 + i],
|
|
width2, width2 - i,
|
|
tile_block_size,
|
|
common - k);
|
|
}
|
|
int i = 0;
|
|
for(; i < width2 - tile_block_size; i += tile_block_size) {
|
|
int k = 0;
|
|
for(; k < common - tile_block_size; k += tile_block_size) {
|
|
tiled_multiply_carray_inner(
|
|
&first[k + width2 * j],
|
|
&second[k * width2 + i],
|
|
&out[j * width2 + i],
|
|
width2,
|
|
tile_block_size,
|
|
height1 - j,
|
|
tile_block_size);
|
|
}
|
|
tiled_multiply_carray_inner(
|
|
&first[k + width2 * j],
|
|
&second[k * width2 + i],
|
|
&out[j * width2 + i],
|
|
width2,
|
|
tile_block_size,
|
|
height1 - j,
|
|
common - k);
|
|
}
|
|
int k = 0;
|
|
for(; k < common - tile_block_size; k += tile_block_size) {
|
|
tiled_multiply_carray_inner(
|
|
&first[k + width2 * j],
|
|
&second[k * width2 + i],
|
|
&out[j * width2 + i],
|
|
width2,
|
|
width2 - i,
|
|
height1 - j,
|
|
tile_block_size);
|
|
}
|
|
tiled_multiply_carray_inner(
|
|
&first[k + width2 * j],
|
|
&second[k * width2 + i],
|
|
&out[j * width2 + i],
|
|
width2,
|
|
width2 - i,
|
|
height1 - j,
|
|
common - k);
|
|
}
|
|
|
|
enum { max_value = 1000};
|
|
|
|
template<class F, class T, class N, class R>
|
|
BOOST_CXX14_CONSTEXPR
|
|
R solve_differential_equation(F f, T lower, T upper, N steps, R start) {
|
|
typedef typename F::template result<T, R>::type f_result;
|
|
T h = (upper - lower) / (1.0*steps);
|
|
for(N i = N(); i < steps; ++i) {
|
|
R y = start;
|
|
T x = lower + h * (1.0*i);
|
|
f_result k1 = f(x, y);
|
|
f_result k2 = f(x + h / 2.0, y + h * k1 / 2.0);
|
|
f_result k3 = f(x + h / 2.0, y + h * k2 / 2.0);
|
|
f_result k4 = f(x + h, y + h * k3);
|
|
start = y + h * (k1 + 2.0 * k2 + 2.0 * k3 + k4) / 6.0;
|
|
}
|
|
return(start);
|
|
}
|
|
|
|
using namespace boost::units;
|
|
|
|
//y' = 1 - x + 4 * y
|
|
struct f {
|
|
template<class Arg1, class Arg2> struct result;
|
|
|
|
BOOST_CONSTEXPR double operator()(const double& x, const double& y) const {
|
|
return(1.0 - x + 4.0 * y);
|
|
}
|
|
|
|
boost::units::quantity<boost::units::si::velocity>
|
|
BOOST_CONSTEXPR operator()(const quantity<si::time>& x,
|
|
const quantity<si::length>& y) const {
|
|
using namespace boost::units;
|
|
using namespace si;
|
|
return(1.0 * meters / second -
|
|
x * meters / pow<2>(seconds) +
|
|
4.0 * y / seconds );
|
|
}
|
|
};
|
|
|
|
template<>
|
|
struct f::result<double,double> {
|
|
typedef double type;
|
|
};
|
|
|
|
template<>
|
|
struct f::result<quantity<si::time>, quantity<si::length> > {
|
|
typedef quantity<si::velocity> type;
|
|
};
|
|
|
|
|
|
|
|
//y' = 1 - x + 4 * y
|
|
//y' - 4 * y = 1 - x
|
|
//e^(-4 * x) * (dy - 4 * y * dx) = e^(-4 * x) * (1 - x) * dx
|
|
//d/dx(y * e ^ (-4 * x)) = e ^ (-4 * x) (1 - x) * dx
|
|
|
|
//d/dx(y * e ^ (-4 * x)) = e ^ (-4 * x) * dx - x * e ^ (-4 * x) * dx
|
|
//d/dx(y * e ^ (-4 * x)) = d/dx((-3/16 + 1/4 * x) * e ^ (-4 * x))
|
|
//y * e ^ (-4 * x) = (-3/16 + 1/4 * x) * e ^ (-4 * x) + C
|
|
//y = (-3/16 + 1/4 * x) + C/e ^ (-4 * x)
|
|
//y = 1/4 * x - 3/16 + C * e ^ (4 * x)
|
|
|
|
//y(0) = 1
|
|
//1 = - 3/16 + C
|
|
//C = 19/16
|
|
//y(x) = 1/4 * x - 3/16 + 19/16 * e ^ (4 * x)
|
|
|
|
|
|
|
|
int main() {
|
|
boost::numeric::ublas::matrix<double> ublas_result;
|
|
{
|
|
boost::numeric::ublas::matrix<double> m1(max_value, max_value);
|
|
boost::numeric::ublas::matrix<double> m2(max_value, max_value);
|
|
std::srand(1492);
|
|
for(int i = 0; i < max_value; ++i) {
|
|
for(int j = 0; j < max_value; ++j) {
|
|
m1(i,j) = std::rand();
|
|
m2(i,j) = std::rand();
|
|
}
|
|
}
|
|
std::cout << "multiplying ublas::matrix<double>("
|
|
<< max_value << ", " << max_value << ") : ";
|
|
boost::timer timer;
|
|
ublas_result = (prod(m1, m2));
|
|
std::cout << timer.elapsed() << " seconds" << std::endl;
|
|
}
|
|
typedef boost::numeric::ublas::matrix<
|
|
boost::units::quantity<boost::units::si::dimensionless>
|
|
> matrix_type;
|
|
matrix_type ublas_resultq;
|
|
{
|
|
matrix_type m1(max_value, max_value);
|
|
matrix_type m2(max_value, max_value);
|
|
std::srand(1492);
|
|
for(int i = 0; i < max_value; ++i) {
|
|
for(int j = 0; j < max_value; ++j) {
|
|
m1(i,j) = std::rand();
|
|
m2(i,j) = std::rand();
|
|
}
|
|
}
|
|
std::cout << "multiplying ublas::matrix<quantity>("
|
|
<< max_value << ", " << max_value << ") : ";
|
|
boost::timer timer;
|
|
ublas_resultq = (prod(m1, m2));
|
|
std::cout << timer.elapsed() << " seconds" << std::endl;
|
|
}
|
|
std::vector<double> cresult(max_value * max_value);
|
|
{
|
|
std::vector<double> m1(max_value * max_value);
|
|
std::vector<double> m2(max_value * max_value);
|
|
std::srand(1492);
|
|
for(int i = 0; i < max_value * max_value; ++i) {
|
|
m1[i] = std::rand();
|
|
m2[i] = std::rand();
|
|
}
|
|
std::cout << "tiled_matrix_multiply<double>("
|
|
<< max_value << ", " << max_value << ") : ";
|
|
boost::timer timer;
|
|
tiled_multiply_carray_outer(
|
|
&m1[0],
|
|
&m2[0],
|
|
&cresult[0],
|
|
max_value,
|
|
max_value,
|
|
max_value);
|
|
std::cout << timer.elapsed() << " seconds" << std::endl;
|
|
}
|
|
std::vector<
|
|
boost::units::quantity<boost::units::si::energy>
|
|
> cresultq(max_value * max_value);
|
|
{
|
|
std::vector<
|
|
boost::units::quantity<boost::units::si::force>
|
|
> m1(max_value * max_value);
|
|
std::vector<
|
|
boost::units::quantity<boost::units::si::length>
|
|
> m2(max_value * max_value);
|
|
std::srand(1492);
|
|
for(int i = 0; i < max_value * max_value; ++i) {
|
|
m1[i] = std::rand() * boost::units::si::newtons;
|
|
m2[i] = std::rand() * boost::units::si::meters;
|
|
}
|
|
std::cout << "tiled_matrix_multiply<quantity>("
|
|
<< max_value << ", " << max_value << ") : ";
|
|
boost::timer timer;
|
|
tiled_multiply_carray_outer(
|
|
&m1[0],
|
|
&m2[0],
|
|
&cresultq[0],
|
|
max_value,
|
|
max_value,
|
|
max_value);
|
|
std::cout << timer.elapsed() << " seconds" << std::endl;
|
|
}
|
|
for(int i = 0; i < max_value; ++i) {
|
|
for(int j = 0; j < max_value; ++j) {
|
|
const double diff =
|
|
std::abs(ublas_result(i,j) - cresult[i * max_value + j]);
|
|
if(diff > ublas_result(i,j) /1e14) {
|
|
std::cout << std::setprecision(15) << "Uh Oh. ublas_result("
|
|
<< i << "," << j << ") = " << ublas_result(i,j)
|
|
<< std::endl
|
|
<< "cresult[" << i << " * " << max_value << " + "
|
|
<< j << "] = " << cresult[i * max_value + j]
|
|
<< std::endl;
|
|
return(EXIT_FAILURE);
|
|
}
|
|
}
|
|
}
|
|
{
|
|
std::vector<double> values(1000);
|
|
std::cout << "solving y' = 1 - x + 4 * y with double: ";
|
|
boost::timer timer;
|
|
for(int i = 0; i < 1000; ++i) {
|
|
const double x = .1 * i;
|
|
values[i] = solve_differential_equation(f(), 0.0, x, i * 100, 1.0);
|
|
}
|
|
std::cout << timer.elapsed() << " seconds" << std::endl;
|
|
for(int i = 0; i < 1000; ++i) {
|
|
const double x = .1 * i;
|
|
const double value = 1.0/4.0 * x - 3.0/16.0 + 19.0/16.0 * std::exp(4 * x);
|
|
if(std::abs(values[i] - value) > value / 1e9) {
|
|
std::cout << std::setprecision(15) << "i = : " << i
|
|
<< ", value = " << value << " approx = " << values[i]
|
|
<< std::endl;
|
|
return(EXIT_FAILURE);
|
|
}
|
|
}
|
|
}
|
|
{
|
|
using namespace boost::units;
|
|
using namespace si;
|
|
std::vector<quantity<length> > values(1000);
|
|
std::cout << "solving y' = 1 - x + 4 * y with quantity: ";
|
|
boost::timer timer;
|
|
for(int i = 0; i < 1000; ++i) {
|
|
const quantity<si::time> x = .1 * i * seconds;
|
|
values[i] = solve_differential_equation(
|
|
f(),
|
|
0.0 * seconds,
|
|
x,
|
|
i * 100,
|
|
1.0 * meters);
|
|
}
|
|
std::cout << timer.elapsed() << " seconds" << std::endl;
|
|
for(int i = 0; i < 1000; ++i) {
|
|
const double x = .1 * i;
|
|
const quantity<si::length> value =
|
|
(1.0/4.0 * x - 3.0/16.0 + 19.0/16.0 * std::exp(4 * x)) * meters;
|
|
if(abs(values[i] - value) > value / 1e9) {
|
|
std::cout << std::setprecision(15) << "i = : " << i
|
|
<< ", value = " << value << " approx = "
|
|
<< values[i] << std::endl;
|
|
return(EXIT_FAILURE);
|
|
}
|
|
}
|
|
}
|
|
}
|