72e469da0a
[CI SKIP]
355 lines
43 KiB
HTML
355 lines
43 KiB
HTML
<html>
|
|
<head>
|
|
<meta http-equiv="Content-Type" content="text/html; charset=US-ASCII">
|
|
<title>Univariate Statistics</title>
|
|
<link rel="stylesheet" href="../math.css" type="text/css">
|
|
<meta name="generator" content="DocBook XSL Stylesheets V1.79.1">
|
|
<link rel="home" href="../index.html" title="Math Toolkit 2.11.0">
|
|
<link rel="up" href="../statistics.html" title="Chapter 6. Statistics">
|
|
<link rel="prev" href="../statistics.html" title="Chapter 6. Statistics">
|
|
<link rel="next" href="bivariate_statistics.html" title="Bivariate Statistics">
|
|
</head>
|
|
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
|
|
<table cellpadding="2" width="100%"><tr>
|
|
<td valign="top"><img alt="Boost C++ Libraries" width="277" height="86" src="../../../../../boost.png"></td>
|
|
<td align="center"><a href="../../../../../index.html">Home</a></td>
|
|
<td align="center"><a href="../../../../../libs/libraries.htm">Libraries</a></td>
|
|
<td align="center"><a href="http://www.boost.org/users/people.html">People</a></td>
|
|
<td align="center"><a href="http://www.boost.org/users/faq.html">FAQ</a></td>
|
|
<td align="center"><a href="../../../../../more/index.htm">More</a></td>
|
|
</tr></table>
|
|
<hr>
|
|
<div class="spirit-nav">
|
|
<a accesskey="p" href="../statistics.html"><img src="../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../statistics.html"><img src="../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../index.html"><img src="../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="bivariate_statistics.html"><img src="../../../../../doc/src/images/next.png" alt="Next"></a>
|
|
</div>
|
|
<div class="section">
|
|
<div class="titlepage"><div><div><h2 class="title" style="clear: both">
|
|
<a name="math_toolkit.univariate_statistics"></a><a class="link" href="univariate_statistics.html" title="Univariate Statistics">Univariate Statistics</a>
|
|
</h2></div></div></div>
|
|
<h4>
|
|
<a name="math_toolkit.univariate_statistics.h0"></a>
|
|
<span class="phrase"><a name="math_toolkit.univariate_statistics.synopsis"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.synopsis">Synopsis</a>
|
|
</h4>
|
|
<pre class="programlisting"><span class="preprocessor">#include</span> <span class="special"><</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">math</span><span class="special">/</span><span class="identifier">statistics</span><span class="special">/</span><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">></span>
|
|
|
|
<span class="keyword">namespace</span> <span class="identifier">boost</span><span class="special">{</span> <span class="keyword">namespace</span> <span class="identifier">math</span><span class="special">{</span> <span class="keyword">namespace</span> <span class="identifier">statistics</span> <span class="special">{</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">mean</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">mean</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">variance</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">mean_and_sample_variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">skewness</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">skewness</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">excess_kurtosis</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">excess_kurtosis</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">median</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">RandomAccessIterator</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">median</span><span class="special">(</span><span class="identifier">RandomAccessIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">RandomAccessIterator</span> <span class="identifier">last</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">RandomAccessIterator</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">RandomAccessIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">RandomAccessIterator</span> <span class="identifier">last</span><span class="special">,</span> <span class="keyword">typename</span> <span class="identifier">std</span><span class="special">::</span><span class="identifier">iterator_traits</span><span class="special"><</span><span class="identifier">RandomAccessIterator</span><span class="special">>::</span><span class="identifier">value_type</span> <span class="identifier">center</span><span class="special">=</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">numeric_limits</span><span class="special"><</span><span class="identifier">Real</span><span class="special">>::</span><span class="identifier">quiet_NaN</span><span class="special">());</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">RandomAccessContainer</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">RandomAccessContainer</span> <span class="identifier">v</span><span class="special">,</span> <span class="keyword">typename</span> <span class="identifier">RandomAccessContainer</span><span class="special">::</span><span class="identifier">value_type</span> <span class="identifier">center</span><span class="special">=</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">numeric_limits</span><span class="special"><</span><span class="identifier">Real</span><span class="special">>::</span><span class="identifier">quiet_NaN</span><span class="special">());</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&</span> <span class="identifier">c</span><span class="special">);</span>
|
|
|
|
<span class="keyword">template</span><span class="special"><</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">></span>
|
|
<span class="keyword">auto</span> <span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
|
|
|
|
<span class="special">}}}</span>
|
|
</pre>
|
|
<h4>
|
|
<a name="math_toolkit.univariate_statistics.h1"></a>
|
|
<span class="phrase"><a name="math_toolkit.univariate_statistics.description"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.description">Description</a>
|
|
</h4>
|
|
<p>
|
|
The file <code class="computeroutput"><span class="identifier">boost</span><span class="special">/</span><span class="identifier">math</span><span class="special">/</span><span class="identifier">statistics</span><span class="special">/</span><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span></code> is a
|
|
set of facilities for computing scalar values from vectors.
|
|
</p>
|
|
<p>
|
|
Many of these functionals have trivial naive implementations, but experienced
|
|
programmers will recognize that even trivial algorithms are easy to screw up,
|
|
and that numerical instabilities often lurk in corner cases. We have attempted
|
|
to do our "due diligence" to root out these problems&#8212;scouring the
|
|
literature for numerically stable algorithms for even the simplest of functionals.
|
|
</p>
|
|
<p>
|
|
<span class="emphasis"><em>Nota bene</em></span>: Some similar functionality is provided in the
|
|
<a href="https://www.boost.org/doc/libs/1_68_0/doc/html/accumulators/user_s_guide.html" target="_top">Boost
|
|
Accumulators Framework</a>. These accumulators should be used in real-time
|
|
applications; <code class="computeroutput"><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span></code> should
|
|
be used when CPU vectorization is needed. Remember that to actually
|
|
<span class="emphasis"><em>get</em></span> vectorization, compile with <code class="computeroutput"><span class="special">-</span><span class="identifier">march</span><span class="special">=</span><span class="identifier">native</span>
|
|
<span class="special">-</span><span class="identifier">O3</span></code>
|
|
flags.
|
|
</p>
|
|
<p>
|
|
We now describe each functional in detail. Our examples use <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span></code>
|
|
to hold the data, but this is not required. In general, you can store your data
|
|
in an Eigen array, an Armadillo vector, <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">array</span></code>,
|
|
and for many of the routines, a <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">forward_list</span></code>.
|
|
These routines are usable in float, double, long double, and Boost.Multiprecision
|
|
precision, as well as their complex extensions whenever the computation is
|
|
well-defined. For certain operations (total variation, for example) integer
|
|
inputs are supported.
|
|
</p>
|
|
<h4>
|
|
<a name="math_toolkit.univariate_statistics.h2"></a>
|
|
<span class="phrase"><a name="math_toolkit.univariate_statistics.mean"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.mean">Mean</a>
|
|
</h4>
|
|
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
|
|
<span class="keyword">double</span> <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">cbegin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">cend</span><span class="special">());</span>
|
|
<span class="comment">// Alternative syntax if you want to use entire container:</span>
|
|
<span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
|
|
</pre>
|
|
<p>
|
|
The implementation follows <a href="https://doi.org/10.1137/1.9780898718027" target="_top">Higham
|
|
1.6a</a>. The data is not modified and must be forward iterable. Works
|
|
with real and integer data. If the input is an integer type, the output is
|
|
a double precision float.
|
|
</p>
|
|
<h4>
|
|
<a name="math_toolkit.univariate_statistics.h3"></a>
|
|
<span class="phrase"><a name="math_toolkit.univariate_statistics.variance"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.variance">Variance</a>
|
|
</h4>
|
|
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
|
|
<span class="keyword">double</span> <span class="identifier">sigma_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">cbegin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">cend</span><span class="special">());</span>
|
|
</pre>
|
|
<p>
|
|
If you don't need to calculate on a subset of the input, then the range call
|
|
is more terse:
|
|
</p>
|
|
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
|
|
<span class="keyword">double</span> <span class="identifier">sigma_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
|
|
</pre>
|
|
<p>
|
|
The implementation follows <a href="https://doi.org/10.1137/1.9780898718027" target="_top">Higham
|
|
1.6b</a>. The input data must be forward iterable and the range <code class="computeroutput"><span class="special">[</span><span class="identifier">first</span><span class="special">,</span>
|
|
<span class="identifier">last</span><span class="special">)</span></code>
|
|
must contain at least two elements. It is <span class="emphasis"><em>not</em></span> in general
|
|
sensible to pass complex numbers to this routine. If integers are passed as
|
|
input, then the output is a double precision float.
|
|
</p>
|
|
<p>
|
|
<code class="computeroutput"><span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span></code>
|
|
returns the population variance. If you want a sample variance, use
|
|
</p>
|
|
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
|
|
<span class="keyword">double</span> <span class="identifier">sn_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
|
|
</pre>
|
|
<h4>
|
|
<a name="math_toolkit.univariate_statistics.h4"></a>
|
|
<span class="phrase"><a name="math_toolkit.univariate_statistics.skewness"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.skewness">Skewness</a>
|
|
</h4>
|
|
<p>
|
|
Computes the skewness of a dataset:
|
|
</p>
|
|
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
|
|
<span class="keyword">double</span> <span class="identifier">skewness</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">skewness</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
|
|
<span class="comment">// skewness = 0.</span>
|
|
</pre>
|
|
<p>
|
|
The input vector is not modified, and the routine works with integral and real data. If the
|
|
input data is integral, the output is a double precision float.
|
|
</p>
|
|
<p>
|
|
For a dataset consisting of a single constant value, we take the skewness to
|
|
be zero by definition.
|
|
</p>
|
|
<p>
|
|
The implementation follows <a href="https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf" target="_top">Pebay</a>.
|
|
</p>
|
|
<h4>
|
|
<a name="math_toolkit.univariate_statistics.h5"></a>
|
|
<span class="phrase"><a name="math_toolkit.univariate_statistics.kurtosis"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.kurtosis">Kurtosis</a>
|
|
</h4>
|
|
<p>
|
|
Computes the kurtosis of a dataset:
|
|
</p>
|
|
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
|
|
<span class="keyword">double</span> <span class="identifier">kurtosis</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
|
|
<span class="comment">// kurtosis = 17/10</span>
|
|
</pre>
|
|
<p>
|
|
The implementation follows <a href="https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf" target="_top">Pebay</a>.
|
|
The input data must be forward iterable and must consist of real or integral
|
|
values. If the input data is integral, the output is a double precision float.
|
|
Note that this is <span class="emphasis"><em>not</em></span> the excess kurtosis. If you require
|
|
the excess kurtosis, use <code class="computeroutput"><span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">excess_kurtosis</span></code>. This function simply subtracts
|
|
3 from the kurtosis, but it makes eminently clear our definition of kurtosis.
|
|
</p>
|
|
<h4>
|
|
<a name="math_toolkit.univariate_statistics.h6"></a>
|
|
<span class="phrase"><a name="math_toolkit.univariate_statistics.first_four_moments"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.first_four_moments">First
|
|
four moments</a>
|
|
</h4>
|
|
<p>
|
|
Simultaneously computes the first four <a href="https://en.wikipedia.org/wiki/Central_moment" target="_top">central
|
|
moments</a> in a single pass through the data:
|
|
</p>
|
|
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special"><</span><span class="keyword">double</span><span class="special">></span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
|
|
<span class="keyword">auto</span> <span class="special">[</span><span class="identifier">M1</span><span class="special">,</span> <span class="identifier">M2</span><span class="special">,</span> <span class="identifier">M3</span><span class="special">,</span> <span class="identifier">M4</span><span class="special">]</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
|
|
</pre>
|
|
<h4>
|
|
<a name="math_toolkit.univariate_statistics.h7"></a>
|
|
<span class="phrase"><a name="math_toolkit.univariate_statistics.median"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.median">Median</a>
|
|
</h4>
|
|
<p>
|
|
Computes the median of a dataset:
|
|
</p>
|
|
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
|
|
<span class="keyword">double</span> <span class="identifier">m</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span>
|
|
</pre>
|
|
<p>
|
|
<span class="emphasis"><em>Nota bene: The input vector is modified.</em></span> The calculation
|
|
of the median is a thin wrapper around the C++11 <a href="https://en.cppreference.com/w/cpp/algorithm/nth_element" target="_top"><code class="computeroutput"><span class="identifier">nth_element</span></code></a>. Therefore, all requirements
|
|
of <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">nth_element</span></code> are inherited by the median calculation.
|
|
In particular, the container must allow random access.
|
|
</p>
|
|
<h4>
|
|
<a name="math_toolkit.univariate_statistics.h8"></a>
|
|
<span class="phrase"><a name="math_toolkit.univariate_statistics.median_absolute_deviation"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.median_absolute_deviation">Median
|
|
Absolute Deviation</a>
|
|
</h4>
|
|
<p>
|
|
Computes the <a href="https://en.wikipedia.org/wiki/Median_absolute_deviation" target="_top">median
|
|
absolute deviation</a> of a dataset:
|
|
</p>
|
|
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
|
|
<span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
|
|
</pre>
|
|
<p>
|
|
By default, the deviation from the median is used. If you have some prior that
|
|
the median is zero, or wish to compute the median absolute deviation from the
|
|
mean, use the following:
|
|
</p>
|
|
<pre class="programlisting"><span class="comment">// prior is that center is zero:</span>
|
|
<span class="keyword">double</span> <span class="identifier">center</span> <span class="special">=</span> <span class="number">0</span><span class="special">;</span>
|
|
<span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">,</span> <span class="identifier">center</span><span class="special">);</span>
|
|
|
|
<span class="comment">// compute median absolute deviation from the mean:</span>
|
|
<span class="keyword">double</span> <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
|
|
<span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">,</span> <span class="identifier">mu</span><span class="special">);</span>
|
|
</pre>
|
|
<p>
|
|
<span class="emphasis"><em>Nota bene:</em></span> The input vector is modified. Again, the vector
|
|
is passed into a call to <a href="https://en.cppreference.com/w/cpp/algorithm/nth_element" target="_top"><code class="computeroutput"><span class="identifier">nth_element</span></code></a>.
|
|
</p>
|
|
<h4>
|
|
<a name="math_toolkit.univariate_statistics.h9"></a>
|
|
<span class="phrase"><a name="math_toolkit.univariate_statistics.gini_coefficient"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.gini_coefficient">Gini
|
|
Coefficient</a>
|
|
</h4>
|
|
<p>
|
|
Computes the Gini coefficient of a dataset:
|
|
</p>
|
|
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">0</span><span class="special">,</span><span class="number">0</span><span class="special">,</span><span class="number">0</span><span class="special">};</span>
|
|
<span class="keyword">double</span> <span class="identifier">gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
|
|
<span class="comment">// gini = 3/4</span>
|
|
<span class="keyword">double</span> <span class="identifier">s_gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
|
|
<span class="comment">// s_gini = 1.</span>
|
|
<span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">w</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">};</span>
|
|
<span class="identifier">gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">w</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(),</span> <span class="identifier">w</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span>
|
|
<span class="comment">// gini = 0, as all elements are now equal.</span>
|
|
</pre>
|
|
<p>
|
|
<span class="emphasis"><em>Nota bene</em></span>: The input data is altered: in particular, it
|
|
is sorted. The function makes a call to <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">sort</span></code>, and
|
|
as such requires random access iterators.
|
|
</p>
|
|
<p>
|
|
The sample Gini coefficient lies in the range [0,1], whereas the population
|
|
Gini coefficient is in the range [0, 1 - 1/<span class="emphasis"><em>n</em></span>], where <span class="emphasis"><em>n</em></span> is the number of elements in the dataset.
|
|
</p>
|
|
<p>
|
|
<span class="emphasis"><em>Nota bene:</em></span> There is essentially no reason to pass negative
|
|
values to the Gini coefficient function. However, a use case (measuring wealth
|
|
inequality when some people have negative wealth) exists, so we do not throw
|
|
an exception when negative values are encountered. You should have <span class="emphasis"><em>very</em></span>
|
|
good cause to pass negative values to the Gini coefficient calculator. Another
|
|
use case is found in signal processing, but the sorting is by magnitude and
|
|
hence has a different implementation. See <code class="computeroutput"><span class="identifier">absolute_gini_coefficient</span></code>
|
|
for details.
|
|
</p>
|
|
<h4>
|
|
<a name="math_toolkit.univariate_statistics.h10"></a>
|
|
<span class="phrase"><a name="math_toolkit.univariate_statistics.references"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.references">References</a>
|
|
</h4>
|
|
<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">
|
|
<li class="listitem">
|
|
Higham, Nicholas J. <span class="emphasis"><em>Accuracy and stability of numerical algorithms.</em></span>
|
|
Vol. 80. SIAM, 2002.
|
|
</li>
|
|
<li class="listitem">
|
|
Philippe P. P&#233;bay: <span class="quote">&#8220;<span class="quote">Formulas for Robust, One-Pass Parallel Computation
|
|
of Covariances and Arbitrary-Order Statistical Moments.</span>&#8221;</span> Technical
|
|
Report SAND2008-6212, Sandia National Laboratories, September 2008.
|
|
</li>
|
|
</ul></div>
|
|
</div>
|
|
<table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr>
|
|
<td align="left"></td>
|
|
<td align="right"><div class="copyright-footer">Copyright &#169; 2006-2019 Nikhar
|
|
Agrawal, Anton Bikineev, Paul A. Bristow, Marco Guazzone, Christopher Kormanyos,
|
|
Hubert Holin, Bruno Lalande, John Maddock, Jeremy Murphy, Matthew Pulver, Johan
|
|
R&#229;de, Gautam Sewani, Benjamin Sobotta, Nicholas Thompson, Thijs van den Berg,
|
|
Daryle Walker and Xiaogang Zhang<p>
|
|
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
file LICENSE_1_0.txt or copy at <a href="http://www.boost.org/LICENSE_1_0.txt" target="_top">http://www.boost.org/LICENSE_1_0.txt</a>)
|
|
</p>
|
|
</div></td>
|
|
</tr></table>
|
|
<hr>
|
|
<div class="spirit-nav">
|
|
<a accesskey="p" href="../statistics.html"><img src="../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../statistics.html"><img src="../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../index.html"><img src="../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="bivariate_statistics.html"><img src="../../../../../doc/src/images/next.png" alt="Next"></a>
|
|
</div>
|
|
</body>
|
|
</html>
|