math/doc/html/math_toolkit/univariate_statistics.html
2019-10-31 17:55:35 +00:00

355 lines
43 KiB
HTML

<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=US-ASCII">
<title>Univariate Statistics</title>
<link rel="stylesheet" href="../math.css" type="text/css">
<meta name="generator" content="DocBook XSL Stylesheets V1.79.1">
<link rel="home" href="../index.html" title="Math Toolkit 2.11.0">
<link rel="up" href="../statistics.html" title="Chapter&#160;6.&#160;Statistics">
<link rel="prev" href="../statistics.html" title="Chapter&#160;6.&#160;Statistics">
<link rel="next" href="bivariate_statistics.html" title="Bivariate Statistics">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
<table cellpadding="2" width="100%"><tr>
<td valign="top"><img alt="Boost C++ Libraries" width="277" height="86" src="../../../../../boost.png"></td>
<td align="center"><a href="../../../../../index.html">Home</a></td>
<td align="center"><a href="../../../../../libs/libraries.htm">Libraries</a></td>
<td align="center"><a href="http://www.boost.org/users/people.html">People</a></td>
<td align="center"><a href="http://www.boost.org/users/faq.html">FAQ</a></td>
<td align="center"><a href="../../../../../more/index.htm">More</a></td>
</tr></table>
<hr>
<div class="spirit-nav">
<a accesskey="p" href="../statistics.html"><img src="../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../statistics.html"><img src="../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../index.html"><img src="../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="bivariate_statistics.html"><img src="../../../../../doc/src/images/next.png" alt="Next"></a>
</div>
<div class="section">
<div class="titlepage"><div><div><h2 class="title" style="clear: both">
<a name="math_toolkit.univariate_statistics"></a><a class="link" href="univariate_statistics.html" title="Univariate Statistics">Univariate Statistics</a>
</h2></div></div></div>
<h4>
<a name="math_toolkit.univariate_statistics.h0"></a>
<span class="phrase"><a name="math_toolkit.univariate_statistics.synopsis"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.synopsis">Synopsis</a>
</h4>
<pre class="programlisting"><span class="preprocessor">#include</span> <span class="special">&lt;</span><span class="identifier">boost</span><span class="special">/</span><span class="identifier">math</span><span class="special">/</span><span class="identifier">statistics</span><span class="special">/</span><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span><span class="special">&gt;</span>
<span class="keyword">namespace</span> <span class="identifier">boost</span><span class="special">{</span> <span class="keyword">namespace</span> <span class="identifier">math</span><span class="special">{</span> <span class="keyword">namespace</span> <span class="identifier">statistics</span> <span class="special">{</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">mean</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">mean</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">variance</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">mean_and_sample_variance</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">skewness</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">skewness</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">excess_kurtosis</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">excess_kurtosis</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">Container</span> <span class="keyword">const</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">median</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">RandomAccessIterator</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">median</span><span class="special">(</span><span class="identifier">RandomAccessIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">RandomAccessIterator</span> <span class="identifier">last</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">RandomAccessIterator</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">RandomAccessIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">RandomAccessIterator</span> <span class="identifier">last</span><span class="special">,</span> <span class="keyword">typename</span> <span class="identifier">std</span><span class="special">::</span><span class="identifier">iterator_traits</span><span class="special">&lt;</span><span class="identifier">RandomAccessIterator</span><span class="special">&gt;::</span><span class="identifier">value_type</span> <span class="identifier">center</span><span class="special">=</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">numeric_limits</span><span class="special">&lt;</span><span class="identifier">Real</span><span class="special">&gt;::</span><span class="identifier">quiet_NaN</span><span class="special">());</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">RandomAccessContainer</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">RandomAccessContainer</span> <span class="identifier">v</span><span class="special">,</span> <span class="keyword">typename</span> <span class="identifier">RandomAccessContainer</span><span class="special">::</span><span class="identifier">value_type</span> <span class="identifier">center</span><span class="special">=</span><span class="identifier">std</span><span class="special">::</span><span class="identifier">numeric_limits</span><span class="special">&lt;</span><span class="identifier">Real</span><span class="special">&gt;::</span><span class="identifier">quiet_NaN</span><span class="special">());</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">Container</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">Container</span> <span class="special">&amp;</span> <span class="identifier">c</span><span class="special">);</span>
<span class="keyword">template</span><span class="special">&lt;</span><span class="keyword">class</span> <span class="identifier">ForwardIterator</span><span class="special">&gt;</span>
<span class="keyword">auto</span> <span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">ForwardIterator</span> <span class="identifier">first</span><span class="special">,</span> <span class="identifier">ForwardIterator</span> <span class="identifier">last</span><span class="special">);</span>
<span class="special">}}}</span>
</pre>
<h4>
<a name="math_toolkit.univariate_statistics.h1"></a>
<span class="phrase"><a name="math_toolkit.univariate_statistics.description"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.description">Description</a>
</h4>
<p>
The file <code class="computeroutput"><span class="identifier">boost</span><span class="special">/</span><span class="identifier">math</span><span class="special">/</span><span class="identifier">statistics</span><span class="special">/</span><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span></code> is a
set of facilities for computing scalar values from vectors.
</p>
<p>
Many of these functionals have trivial naive implementations, but experienced
programmers will recognize that even trivial algorithms are easy to screw up,
and that numerical instabilities often lurk in corner cases. We have attempted
to do our "due diligence" to root out these problems&#8212;scouring the
literature for numerically stable algorithms for even the simplest of functionals.
</p>
<p>
<span class="emphasis"><em>Nota bene</em></span>: Some similar functionality is provided in the
<a href="https://www.boost.org/doc/libs/1_68_0/doc/html/accumulators/user_s_guide.html" target="_top">Boost
Accumulators Framework</a>. These accumulators should be used in real-time
applications; <code class="computeroutput"><span class="identifier">univariate_statistics</span><span class="special">.</span><span class="identifier">hpp</span></code> should
be used when CPU vectorization is needed. As a reminder, to actually
<span class="emphasis"><em>get</em></span> vectorization, compile with <code class="computeroutput"><span class="special">-</span><span class="identifier">march</span><span class="special">=</span><span class="identifier">native</span>
<span class="special">-</span><span class="identifier">O3</span></code>
flags.
</p>
<p>
We now describe each functional in detail. Our examples use <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span></code>
to hold the data, but this is not required. In general, you can store your data
in an Eigen array, an Armadillo vector, <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">array</span></code>,
and for many of the routines, a <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">forward_list</span></code>.
These routines are usable in float, double, long double, and Boost.Multiprecision
precision, as well as their complex extensions whenever the computation is
well-defined. For certain operations (total variation, for example) integer
inputs are supported.
</p>
<h4>
<a name="math_toolkit.univariate_statistics.h2"></a>
<span class="phrase"><a name="math_toolkit.univariate_statistics.mean"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.mean">Mean</a>
</h4>
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
<span class="keyword">double</span> <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">cbegin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">cend</span><span class="special">());</span>
<span class="comment">// Alternative syntax if you want to use entire container:</span>
<span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
</pre>
<p>
The implementation follows <a href="https://doi.org/10.1137/1.9780898718027" target="_top">Higham
1.6a</a>. The data is not modified and must be forward iterable. Works
with real and integer data. If the input is an integer type, the output is
a double precision float.
</p>
<h4>
<a name="math_toolkit.univariate_statistics.h3"></a>
<span class="phrase"><a name="math_toolkit.univariate_statistics.variance"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.variance">Variance</a>
</h4>
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
<span class="keyword">double</span> <span class="identifier">sigma_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">cbegin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">cend</span><span class="special">());</span>
</pre>
<p>
If you don't need to calculate on a subset of the input, then the range call
is more terse:
</p>
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
<span class="keyword">double</span> <span class="identifier">sigma_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
</pre>
<p>
The implementation follows <a href="https://doi.org/10.1137/1.9780898718027" target="_top">Higham
1.6b</a>. The input data must be forward iterable and the range <code class="computeroutput"><span class="special">[</span><span class="identifier">first</span><span class="special">,</span>
<span class="identifier">last</span><span class="special">)</span></code>
must contain at least two elements. It is <span class="emphasis"><em>not</em></span> in general
sensible to pass complex numbers to this routine. If integers are passed as
input, then the output is a double precision float.
</p>
<p>
<code class="computeroutput"><span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">variance</span></code>
returns the population variance. If you want a sample variance, use
</p>
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
<span class="keyword">double</span> <span class="identifier">sn_sq</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">sample_variance</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
</pre>
<h4>
<a name="math_toolkit.univariate_statistics.h4"></a>
<span class="phrase"><a name="math_toolkit.univariate_statistics.skewness"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.skewness">Skewness</a>
</h4>
<p>
Computes the skewness of a dataset:
</p>
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
<span class="keyword">double</span> <span class="identifier">skewness</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">skewness</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
<span class="comment">// skewness = 0.</span>
</pre>
<p>
The input vector is not modified. Works with integral and real data. If the
input data is integral, the output is a double precision float.
</p>
<p>
For a dataset consisting of a single constant value, we take the skewness to
be zero by definition.
</p>
<p>
The implementation follows <a href="https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf" target="_top">Pebay</a>.
</p>
<h4>
<a name="math_toolkit.univariate_statistics.h5"></a>
<span class="phrase"><a name="math_toolkit.univariate_statistics.kurtosis"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.kurtosis">Kurtosis</a>
</h4>
<p>
Computes the kurtosis of a dataset:
</p>
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
<span class="keyword">double</span> <span class="identifier">kurtosis</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">kurtosis</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
<span class="comment">// kurtosis = 17/10</span>
</pre>
<p>
The implementation follows <a href="https://prod.sandia.gov/techlib-noauth/access-control.cgi/2008/086212.pdf" target="_top">Pebay</a>.
The input data must be forward iterable and must consist of real or integral
values. If the input data is integral, the output is a double precision float.
Note that this is <span class="emphasis"><em>not</em></span> the excess kurtosis. If you require
the excess kurtosis, use <code class="computeroutput"><span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">excess_kurtosis</span></code>. This function simply subtracts
3 from the kurtosis, but it makes eminently clear our definition of kurtosis.
</p>
<h4>
<a name="math_toolkit.univariate_statistics.h6"></a>
<span class="phrase"><a name="math_toolkit.univariate_statistics.first_four_moments"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.first_four_moments">First
four moments</a>
</h4>
<p>
Simultaneously computes the first four <a href="https://en.wikipedia.org/wiki/Central_moment" target="_top">central
moments</a> in a single pass through the data:
</p>
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
<span class="keyword">auto</span> <span class="special">[</span><span class="identifier">M1</span><span class="special">,</span> <span class="identifier">M2</span><span class="special">,</span> <span class="identifier">M3</span><span class="special">,</span> <span class="identifier">M4</span><span class="special">]</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">first_four_moments</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
</pre>
<h4>
<a name="math_toolkit.univariate_statistics.h7"></a>
<span class="phrase"><a name="math_toolkit.univariate_statistics.median"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.median">Median</a>
</h4>
<p>
Computes the median of a dataset:
</p>
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
<span class="keyword">double</span> <span class="identifier">m</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median</span><span class="special">(</span><span class="identifier">v</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(),</span> <span class="identifier">v</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span>
</pre>
<p>
<span class="emphasis"><em>Nota bene: The input vector is modified.</em></span> The calculation
of the median is a thin wrapper around the C++11 <a href="https://en.cppreference.com/w/cpp/algorithm/nth_element" target="_top"><code class="computeroutput"><span class="identifier">nth_element</span></code></a>. Therefore, all requirements
of <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">nth_element</span></code> are inherited by the median calculation.
In particular, the container must allow random access.
</p>
<h4>
<a name="math_toolkit.univariate_statistics.h8"></a>
<span class="phrase"><a name="math_toolkit.univariate_statistics.median_absolute_deviation"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.median_absolute_deviation">Median
Absolute Deviation</a>
</h4>
<p>
Computes the <a href="https://en.wikipedia.org/wiki/Median_absolute_deviation" target="_top">median
absolute deviation</a> of a dataset:
</p>
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">2</span><span class="special">,</span><span class="number">3</span><span class="special">,</span><span class="number">4</span><span class="special">,</span><span class="number">5</span><span class="special">};</span>
<span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
</pre>
<p>
By default, the deviation from the median is used. If you have prior knowledge
that the median is zero, or wish to compute the median absolute deviation from
the mean, use the following:
</p>
<pre class="programlisting"><span class="comment">// prior is that center is zero:</span>
<span class="keyword">double</span> <span class="identifier">center</span> <span class="special">=</span> <span class="number">0</span><span class="special">;</span>
<span class="keyword">double</span> <span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">,</span> <span class="identifier">center</span><span class="special">);</span>
<span class="comment">// compute median absolute deviation from the mean:</span>
<span class="keyword">double</span> <span class="identifier">mu</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">mean</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
<span class="identifier">mad</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">median_absolute_deviation</span><span class="special">(</span><span class="identifier">v</span><span class="special">,</span> <span class="identifier">mu</span><span class="special">);</span>
</pre>
<p>
<span class="emphasis"><em>Nota bene:</em></span> The input vector is modified. Again, the vector
is passed into a call to <a href="https://en.cppreference.com/w/cpp/algorithm/nth_element" target="_top"><code class="computeroutput"><span class="identifier">nth_element</span></code></a>.
</p>
<h4>
<a name="math_toolkit.univariate_statistics.h9"></a>
<span class="phrase"><a name="math_toolkit.univariate_statistics.gini_coefficient"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.gini_coefficient">Gini
Coefficient</a>
</h4>
<p>
Computes the Gini coefficient of a dataset:
</p>
<pre class="programlisting"><span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">v</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">0</span><span class="special">,</span><span class="number">0</span><span class="special">,</span><span class="number">0</span><span class="special">};</span>
<span class="keyword">double</span> <span class="identifier">gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
<span class="comment">// gini = 3/4</span>
<span class="keyword">double</span> <span class="identifier">s_gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">sample_gini_coefficient</span><span class="special">(</span><span class="identifier">v</span><span class="special">);</span>
<span class="comment">// s_gini = 1.</span>
<span class="identifier">std</span><span class="special">::</span><span class="identifier">vector</span><span class="special">&lt;</span><span class="keyword">double</span><span class="special">&gt;</span> <span class="identifier">w</span><span class="special">{</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">,</span><span class="number">1</span><span class="special">};</span>
<span class="identifier">gini</span> <span class="special">=</span> <span class="identifier">boost</span><span class="special">::</span><span class="identifier">math</span><span class="special">::</span><span class="identifier">statistics</span><span class="special">::</span><span class="identifier">gini_coefficient</span><span class="special">(</span><span class="identifier">w</span><span class="special">.</span><span class="identifier">begin</span><span class="special">(),</span> <span class="identifier">w</span><span class="special">.</span><span class="identifier">end</span><span class="special">());</span>
<span class="comment">// gini = 0, as all elements are now equal.</span>
</pre>
<p>
<span class="emphasis"><em>Nota bene</em></span>: The input data is altered: in particular, it
is sorted. The implementation calls <code class="computeroutput"><span class="identifier">std</span><span class="special">::</span><span class="identifier">sort</span></code>, and
as such requires random access iterators.
</p>
<p>
The sample Gini coefficient lies in the range [0, 1], whereas the population
Gini coefficient is in the range [0, 1 - 1/<span class="emphasis"><em>n</em></span>].
</p>
<p>
<span class="emphasis"><em>Nota bene:</em></span> There is essentially no reason to pass negative
values to the Gini coefficient function. However, a use case (measuring wealth
inequality when some people have negative wealth) exists, so we do not throw
an exception when negative values are encountered. You should have <span class="emphasis"><em>very</em></span>
good cause to pass negative values to the Gini coefficient calculator. Another
use case is found in signal processing, but the sorting is by magnitude and
hence has a different implementation. See <code class="computeroutput"><span class="identifier">absolute_gini_coefficient</span></code>
for details.
</p>
<h4>
<a name="math_toolkit.univariate_statistics.h10"></a>
<span class="phrase"><a name="math_toolkit.univariate_statistics.references"></a></span><a class="link" href="univariate_statistics.html#math_toolkit.univariate_statistics.references">References</a>
</h4>
<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">
<li class="listitem">
Higham, Nicholas J. <span class="emphasis"><em>Accuracy and stability of numerical algorithms.</em></span>
Vol. 80. SIAM, 2002.
</li>
<li class="listitem">
Philippe P. P&#233;bay: <span class="quote">&#8220;<span class="quote">Formulas for Robust, One-Pass Parallel Computation
of Covariances and Arbitrary-Order Statistical Moments.</span>&#8221;</span> Technical
Report SAND2008-6212, Sandia National Laboratories, September 2008.
</li>
</ul></div>
</div>
<table xmlns:rev="http://www.cs.rpi.edu/~gregod/boost/tools/doc/revision" width="100%"><tr>
<td align="left"></td>
<td align="right"><div class="copyright-footer">Copyright &#169; 2006-2019 Nikhar
Agrawal, Anton Bikineev, Paul A. Bristow, Marco Guazzone, Christopher Kormanyos,
Hubert Holin, Bruno Lalande, John Maddock, Jeremy Murphy, Matthew Pulver, Johan
R&#229;de, Gautam Sewani, Benjamin Sobotta, Nicholas Thompson, Thijs van den Berg,
Daryle Walker and Xiaogang Zhang<p>
Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at <a href="http://www.boost.org/LICENSE_1_0.txt" target="_top">http://www.boost.org/LICENSE_1_0.txt</a>)
</p>
</div></td>
</tr></table>
<hr>
<div class="spirit-nav">
<a accesskey="p" href="../statistics.html"><img src="../../../../../doc/src/images/prev.png" alt="Prev"></a><a accesskey="u" href="../statistics.html"><img src="../../../../../doc/src/images/up.png" alt="Up"></a><a accesskey="h" href="../index.html"><img src="../../../../../doc/src/images/home.png" alt="Home"></a><a accesskey="n" href="bivariate_statistics.html"><img src="../../../../../doc/src/images/next.png" alt="Next"></a>
</div>
</body>
</html>