ee56e2c03d
Update history: we've added new features so go up a version number. Regenerate docs.
425 lines
16 KiB
C++
425 lines
16 KiB
C++
// Copyright John Maddock 2006
|
|
// Copyright Paul A. Bristow 2007, 2010
|
|
|
|
// Use, modification and distribution are subject to the
|
|
// Boost Software License, Version 1.0.
|
|
// (See accompanying file LICENSE_1_0.txt
|
|
// or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
#ifdef _MSC_VER
|
|
# pragma warning(disable: 4512) // assignment operator could not be generated.
|
|
# pragma warning(disable: 4510) // default constructor could not be generated.
|
|
# pragma warning(disable: 4610) // can never be instantiated - user defined constructor required.
|
|
#endif
|
|
|
|
#include <boost/math/distributions/students_t.hpp>
|
|
|
|
// avoid "using namespace std;" and "using namespace boost::math;"
|
|
// to avoid potential ambiguity with names in std random.
|
|
#include <iostream>
|
|
using std::cout; using std::endl;
|
|
using std::left; using std::fixed; using std::right; using std::scientific;
|
|
#include <iomanip>
|
|
using std::setw;
|
|
using std::setprecision;
|
|
|
|
void confidence_limits_on_mean(double Sm, double Sd, unsigned Sn)
|
|
{
|
|
//
|
|
// Sm = Sample Mean.
|
|
// Sd = Sample Standard Deviation.
|
|
// Sn = Sample Size.
|
|
//
|
|
// Calculate confidence intervals for the mean.
|
|
// For example if we set the confidence limit to
|
|
// 0.95, we know that if we repeat the sampling
|
|
// 100 times, then we expect that the true mean
|
|
// will be between out limits on 95 occations.
|
|
// Note: this is not the same as saying a 95%
|
|
// confidence interval means that there is a 95%
|
|
// probability that the interval contains the true mean.
|
|
// The interval computed from a given sample either
|
|
// contains the true mean or it does not.
|
|
// See http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm
|
|
|
|
using boost::math::students_t;
|
|
|
|
// Print out general info:
|
|
cout <<
|
|
"__________________________________\n"
|
|
"2-Sided Confidence Limits For Mean\n"
|
|
"__________________________________\n\n";
|
|
cout << setprecision(7);
|
|
cout << setw(40) << left << "Number of Observations" << "= " << Sn << "\n";
|
|
cout << setw(40) << left << "Mean" << "= " << Sm << "\n";
|
|
cout << setw(40) << left << "Standard Deviation" << "= " << Sd << "\n";
|
|
//
|
|
// Define a table of significance/risk levels:
|
|
//
|
|
double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
|
|
//
|
|
// Start by declaring the distribution we'll need:
|
|
//
|
|
students_t dist(Sn - 1);
|
|
//
|
|
// Print table header:
|
|
//
|
|
cout << "\n\n"
|
|
"_______________________________________________________________\n"
|
|
"Confidence T Interval Lower Upper\n"
|
|
" Value (%) Value Width Limit Limit\n"
|
|
"_______________________________________________________________\n";
|
|
//
|
|
// Now print out the data for the table rows.
|
|
//
|
|
for(unsigned i = 0; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
|
|
{
|
|
// Confidence value:
|
|
cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
|
|
// calculate T:
|
|
double T = quantile(complement(dist, alpha[i] / 2));
|
|
// Print T:
|
|
cout << fixed << setprecision(3) << setw(10) << right << T;
|
|
// Calculate width of interval (one sided):
|
|
double w = T * Sd / sqrt(double(Sn));
|
|
// Print width:
|
|
if(w < 0.01)
|
|
cout << scientific << setprecision(3) << setw(17) << right << w;
|
|
else
|
|
cout << fixed << setprecision(3) << setw(17) << right << w;
|
|
// Print Limits:
|
|
cout << fixed << setprecision(5) << setw(15) << right << Sm - w;
|
|
cout << fixed << setprecision(5) << setw(15) << right << Sm + w << endl;
|
|
}
|
|
cout << endl;
|
|
} // void confidence_limits_on_mean
|
|
|
|
void single_sample_t_test(double M, double Sm, double Sd, unsigned Sn, double alpha)
|
|
{
|
|
//
|
|
// M = true mean.
|
|
// Sm = Sample Mean.
|
|
// Sd = Sample Standard Deviation.
|
|
// Sn = Sample Size.
|
|
// alpha = Significance Level.
|
|
//
|
|
// A Students t test applied to a single set of data.
|
|
// We are testing the null hypothesis that the true
|
|
// mean of the sample is M, and that any variation is down
|
|
// to chance. We can also test the alternative hypothesis
|
|
// that any difference is not down to chance.
|
|
// See http://www.itl.nist.gov/div898/handbook/eda/section3/eda352.htm
|
|
|
|
using boost::math::students_t;
|
|
|
|
// Print header:
|
|
cout <<
|
|
"__________________________________\n"
|
|
"Student t test for a single sample\n"
|
|
"__________________________________\n\n";
|
|
cout << setprecision(5);
|
|
cout << setw(55) << left << "Number of Observations" << "= " << Sn << "\n";
|
|
cout << setw(55) << left << "Sample Mean" << "= " << Sm << "\n";
|
|
cout << setw(55) << left << "Sample Standard Deviation" << "= " << Sd << "\n";
|
|
cout << setw(55) << left << "Expected True Mean" << "= " << M << "\n\n";
|
|
//
|
|
// Now we can calculate and output some stats:
|
|
//
|
|
// Difference in means:
|
|
double diff = Sm - M;
|
|
cout << setw(55) << left << "Sample Mean - Expected Test Mean" << "= " << diff << "\n";
|
|
// Degrees of freedom:
|
|
unsigned v = Sn - 1;
|
|
cout << setw(55) << left << "Degrees of Freedom" << "= " << v << "\n";
|
|
// t-statistic:
|
|
double t_stat = diff * sqrt(double(Sn)) / Sd;
|
|
cout << setw(55) << left << "T Statistic" << "= " << t_stat << "\n";
|
|
//
|
|
// Finally define our distribution, and get the probability:
|
|
//
|
|
students_t dist(v);
|
|
double q = cdf(complement(dist, fabs(t_stat)));
|
|
cout << setw(55) << left << "Probability that difference is due to chance" << "= "
|
|
<< setprecision(3) << scientific << 2 * q << "\n\n";
|
|
//
|
|
// Finally print out results of alternative hypothesis:
|
|
//
|
|
cout << setw(55) << left <<
|
|
"Results for Alternative Hypothesis and alpha" << "= "
|
|
<< setprecision(4) << fixed << alpha << "\n\n";
|
|
cout << "Alternative Hypothesis Conclusion\n";
|
|
cout << "Mean != " << setprecision(3) << fixed << M << " ";
|
|
if(q < alpha / 2)
|
|
cout << "NOT REJECTED\n";
|
|
else
|
|
cout << "REJECTED\n";
|
|
cout << "Mean < " << setprecision(3) << fixed << M << " ";
|
|
if(cdf(complement(dist, t_stat)) > alpha)
|
|
cout << "NOT REJECTED\n";
|
|
else
|
|
cout << "REJECTED\n";
|
|
cout << "Mean > " << setprecision(3) << fixed << M << " ";
|
|
if(cdf(dist, t_stat) > alpha)
|
|
cout << "NOT REJECTED\n";
|
|
else
|
|
cout << "REJECTED\n";
|
|
cout << endl << endl;
|
|
} // void single_sample_t_test(
|
|
|
|
void single_sample_find_df(double M, double Sm, double Sd)
|
|
{
|
|
//
|
|
// M = true mean.
|
|
// Sm = Sample Mean.
|
|
// Sd = Sample Standard Deviation.
|
|
//
|
|
|
|
using boost::math::students_t;
|
|
|
|
// Print out general info:
|
|
cout <<
|
|
"_____________________________________________________________\n"
|
|
"Estimated sample sizes required for various confidence levels\n"
|
|
"_____________________________________________________________\n\n";
|
|
cout << setprecision(5);
|
|
cout << setw(40) << left << "True Mean" << "= " << M << "\n";
|
|
cout << setw(40) << left << "Sample Mean" << "= " << Sm << "\n";
|
|
cout << setw(40) << left << "Sample Standard Deviation" << "= " << Sd << "\n";
|
|
//
|
|
// Define a table of significance intervals:
|
|
//
|
|
double alpha[] = { 0.5, 0.25, 0.1, 0.05, 0.01, 0.001, 0.0001, 0.00001 };
|
|
//
|
|
// Print table header:
|
|
//
|
|
cout << "\n\n"
|
|
"_______________________________________________________________\n"
|
|
"Confidence Estimated Estimated\n"
|
|
" Value (%) Sample Size Sample Size\n"
|
|
" (one sided test) (two sided test)\n"
|
|
"_______________________________________________________________\n";
|
|
//
|
|
// Now print out the data for the table rows.
|
|
//
|
|
for(unsigned i = 1; i < sizeof(alpha)/sizeof(alpha[0]); ++i)
|
|
{
|
|
// Confidence value:
|
|
cout << fixed << setprecision(3) << setw(10) << right << 100 * (1-alpha[i]);
|
|
// calculate df for single sided test:
|
|
double df = students_t::find_degrees_of_freedom(
|
|
fabs(M - Sm), alpha[i], alpha[i], Sd);
|
|
// convert to sample size, always one more than the degrees of freedom:
|
|
double size = ceil(df) + 1;
|
|
// Print size:
|
|
cout << fixed << setprecision(0) << setw(16) << right << size;
|
|
// calculate df for two sided test:
|
|
df = students_t::find_degrees_of_freedom(
|
|
fabs(M - Sm), alpha[i]/2, alpha[i], Sd);
|
|
// convert to sample size:
|
|
size = ceil(df) + 1;
|
|
// Print size:
|
|
cout << fixed << setprecision(0) << setw(16) << right << size << endl;
|
|
}
|
|
cout << endl;
|
|
} // void single_sample_find_df
|
|
|
|
int main()
|
|
{
|
|
//
|
|
// Run tests for Heat Flow Meter data
|
|
// see http://www.itl.nist.gov/div898/handbook/eda/section4/eda428.htm
|
|
// The data was collected while calibrating a heat flow meter
|
|
// against a known value.
|
|
//
|
|
confidence_limits_on_mean(9.261460, 0.2278881e-01, 195);
|
|
single_sample_t_test(5, 9.261460, 0.2278881e-01, 195, 0.05);
|
|
single_sample_find_df(5, 9.261460, 0.2278881e-01);
|
|
|
|
//
|
|
// Data for this example from:
|
|
// P.K.Hou, O. W. Lau & M.C. Wong, Analyst (1983) vol. 108, p 64.
|
|
// from Statistics for Analytical Chemistry, 3rd ed. (1994), pp 54-55
|
|
// J. C. Miller and J. N. Miller, Ellis Horwood ISBN 0 13 0309907
|
|
//
|
|
// Determination of mercury by cold-vapour atomic absorption,
|
|
// the following values were obtained fusing a trusted
|
|
// Standard Reference Material containing 38.9% mercury,
|
|
// which we assume is correct or 'true'.
|
|
//
|
|
confidence_limits_on_mean(37.8, 0.964365, 3);
|
|
// 95% test:
|
|
single_sample_t_test(38.9, 37.8, 0.964365, 3, 0.05);
|
|
// 90% test:
|
|
single_sample_t_test(38.9, 37.8, 0.964365, 3, 0.1);
|
|
// parameter estimate:
|
|
single_sample_find_df(38.9, 37.8, 0.964365);
|
|
|
|
return 0;
|
|
} // int main()
|
|
|
|
/*
|
|
|
|
Output:
|
|
|
|
------ Rebuild All started: Project: students_t_single_sample, Configuration: Release Win32 ------
|
|
students_t_single_sample.cpp
|
|
Generating code
|
|
Finished generating code
|
|
students_t_single_sample.vcxproj -> J:\Cpp\MathToolkit\test\Math_test\Release\students_t_single_sample.exe
|
|
__________________________________
|
|
2-Sided Confidence Limits For Mean
|
|
__________________________________
|
|
|
|
Number of Observations = 195
|
|
Mean = 9.26146
|
|
Standard Deviation = 0.02278881
|
|
|
|
|
|
_______________________________________________________________
|
|
Confidence T Interval Lower Upper
|
|
Value (%) Value Width Limit Limit
|
|
_______________________________________________________________
|
|
50.000 0.676 1.103e-003 9.26036 9.26256
|
|
75.000 1.154 1.883e-003 9.25958 9.26334
|
|
90.000 1.653 2.697e-003 9.25876 9.26416
|
|
95.000 1.972 3.219e-003 9.25824 9.26468
|
|
99.000 2.601 4.245e-003 9.25721 9.26571
|
|
99.900 3.341 5.453e-003 9.25601 9.26691
|
|
99.990 3.973 6.484e-003 9.25498 9.26794
|
|
99.999 4.537 7.404e-003 9.25406 9.26886
|
|
|
|
__________________________________
|
|
Student t test for a single sample
|
|
__________________________________
|
|
|
|
Number of Observations = 195
|
|
Sample Mean = 9.26146
|
|
Sample Standard Deviation = 0.02279
|
|
Expected True Mean = 5.00000
|
|
|
|
Sample Mean - Expected Test Mean = 4.26146
|
|
Degrees of Freedom = 194
|
|
T Statistic = 2611.28380
|
|
Probability that difference is due to chance = 0.000e+000
|
|
|
|
Results for Alternative Hypothesis and alpha = 0.0500
|
|
|
|
Alternative Hypothesis Conclusion
|
|
Mean != 5.000 NOT REJECTED
|
|
Mean < 5.000 REJECTED
|
|
Mean > 5.000 NOT REJECTED
|
|
|
|
|
|
_____________________________________________________________
|
|
Estimated sample sizes required for various confidence levels
|
|
_____________________________________________________________
|
|
|
|
True Mean = 5.00000
|
|
Sample Mean = 9.26146
|
|
Sample Standard Deviation = 0.02279
|
|
|
|
|
|
_______________________________________________________________
|
|
Confidence Estimated Estimated
|
|
Value (%) Sample Size Sample Size
|
|
(one sided test) (two sided test)
|
|
_______________________________________________________________
|
|
75.000 2 2
|
|
90.000 2 2
|
|
95.000 2 2
|
|
99.000 2 2
|
|
99.900 3 3
|
|
99.990 3 3
|
|
99.999 4 4
|
|
|
|
__________________________________
|
|
2-Sided Confidence Limits For Mean
|
|
__________________________________
|
|
|
|
Number of Observations = 3
|
|
Mean = 37.8000000
|
|
Standard Deviation = 0.9643650
|
|
|
|
|
|
_______________________________________________________________
|
|
Confidence T Interval Lower Upper
|
|
Value (%) Value Width Limit Limit
|
|
_______________________________________________________________
|
|
50.000 0.816 0.455 37.34539 38.25461
|
|
75.000 1.604 0.893 36.90717 38.69283
|
|
90.000 2.920 1.626 36.17422 39.42578
|
|
95.000 4.303 2.396 35.40438 40.19562
|
|
99.000 9.925 5.526 32.27408 43.32592
|
|
99.900 31.599 17.594 20.20639 55.39361
|
|
99.990 99.992 55.673 -17.87346 93.47346
|
|
99.999 316.225 176.067 -138.26683 213.86683
|
|
|
|
__________________________________
|
|
Student t test for a single sample
|
|
__________________________________
|
|
|
|
Number of Observations = 3
|
|
Sample Mean = 37.80000
|
|
Sample Standard Deviation = 0.96437
|
|
Expected True Mean = 38.90000
|
|
|
|
Sample Mean - Expected Test Mean = -1.10000
|
|
Degrees of Freedom = 2
|
|
T Statistic = -1.97566
|
|
Probability that difference is due to chance = 1.869e-001
|
|
|
|
Results for Alternative Hypothesis and alpha = 0.0500
|
|
|
|
Alternative Hypothesis Conclusion
|
|
Mean != 38.900 REJECTED
|
|
Mean < 38.900 NOT REJECTED
|
|
Mean > 38.900 NOT REJECTED
|
|
|
|
|
|
__________________________________
|
|
Student t test for a single sample
|
|
__________________________________
|
|
|
|
Number of Observations = 3
|
|
Sample Mean = 37.80000
|
|
Sample Standard Deviation = 0.96437
|
|
Expected True Mean = 38.90000
|
|
|
|
Sample Mean - Expected Test Mean = -1.10000
|
|
Degrees of Freedom = 2
|
|
T Statistic = -1.97566
|
|
Probability that difference is due to chance = 1.869e-001
|
|
|
|
Results for Alternative Hypothesis and alpha = 0.1000
|
|
|
|
Alternative Hypothesis Conclusion
|
|
Mean != 38.900 REJECTED
|
|
Mean < 38.900 NOT REJECTED
|
|
Mean > 38.900 REJECTED
|
|
|
|
|
|
_____________________________________________________________
|
|
Estimated sample sizes required for various confidence levels
|
|
_____________________________________________________________
|
|
|
|
True Mean = 38.90000
|
|
Sample Mean = 37.80000
|
|
Sample Standard Deviation = 0.96437
|
|
|
|
|
|
_______________________________________________________________
|
|
Confidence Estimated Estimated
|
|
Value (%) Sample Size Sample Size
|
|
(one sided test) (two sided test)
|
|
_______________________________________________________________
|
|
75.000 3 4
|
|
90.000 7 9
|
|
95.000 11 13
|
|
99.000 20 22
|
|
99.900 35 37
|
|
99.990 50 53
|
|
99.999 66 68
|
|
|
|
*/
|
|
|