統計:利用自我撰寫函數來計算F分布與T分布的𝑝-value

最近在研究ANOVA檢定過程中的產物,這些C#程式碼是從Javascript範例拿過來修改的,將其建立並與.NET Framework 4後提供的System.Web.UI.DataVisualization.Charting類別進行相比較,驗證其運作能力(計算)是正常與正確的,我們也可以從中得知如何自建立函式庫與官方函式庫的使用方式。

F分布與T分布機率之相關函式

C# Console程式碼請直接看下列所示:

static void Main(string[] args)
{
  var oStatistics = new System.Web.UI.DataVisualization.Charting.Chart();
      
  //F-Test
  var f = 2.137706904d;
  var fDf1 = 1;
  var fDf2 = 17;
  WriteLine($"(自建)F-Test-P值:{PValueForF(f, fDf1, fDf2)}");
  WriteLine($"(微軟)F-Test-P值:{oStatistics.DataManipulator.Statistics.FDistribution(f, fDf1, fDf2)}");

  //T-Test
  var t = 4.007924449d;
  var tDf = 17;
  WriteLine($"(自建)T-Test-P值:{PValueForT(t, tDf)}");
  WriteLine($"(微軟)T-Test-P值:{oStatistics.DataManipulator.Statistics.TDistribution(t, tDf, false)}");

  ReadKey();
}

public static double PValueForT(double t, double df)
{
  double n = df;
  double a, b, y;
  t = t * t;
  y = t / n;
  b = y + 1.0;
  if (y > 1.0E-6) y = System.Math.Log(b);
  a = n - 0.5;
  b = 48.0 * a * a;
  y = a * y;
  y = (((((-0.4 * y - 3.3) * y - 24.0) * y - 85.5) /
    (0.8 * y * y + 100.0 + b) + y + 3.0) / b + 1.0) *
    System.Math.Sqrt(y);
  return 2.0 * Gauss(-y);
}

public static double Gauss(double z)
{
  double y;
  double p;
  double w;
  if (z == 0.0)
    p = 0.0;
  else
  {
    y = System.Math.Abs(z) / 2;
    if (y >= 3.0)
    {
      p = 1.0;
    }
    else if (y < 1.0)
    {
      w = y * y;
      p = ((((((((0.000124818987 * w
        - 0.001075204047) * w + 0.005198775019) * w
        - 0.019198292004) * w + 0.059054035642) * w
        - 0.151968751364) * w + 0.319152932694) * w
        - 0.531923007300) * w + 0.797884560593) * y * 2.0;
    }
    else
    {
      y = y - 2.0;
      p = (((((((((((((-0.000045255659 * y
        + 0.000152529290) * y - 0.000019538132) * y
        - 0.000676904986) * y + 0.001390604284) * y
        - 0.000794620820) * y - 0.002034254874) * y
        + 0.006549791214) * y - 0.010557625006) * y
        + 0.011630447319) * y - 0.009279453341) * y
        + 0.005353579108) * y - 0.002141268741) * y
        + 0.000535310849) * y + 0.999936657524;
    }
  }
  if (z > 0.0)
    return (p + 1.0) / 2;
  else
    return (1.0 - p) / 2;
}

public static double PValueForF(double F, int df1, int df2)
{
  if (F == 0)
  { return 1; }
  var x = df2 / (df1 * F + df2);
  return BetaI(df2, df1, x);
}

//Returns the incomplete beta function I_x(n/2, m/2) for positive integers n and m and 0 <= x <=1
public static double BetaI(int n, int m, double x)
{
  double bt;
  var a = 0.5 * n;
  var b = 0.5 * m;
  if (x == 0 || x == 1)
  { bt = 0.0; }
  else
  { bt = System.Math.Exp(GammaLn(m + n) - GammaLn(n) - GammaLn(m) + a * System.Math.Log(x) + b * System.Math.Log(1-x)); }
  double beti;
  //use continued fraction directly
  if (x < (a + 1.0) / (a + b + 2))
  { beti = bt * BetaF(a, b, x) / a; }
  else
  { // use continued fraction after making the symmetry transformation
    beti = 1.0 - bt * BetaF(b, a, 1-x) / b;
  }
  return beti;
}

//Evaluates incomplete beta function by modified Lentz's method 
public static double BetaF(double a, double b, double x)
{
  var qab = a + b;
  var qap = a + 1.0;
  var qam = a - 1.0;
  var c = 1.0;
  var d = 1 - qab * x / qap;
  var fpmin = 1e-300;
  var eps = 1e-8;
  if (System.Math.Abs(d) < fpmin) { d = fpmin; }
  d = 1.0 / d;
  var h = d;
  for (var m = 1; m < Int32.MaxValue; m++)
  {
    var m2 = 2 * m;
    var aa = m * (b - m) * x / ((qam + m2) * (a + m2));
    d = 1 + aa * d;
    if (System.Math.Abs(d) < fpmin) { d = fpmin; }
    c = 1 + aa / c;
    if (System.Math.Abs(c) < fpmin) { c = fpmin; }
    d = 1.0 / d;
    h = h * d * c;
    aa = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2));
    d = 1 + aa * d;
    if (System.Math.Abs(d) < fpmin) { d = fpmin; }
    c = 1 + aa / c;
    if (System.Math.Abs(c) < fpmin) { c = fpmin; }
    d = 1.0 / d;
    var del = d * c;
    h = h * del;
    if (System.Math.Abs(del - 1.0) < eps) { break; }
  }
  return h;
}

//Returns ln(Gamma(n/2)) for n=1,2,...
public static double GammaLn(int n)
{
  //Tabulated values of ln(Gamma(n/2)) for 200 arrays
  double[] lg = {0.5723649429247001, 0, -0.1207822376352452, 0, 0.2846828704729192, 0.6931471805599453, 1.200973602347074, 1.791759469228055, 2.453736570842442, 3.178053830347946, 3.957813967618717, 4.787491742782046, 5.662562059857142, 6.579251212010101, 7.534364236758733, 8.525161361065415, 9.549267257300997, 10.60460290274525, 11.68933342079727, 12.80182748008147, 13.94062521940376, 15.10441257307552, 16.29200047656724, 17.50230784587389, 18.73434751193645, 19.98721449566188, 21.2600761562447, 22.55216385312342, 23.86276584168909, 25.19122118273868, 26.53691449111561, 27.89927138384089, 29.27775451504082, 30.67186010608068, 32.08111489594736, 33.50507345013689, 34.94331577687682, 36.39544520803305, 37.86108650896109, 39.3398841871995, 40.8315009745308, 42.33561646075349, 43.85192586067515, 45.3801388984769, 46.91997879580877, 48.47118135183522, 50.03349410501914, 51.60667556776437, 53.19049452616927, 54.78472939811231, 56.38916764371993, 58.00360522298051, 59.62784609588432, 61.26170176100199, 62.9049908288765, 64.55753862700632, 66.21917683354901, 67.88974313718154, 69.56908092082364, 71.257038967168, 72.9534711841694, 74.65823634883016, 76.37119786778275, 78.09222355331531, 79.82118541361436, 81.55795945611503, 83.30242550295004, 85.05446701758153, 86.81397094178108, 88.58082754219767, 90.35493026581838, 92.13617560368709, 93.92446296229978, 95.71969454214322, 97.52177522288821, 99.33061245478741, 101.1461161558646, 102.9681986145138, 104.7967743971583, 106.6317602606435, 108.4730750690654, 110.3206397147574, 112.1743770431779, 114.0342117814617, 115.9000704704145, 117.7718813997451, 119.6495745463449, 121.5330815154387, 123.4223354844396, 125.3172711493569, 127.2178246736118, 129.1239336391272, 131.0355369995686, 132.9525750356163, 134.8749893121619, 136.8027226373264, 138.7357190232026, 140.6739236482343, 142.617282821146, 144.5657439463449, 146.5192554907206, 148.477766951773, 150.4412288270019, 152.4095925844974, 154.3828106346716, 156.3608363030788, 158.3436238042692, 160.3311282166309, 162.3233054581712, 164.3201122631952, 166.3215061598404, 168.3274454484277, 170.3378891805928, 172.3527971391628, 174.3721298187452, 176.3958484069973, 178.4239147665485, 180.4562914175438, 182.4929415207863, 184.5338288614495, 186.5789178333379, 188.6281734236716, 190.6815611983747, 192.7390472878449, 194.8005983731871, 196.86618167289, 198.9357649299295, 201.0093163992815, 203.0868048358281, 205.1681994826412, 207.2534700596299, 209.3425867525368, 211.435520202271, 213.5322414945632, 215.6327221499328, 217.7369341139542, 219.8448497478113, 221.9564418191303, 224.0716834930795, 226.1905483237276, 228.3130102456502, 230.4390435657769, 232.5686229554685, 234.7017234428182, 236.8383204051684, 238.9783895618343, 241.1219069670290, 243.2688490029827, 245.4191923732478, 247.5729140961868, 249.7299914986334, 251.8904022097232, 254.0541241548883, 256.2211355500095, 258.3914148957209, 260.5649409718632, 262.7416928320802, 264.9216497985528, 267.1047914568685, 269.2910976510198, 271.4805484785288, 273.6731242856937, 275.8688056629533, 278.0675734403662, 280.2694086832001, 282.4742926876305, 284.6822069765408, 286.893133295427, 289.1070536083976, 291.3239500942703, 293.5438051427607, 295.7666013507606, 297.9923215187034, 300.2209486470141, 302.4524659326413, 304.6868567656687, 306.9241047260048, 309.1641935801469, 311.4071072780187, 313.652829949879, 315.9013459032995, 318.1526396202093, 320.4066957540055, 322.6634991267262, 324.9230347262869, 327.1852877037753, 329.4502433708053, 331.7178871969285, 333.9882048070999, 336.2611819791985, 338.5368046415996, 340.815058870799, 343.0959308890863, 345.3794070622669, 347.6654738974312, 349.9541180407703, 352.2453262754350, 354.5390855194408, 356.835382823613, 359.1342053695754};
  if (n < lg.Length)
  { return lg[n - 1]; }
  //For n > 200 use the approx, formula given by numerical recipe
  double[] coef = { 76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 1.208650973866179e-3, -5.395239384953e-6 };
  var stp = 2.5066282746310005;
  var x = 0.5 * n;
  var y = x;
  var tmp = x + 5.5;
  tmp = (x + 0.5) * System.Math.Log(tmp) - tmp;
  var ser = 1.000000000190015;
  for (var i = 0; i < 6; i++)
  {
    y = y + 1;
    ser = ser + coef[i] / y;
  }
  var gamln = tmp + System.Math.Log(stp * ser / x);
  return gamln;
}

輸出結果

//F檢定
(自建)F-Test-P值:0.161956982038765
(微軟)F-Test-P值:0.161956982038728
//T檢定
(自建)T-Test-P值:0.000911423797537392
(微軟)T-Test-P值:0.000911422873084433

相關資源提供參考

Statistics TDistribution FDistribution Microsoft .NETFramework CSharp SelfMethod 𝐹-Statistics 𝑇-Statistics 𝑝-Value