@@ -92,6 +92,16 @@ get_math_module_state(PyObject *module)
9292 return (math_module_state * )state ;
9393}
9494
95+ /*
96+ sin(pi*x), giving accurate results for all finite x (especially x
97+ integral or close to an integer). This is here for use in the
98+ reflection formula for the gamma function. It conforms to IEEE
99+ 754-2008 for finite arguments, but not for infinities or nans.
100+ */
101+
102+ static const double pi = 3.141592653589793238462643383279502884197 ;
103+ static const double logpi = 1.144729885849400174143427351353058711647 ;
104+
95105/* Version of PyFloat_AsDouble() with in-line fast paths
96106 for exact floats and integers. Gives a substantial
97107 speed improvement for extracting float arguments.
@@ -114,6 +124,162 @@ get_math_module_state(PyObject *module)
114124 } \
115125 }
116126
127+ static double
128+ m_sinpi (double x )
129+ {
130+ double y , r ;
131+ int n ;
132+ /* this function should only ever be called for finite arguments */
133+ assert (Py_IS_FINITE (x ));
134+ y = fmod (fabs (x ), 2.0 );
135+ n = (int )round (2.0 * y );
136+ assert (0 <= n && n <= 4 );
137+ switch (n ) {
138+ case 0 :
139+ r = sin (pi * y );
140+ break ;
141+ case 1 :
142+ r = cos (pi * (y - 0.5 ));
143+ break ;
144+ case 2 :
145+ /* N.B. -sin(pi*(y-1.0)) is *not* equivalent: it would give
146+ -0.0 instead of 0.0 when y == 1.0. */
147+ r = sin (pi * (1.0 - y ));
148+ break ;
149+ case 3 :
150+ r = - cos (pi * (y - 1.5 ));
151+ break ;
152+ case 4 :
153+ r = sin (pi * (y - 2.0 ));
154+ break ;
155+ default :
156+ Py_UNREACHABLE ();
157+ }
158+ return copysign (1.0 , x )* r ;
159+ }
160+
161+ /* Implementation of the real gamma function. In extensive but non-exhaustive
162+ random tests, this function proved accurate to within <= 10 ulps across the
163+ entire float domain. Note that accuracy may depend on the quality of the
164+ system math functions, the pow function in particular. Special cases
165+ follow C99 annex F. The parameters and method are tailored to platforms
166+ whose double format is the IEEE 754 binary64 format.
167+
168+ Method: for x > 0.0 we use the Lanczos approximation with parameters N=13
169+ and g=6.024680040776729583740234375; these parameters are amongst those
170+ used by the Boost library. Following Boost (again), we re-express the
171+ Lanczos sum as a rational function, and compute it that way. The
172+ coefficients below were computed independently using MPFR, and have been
173+ double-checked against the coefficients in the Boost source code.
174+
175+ For x < 0.0 we use the reflection formula.
176+
177+ There's one minor tweak that deserves explanation: Lanczos' formula for
178+ Gamma(x) involves computing pow(x+g-0.5, x-0.5) / exp(x+g-0.5). For many x
179+ values, x+g-0.5 can be represented exactly. However, in cases where it
180+ can't be represented exactly the small error in x+g-0.5 can be magnified
181+ significantly by the pow and exp calls, especially for large x. A cheap
182+ correction is to multiply by (1 + e*g/(x+g-0.5)), where e is the error
183+ involved in the computation of x+g-0.5 (that is, e = computed value of
184+ x+g-0.5 - exact value of x+g-0.5). Here's the proof:
185+
186+ Correction factor
187+ -----------------
188+ Write x+g-0.5 = y-e, where y is exactly representable as an IEEE 754
189+ double, and e is tiny. Then:
190+
191+ pow(x+g-0.5,x-0.5)/exp(x+g-0.5) = pow(y-e, x-0.5)/exp(y-e)
192+ = pow(y, x-0.5)/exp(y) * C,
193+
194+ where the correction_factor C is given by
195+
196+ C = pow(1-e/y, x-0.5) * exp(e)
197+
198+ Since e is tiny, pow(1-e/y, x-0.5) ~ 1-(x-0.5)*e/y, and exp(x) ~ 1+e, so:
199+
200+ C ~ (1-(x-0.5)*e/y) * (1+e) ~ 1 + e*(y-(x-0.5))/y
201+
202+ But y-(x-0.5) = g+e, and g+e ~ g. So we get C ~ 1 + e*g/y, and
203+
204+ pow(x+g-0.5,x-0.5)/exp(x+g-0.5) ~ pow(y, x-0.5)/exp(y) * (1 + e*g/y),
205+
206+ Note that for accuracy, when computing r*C it's better to do
207+
208+ r + e*g/y*r;
209+
210+ than
211+
212+ r * (1 + e*g/y);
213+
214+ since the addition in the latter throws away most of the bits of
215+ information in e*g/y.
216+ */
217+
218+ #define LANCZOS_N 13
219+ static const double lanczos_g = 6.024680040776729583740234375 ;
220+ static const double lanczos_g_minus_half = 5.524680040776729583740234375 ;
221+ static const double lanczos_num_coeffs [LANCZOS_N ] = {
222+ 23531376880.410759688572007674451636754734846804940 ,
223+ 42919803642.649098768957899047001988850926355848959 ,
224+ 35711959237.355668049440185451547166705960488635843 ,
225+ 17921034426.037209699919755754458931112671403265390 ,
226+ 6039542586.3520280050642916443072979210699388420708 ,
227+ 1439720407.3117216736632230727949123939715485786772 ,
228+ 248874557.86205415651146038641322942321632125127801 ,
229+ 31426415.585400194380614231628318205362874684987640 ,
230+ 2876370.6289353724412254090516208496135991145378768 ,
231+ 186056.26539522349504029498971604569928220784236328 ,
232+ 8071.6720023658162106380029022722506138218516325024 ,
233+ 210.82427775157934587250973392071336271166969580291 ,
234+ 2.5066282746310002701649081771338373386264310793408
235+ };
236+
237+ /* denominator is x*(x+1)*...*(x+LANCZOS_N-2) */
238+ static const double lanczos_den_coeffs [LANCZOS_N ] = {
239+ 0.0 , 39916800.0 , 120543840.0 , 150917976.0 , 105258076.0 , 45995730.0 ,
240+ 13339535.0 , 2637558.0 , 357423.0 , 32670.0 , 1925.0 , 66.0 , 1.0 };
241+
242+ /* gamma values for small positive integers, 1 though NGAMMA_INTEGRAL */
243+ #define NGAMMA_INTEGRAL 23
244+ static const double gamma_integral [NGAMMA_INTEGRAL ] = {
245+ 1.0 , 1.0 , 2.0 , 6.0 , 24.0 , 120.0 , 720.0 , 5040.0 , 40320.0 , 362880.0 ,
246+ 3628800.0 , 39916800.0 , 479001600.0 , 6227020800.0 , 87178291200.0 ,
247+ 1307674368000.0 , 20922789888000.0 , 355687428096000.0 ,
248+ 6402373705728000.0 , 121645100408832000.0 , 2432902008176640000.0 ,
249+ 51090942171709440000.0 , 1124000727777607680000.0 ,
250+ };
251+
252+ /* Lanczos' sum L_g(x), for positive x */
253+
254+ static double
255+ lanczos_sum (double x )
256+ {
257+ double num = 0.0 , den = 0.0 ;
258+ int i ;
259+ assert (x > 0.0 );
260+ /* evaluate the rational function lanczos_sum(x). For large
261+ x, the obvious algorithm risks overflow, so we instead
262+ rescale the denominator and numerator of the rational
263+ function by x**(1-LANCZOS_N) and treat this as a
264+ rational function in 1/x. This also reduces the error for
265+ larger x values. The choice of cutoff point (5.0 below) is
266+ somewhat arbitrary; in tests, smaller cutoff values than
267+ this resulted in lower accuracy. */
268+ if (x < 5.0 ) {
269+ for (i = LANCZOS_N ; -- i >= 0 ; ) {
270+ num = num * x + lanczos_num_coeffs [i ];
271+ den = den * x + lanczos_den_coeffs [i ];
272+ }
273+ }
274+ else {
275+ for (i = 0 ; i < LANCZOS_N ; i ++ ) {
276+ num = num / x + lanczos_num_coeffs [i ];
277+ den = den / x + lanczos_den_coeffs [i ];
278+ }
279+ }
280+ return num /den ;
281+ }
282+
117283/* Constant for +infinity, generated in the same way as float('inf'). */
118284
119285static double
@@ -143,46 +309,113 @@ m_nan(void)
143309
144310#endif
145311
146- /*
147- gamma: the real gamma function.
148- */
149-
150312static double
151- m_gamma (double x )
313+ m_tgamma (double x )
152314{
315+ double absx , r , y , z , sqrtpow ;
316+
153317 /* special cases */
154318 if (!Py_IS_FINITE (x )) {
155319 if (Py_IS_NAN (x ) || x > 0.0 )
156- return x ; /* gamma (nan) = nan, gamma (inf) = inf */
320+ return x ; /* tgamma (nan) = nan, tgamma (inf) = inf */
157321 else {
158322 errno = EDOM ;
159- return Py_NAN ; /* gamma (-inf) = nan, invalid */
323+ return Py_NAN ; /* tgamma (-inf) = nan, invalid */
160324 }
161325 }
162326 if (x == 0.0 ) {
163327 errno = EDOM ;
164- /* gamma (+-0.0) = +-inf, divide-by-zero */
328+ /* tgamma (+-0.0) = +-inf, divide-by-zero */
165329 return copysign (Py_HUGE_VAL , x );
166330 }
167331
168332 /* integer arguments */
169333 if (x == floor (x )) {
170334 if (x < 0.0 ) {
171- errno = EDOM ; /* gamma (n) = nan, invalid for */
335+ errno = EDOM ; /* tgamma (n) = nan, invalid for */
172336 return Py_NAN ; /* negative integers n */
173337 }
338+ if (x <= NGAMMA_INTEGRAL )
339+ return gamma_integral [(int )x - 1 ];
340+ }
341+ absx = fabs (x );
342+
343+ /* tiny arguments: tgamma(x) ~ 1/x for x near 0 */
344+ if (absx < 1e-20 ) {
345+ r = 1.0 /x ;
346+ if (Py_IS_INFINITY (r ))
347+ errno = ERANGE ;
348+ return r ;
349+ }
350+
351+ /* large arguments: assuming IEEE 754 doubles, tgamma(x) overflows for
352+ x > 200, and underflows to +-0.0 for x < -200, not a negative
353+ integer. */
354+ if (absx > 200.0 ) {
355+ if (x < 0.0 ) {
356+ return 0.0 /m_sinpi (x );
357+ }
358+ else {
359+ errno = ERANGE ;
360+ return Py_HUGE_VAL ;
361+ }
174362 }
175363
176- return tgamma (x );
364+ y = absx + lanczos_g_minus_half ;
365+ /* compute error in sum */
366+ if (absx > lanczos_g_minus_half ) {
367+ /* note: the correction can be foiled by an optimizing
368+ compiler that (incorrectly) thinks that an expression like
369+ a + b - a - b can be optimized to 0.0. This shouldn't
370+ happen in a standards-conforming compiler. */
371+ double q = y - absx ;
372+ z = q - lanczos_g_minus_half ;
373+ }
374+ else {
375+ double q = y - lanczos_g_minus_half ;
376+ z = q - absx ;
377+ }
378+ z = z * lanczos_g / y ;
379+ if (x < 0.0 ) {
380+ r = - pi / m_sinpi (absx ) / absx * exp (y ) / lanczos_sum (absx );
381+ r -= z * r ;
382+ if (absx < 140.0 ) {
383+ r /= pow (y , absx - 0.5 );
384+ }
385+ else {
386+ sqrtpow = pow (y , absx / 2.0 - 0.25 );
387+ r /= sqrtpow ;
388+ r /= sqrtpow ;
389+ }
390+ }
391+ else {
392+ r = lanczos_sum (absx ) / exp (y );
393+ r += z * r ;
394+ if (absx < 140.0 ) {
395+ r *= pow (y , absx - 0.5 );
396+ }
397+ else {
398+ sqrtpow = pow (y , absx / 2.0 - 0.25 );
399+ r *= sqrtpow ;
400+ r *= sqrtpow ;
401+ }
402+ }
403+ if (Py_IS_INFINITY (r ))
404+ errno = ERANGE ;
405+ return r ;
177406}
178407
179408/*
180409 lgamma: natural log of the absolute value of the Gamma function.
410+ For large arguments, Lanczos' formula works extremely well here.
181411*/
182412
183413static double
184414m_lgamma (double x )
185415{
416+ double r ;
417+ double absx ;
418+
186419 /* special cases */
187420 if (!Py_IS_FINITE (x )) {
188421 if (Py_IS_NAN (x ))
@@ -197,9 +430,28 @@ m_lgamma(double x)
197430 errno = EDOM ; /* lgamma(n) = inf, divide-by-zero for */
198431 return Py_HUGE_VAL ; /* integers n <= 0 */
199432 }
433+ else {
434+ return 0.0 ; /* lgamma(1) = lgamma(2) = 0.0 */
435+ }
200436 }
201437
202- return lgamma (x );
438+ absx = fabs (x );
439+ /* tiny arguments: lgamma(x) ~ -log(fabs(x)) for small x */
440+ if (absx < 1e-20 )
441+ return - log (absx );
442+
443+ /* Lanczos' formula. We could save a fraction of a ulp in accuracy by
444+ having a second set of numerator coefficients for lanczos_sum that
445+ absorbed the exp(-lanczos_g) term, and throwing out the lanczos_g
446+ subtraction below; it's probably not worth it. */
447+ r = log (lanczos_sum (absx )) - lanczos_g ;
448+ r += (absx - 0.5 ) * (log (absx + lanczos_g - 0.5 ) - 1 );
449+ if (x < 0.0 )
450+ /* Use reflection formula to get value for negative x. */
451+ r = logpi - log (fabs (m_sinpi (absx ))) - log (absx ) - r ;
452+ if (Py_IS_INFINITY (r ))
453+ errno = ERANGE ;
454+ return r ;
203455}
204456
205457/*
@@ -907,7 +1159,7 @@ math_floor(PyObject *module, PyObject *number)
9071159 return PyLong_FromDouble (floor (x ));
9081160}
9091161
910- FUNC1A (gamma , m_gamma ,
1162+ FUNC1A (gamma , m_tgamma ,
9111163 "gamma($module, x, /)\n--\n\n"
9121164 "Gamma function at x." )
9131165FUNC1A (lgamma , m_lgamma ,
0 commit comments