当前位置: 首页 > news >正文

【libm】 10 rem_pio2函数 (rem_pio2.rs)

一、源码

这段代码实现了将浮点数x对π/2取余数的功能,返回一个三元组:(整数商n, 余数y0, 余数y1)。这个函数是数学库中用于三角函数计算的重要底层函数。

// origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c
//
// ====================================================
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
//
// Developed at SunPro, a Sun Microsystems, Inc. business.
// Permission to use, copy, modify, and distribute this
// software is freely granted, provided that this notice
// is preserved.
// ====================================================
//
// Optimized by Bruce D. Evans. */
use super::rem_pio2_large;// #if FLT_EVAL_METHOD==0 || FLT_EVAL_METHOD==1
// #define EPS DBL_EPSILON
const EPS: f64 = 2.2204460492503131e-16;
// #elif FLT_EVAL_METHOD==2
// #define EPS LDBL_EPSILON
// #endif// TODO: Support FLT_EVAL_METHOD?const TO_INT: f64 = 1.5 / EPS;
/// 53 bits of 2/pi
const INV_PIO2: f64 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */
/// first 33 bits of pi/2
const PIO2_1: f64 = 1.57079632673412561417e+00; /* 0x3FF921FB, 0x54400000 */
/// pi/2 - PIO2_1
const PIO2_1T: f64 = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */
/// second 33 bits of pi/2
const PIO2_2: f64 = 6.07710050630396597660e-11; /* 0x3DD0B461, 0x1A600000 */
/// pi/2 - (PIO2_1+PIO2_2)
const PIO2_2T: f64 = 2.02226624879595063154e-21; /* 0x3BA3198A, 0x2E037073 */
/// third 33 bits of pi/2
const PIO2_3: f64 = 2.02226624871116645580e-21; /* 0x3BA3198A, 0x2E000000 */
/// pi/2 - (PIO2_1+PIO2_2+PIO2_3)
const PIO2_3T: f64 = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */// return the remainder of x rem pi/2 in y[0]+y[1]
// use rem_pio2_large() for large x
//
// caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
#[cfg_attr(all(test, assert_no_panic), no_panic::no_panic)]
pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) {let x1p24 = f64::from_bits(0x4170000000000000);let sign = (f64::to_bits(x) >> 63) as i32;let ix = (f64::to_bits(x) >> 32) as u32 & 0x7fffffff;fn medium(x: f64, ix: u32) -> (i32, f64, f64) {/* rint(x/(pi/2)), Assume round-to-nearest. */let tmp = x * INV_PIO2 + TO_INT;// force rounding of tmp to it's storage format on x87 to avoid// excess precision issues.#[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]let tmp = force_eval!(tmp);let f_n = tmp - TO_INT;let n = f_n as i32;let mut r = x - f_n * PIO2_1;let mut w = f_n * PIO2_1T; /* 1st round, good to 85 bits */let mut y0 = r - w;let ui = f64::to_bits(y0);let ey = (ui >> 52) as i32 & 0x7ff;let ex = (ix >> 20) as i32;if ex - ey > 16 {/* 2nd round, good to 118 bits */let t = r;w = f_n * PIO2_2;r = t - w;w = f_n * PIO2_2T - ((t - r) - w);y0 = r - w;let ey = (f64::to_bits(y0) >> 52) as i32 & 0x7ff;if ex - ey > 49 {/* 3rd round, good to 151 bits, covers all cases */let t = r;w = f_n * PIO2_3;r = t - w;w = f_n * PIO2_3T - ((t - r) - w);y0 = r - w;}}let y1 = (r - y0) - w;(n, y0, y1)}if ix <= 0x400f6a7a {/* |x| ~<= 5pi/4 */if (ix & 0xfffff) == 0x921fb {/* |x| ~= pi/2 or 2pi/2 */return medium(x, ix); /* cancellation -- use medium case */}if ix <= 0x4002d97c {/* |x| ~<= 3pi/4 */if sign == 0 {let z = x - PIO2_1; /* one round good to 85 bits */let y0 = z - PIO2_1T;let y1 = (z - y0) - PIO2_1T;return (1, y0, y1);} else {let z = x + PIO2_1;let y0 = z + PIO2_1T;let y1 = (z - y0) + PIO2_1T;return (-1, y0, y1);}} else if sign == 0 {let z = x - 2.0 * PIO2_1;let y0 = z - 2.0 * PIO2_1T;let y1 = (z - y0) - 2.0 * PIO2_1T;return (2, y0, y1);} else {let z = x + 2.0 * PIO2_1;let y0 = z + 2.0 * PIO2_1T;let y1 = (z - y0) + 2.0 * PIO2_1T;return (-2, y0, y1);}}if ix <= 0x401c463b {/* |x| ~<= 9pi/4 */if ix <= 0x4015fdbc {/* |x| ~<= 7pi/4 */if ix == 0x4012d97c {/* |x| ~= 3pi/2 */return medium(x, ix);}if sign == 0 {let z = x - 3.0 * PIO2_1;let y0 = z - 3.0 * PIO2_1T;let y1 = (z - y0) - 3.0 * PIO2_1T;return (3, y0, y1);} else {let z = x + 3.0 * PIO2_1;let y0 = z + 3.0 * PIO2_1T;let y1 = (z - y0) + 3.0 * PIO2_1T;return (-3, y0, y1);}} else {if ix == 0x401921fb {/* |x| ~= 4pi/2 */return medium(x, ix);}if sign == 0 {let z = x - 4.0 * PIO2_1;let y0 = z - 4.0 * PIO2_1T;let y1 = (z - y0) - 4.0 * PIO2_1T;return (4, y0, y1);} else {let z = x + 4.0 * PIO2_1;let y0 = z + 4.0 * PIO2_1T;let y1 = (z - y0) + 4.0 * PIO2_1T;return (-4, y0, y1);}}}if ix < 0x413921fb {/* |x| ~< 2^20*(pi/2), medium size */return medium(x, ix);}/** all other (large) arguments*/if ix >= 0x7ff00000 {/* x is inf or NaN */let y0 = x - x;let y1 = y0;return (0, y0, y1);}/* set z = scalbn(|x|,-ilogb(x)+23) */let mut ui = f64::to_bits(x);ui &= (!1) >> 12;ui |= (0x3ff + 23) << 52;let mut z = f64::from_bits(ui);let mut tx = [0.0; 3];for i in 0..2 {i!(tx,i, =, z as i32 as f64);z = (z - i!(tx, i)) * x1p24;}i!(tx,2, =, z);/* skip zero terms, first term is non-zero */let mut i = 2;while i != 0 && i!(tx, i) == 0.0 {i -= 1;}let mut ty = [0.0; 3];let n = rem_pio2_large(&tx[..=i], &mut ty, ((ix as i32) >> 20) - (0x3ff + 23), 1);if sign != 0 {return (-n, -i!(ty, 0), -i!(ty, 1));}(n, i!(ty, 0), i!(ty, 1))
}#[cfg(test)]
mod tests {use super::rem_pio2;#[test]// FIXME(correctness): inaccurate results on i586#[cfg_attr(all(target_arch = "x86", not(target_feature = "sse")), ignore)]fn test_near_pi() {let arg = 3.141592025756836;let arg = force_eval!(arg);assert_eq!(rem_pio2(arg), (2, -6.278329573009626e-7, -2.1125998133974653e-23));let arg = 3.141592033207416;let arg = force_eval!(arg);assert_eq!(rem_pio2(arg), (2, -6.20382377148128e-7, -2.1125998133974653e-23));let arg = 3.141592144966125;let arg = force_eval!(arg);assert_eq!(rem_pio2(arg), (2, -5.086236681942706e-7, -2.1125998133974653e-23));let arg = 3.141592979431152;let arg = force_eval!(arg);assert_eq!(rem_pio2(arg), (2, 3.2584135866119817e-7, -2.1125998133974653e-23));}#[test]fn test_overflow_b9b847() {let _ = rem_pio2(-3054214.5490637687);}#[test]fn test_overflow_4747b9() {let _ = rem_pio2(917340800458.2274);}
}

二、主要功能

  • 计算x除以π/2的整数商n和余数y = y0 + y1

  • 使用多种优化策略处理不同范围的输入值

三、关键常量

  • INV_PIO2: 2/π的近似值,53位精度

  • PIO2_1, PIO2_1T: π/2的高33位和剩余部分

  • PIO2_2, PIO2_2T: 第二部分的33位和剩余部分

  • PIO2_3, PIO2_3T: 第三部分的33位和剩余部分

  • EPS: 双精度浮点数的机器ε

  • TO_INT: 用于舍入到整数的缩放因子

四、算法实现

  1. 主要分支
  • 小范围处理 (|x| ≤ 5π/4):

    • 直接减去π/2的倍数
    • 处理π/2附近的特殊情况
  • 中等范围处理 (5π/4 < |x| ≤ 9π/4):

    • 类似小范围处理,但处理3π/2和2π附近的特殊情况
  • 大范围处理 (9π/4 < |x| < 2^20*(π/2)):

    • 使用medium函数进行更精确的计算
  • 极大值处理 (|x| ≥ 2^20*(π/2)):

    • 调用rem_pio2_large函数处理
  • 特殊值处理 (无穷大或NaN):

    • 直接返回(0, NaN, NaN)
  1. medium函数

这是处理中等大小输入的核心函数:

  • 计算x/(π/2)的舍入整数n

  • 使用多阶段减法减少误差:

    • 第一阶段:85位精度

    • 第二阶段:118位精度(如果需要)

    • 第三阶段:151位精度(如果需要)

  1. 技术细节
  • 精度控制:通过分阶段减法控制累积误差

  • 符号处理:正确处理正负输入值

  • 边界条件:特别处理接近π/2倍数的点

  • 大数处理:使用rem_pio2_large函数处理极大输入

  1. 测试用例

代码包含测试用例验证:

  • 接近π的值

  • 溢出情况

这个函数是高效计算三角函数的关键组件,通过精心设计的算法在各种输入范围内保持高精度。

http://www.lryc.cn/news/581564.html

相关文章:

  • 人工智能之数学基础:线性回归算法的矩阵参数求导
  • 传统微商困境与开源链动2+1模式、AI智能名片及S2B2C商城小程序的转型破局
  • AUTOSAR进阶图解==>AUTOSAR_SWS_V2XFacilities
  • Hadoop MapReduce 入门
  • Hadoop高可用集群搭建
  • k8s-服务发布基础
  • 小菜狗的云计算之旅,学习了解rsync+sersync实现数据实时同步(详细操作步骤)
  • 【Linux网络编程】Socket - UDP
  • 儿童趣味记忆配对游戏
  • 【CSS-15】深入理解CSS transition-duration:掌握过渡动画的时长控制
  • Java的各种各样的生命周期——思考历程
  • 字符函数和字符串函数(下)- 暴力匹配算法
  • ASP.NET Web Pages 安装使用教程
  • 随机森林算法详解:Bagging思想的代表算法
  • 【大模型入门】访问GPT_API实战案例
  • 8.2.1+8.2.2插入排序
  • 企业智脑:智能营销新纪元——自动化品牌建设与智能化营销的技术革命
  • 【Linux操作系统 | 第12篇】Linux磁盘分区
  • Dubbo 3.x源码(31)—Dubbo消息的编码解码
  • 我的LeetCode刷题指南:链表部分
  • 微服务基础:Spring Cloud Alibaba 组件有哪些?
  • 云原生 Serverless 架构下的智能弹性伸缩与成本优化实践
  • java easyExce 动态表头列数不固定
  • vue3 当前页面方法暴露
  • 0704-0706上海,又聚上了
  • 《前端路由重构:解锁多语言交互的底层逻辑》
  • 【Zotero】Zotero无法正常启动解决方案
  • 深度解析命令模式:将请求封装为对象的设计智慧
  • Flink ClickHouse 连接器数据写入源码深度解析
  • Gin Web 层集成 Viper 配置文件和 Zap 日志文件指南(下)