当前位置: 首页 > news >正文

自编以e为底的对数函数ln,性能接近标准库函数

算法描述:
(1). 先做自变量x的范围检查,不能出现负数和0. 自己使用时,如果能通过其它途径保证自变量为正,那么可以省略这两个判断,提高速度。
(2). 根据IEEE 754浮点数的格式,x=2^k \cdot m,\ m\in(1,2),则 ln(x)=kln(2)+ln(m),可以通过位运算方便快速地获取k和m .
(3). 把 ln(1+x) 和 ln(1-x) 在 x=0 处的泰勒级数相减,

\ln(1+x) =x-\frac{x^2}{2}+\frac{x^3}{3}-\frac{x^4}{4}+\cdots

\ln(1-x) =-x-\frac{x^2}{2}-\frac{x^3}{3}-\frac{x^4}{4}-\cdots

\ln\frac{1+x}{1-x}=2x+\frac{2x^3}{3}+\frac{2x^5}{5}+\cdots

因为m的范围是(1, 2),不够接近1,如果直接令m=(1+x)/(1-x),那么x不够接近0,代入上面的泰勒级数,则精度不够高,所以要对m进行变换,常见的做法是乘上sqrt(2)/2,即

\ln(x)=k\ln(2)+\ln(m) \\ =k\ln(2)+\ln(\sqrt{2})+\ln(\frac{\sqrt{2}}{2}m) \\ =(k+\frac{1}{2})\ln(2)+\ln(\frac{\sqrt{2}}{2}m)

\frac{\sqrt{2}}{2}m\in(\frac{\sqrt{2}}{2}, \sqrt{2}),\quad x=\frac{\frac{\sqrt{2}}{2}m-1}{\frac{\sqrt{2}}{2}m+1} \in(-(3-2\sqrt{2}),3-2\sqrt{2}) 

如果改为乘以 2/3,则

\frac{2}{3}m\in(\frac{2}{3}, \frac{4}{3}),\quad x=\frac{\frac{2}{3}m-1}{\frac{2}{3}m+1} \in(-\frac{1}{5},\frac{1}{7})

这个区间长度为12/35=0.34285714,比区间(-(3-2\sqrt{2}),3-2\sqrt{2})的长度6-4\sqrt{2}=0.34314575更短,代入泰勒级数后的精度更高一些。

\ln(x)=k\ln(2)+\ln(m) \\ =k\ln(2)+\ln(\frac{3}{2})+\ln(\frac{2}{3}m)
多项式求值采用秦九韶算法,同时还使用fmadd指令加速运算(融合乘加,intel _mm_fmadd_sd)

计算机如何计算对数函数_数值计算】求对数函数值,输入实数x>0 ,输出x对应的对数函数值ln(x)(使用双精度dou-CSDN博客更详细地解释了如何利用IEEE 754浮点数的格式获取k和m.

标准库的算法可参考:glibc/sysdeps/ieee754/dbl-64/s_log1p.c at master · bminor/glibc · GitHub

最终的效果是,精度与标准库互有胜负(以windows的计算器以及Wolfram|Alpha: Computational Intelligence作为参考值),如果不对自变量为NAN的情况进行处理,速度稍快于标准库。

 C++代码如下:

#include<stdio.h>
#include<math.h>
#include<time.h>
#include<immintrin.h>#define FMADD
constexpr double ln2 = 0.6931471805599453;
constexpr double ln3_2 = 0.40546510810816438;   // ln(3/2)
constexpr double sqrt2_2 = 0.7071067811865475;  // sqrt(2)/2
constexpr unsigned long long x000F = 0x000FFFFFFFFFFFFF;
constexpr unsigned long long x3FF0 = 0x3FF0000000000000;__m128d c17 = _mm_set_sd(2.0 / 17.0);
__m128d c15 = _mm_set_sd(2.0 / 15.0);
__m128d c13 = _mm_set_sd(2.0 / 13.0);
__m128d c11 = _mm_set_sd(2.0 / 11.0);
__m128d c9 = _mm_set_sd(2.0 / 9.0);
__m128d c7 = _mm_set_sd(2.0 / 7.0);
__m128d c5 = _mm_set_sd(2.0 / 5.0);
__m128d c3 = _mm_set_sd(2.0 / 3.0);
__m128d c1 = _mm_set_sd(2.0);inline double myln(double x) {if (x < 0) {return NAN;}if (x == 0) {return -INFINITY;}unsigned long long llx = *reinterpret_cast<unsigned long long*>(&x);short k = (llx >> 52) - 1023;        //  x = 2^k * munsigned long long llm = (llx & x000F) | x3FF0;double m = *reinterpret_cast<double*>(&llm);m *= 0.66666666666666666;    //m *= sqrt2_2;x = (m - 1.0) / (m + 1.0);double x2 = x * x;
#ifdef FMADD__m128d x128 = _mm_set_sd(x);__m128d x2_128 = _mm_set_sd(x2);__m128d t128 = c17;t128 = _mm_fmadd_sd(t128, x2_128, c15);t128 = _mm_fmadd_sd(t128, x2_128, c13);t128 = _mm_fmadd_sd(t128, x2_128, c11);t128 = _mm_fmadd_sd(t128, x2_128, c9);t128 = _mm_fmadd_sd(t128, x2_128, c7);t128 = _mm_fmadd_sd(t128, x2_128, c5);t128 = _mm_fmadd_sd(t128, x2_128, c3);t128 = _mm_fmadd_sd(t128, x2_128, c1);t128 = _mm_mul_sd(t128, x128);m = _mm_cvtsd_f64(t128);
#elsem = 2.0 / 17.0;m = m * x2 + 2.0 / 15.0;m = m * x2 + 2.0 / 13.0;m = m * x2 + 2.0 / 11.0;m = m * x2 + 2.0 / 9.0;m = m * x2 + 2.0 / 7.0;m = m * x2 + 2.0 / 5.0;m = m * x2 + 2.0 / 3.0;m = m * x2 + 2.0;m *= x;
#endifreturn k * ln2 + ln3_2 + m;  // return (k + 0.5) * ln2 + m; //如果前面 m *= sqrt2_2,那就需要用这一行return
}int main() {printf("double, 精度测试\n");for (double x = 0.1; x < 3; x += 0.1) {printf("myln(%2.1f)=%18.16lf\n  ln(%2.1f)=%18.16lf\n-------\n", x, myln(x), x, log(x));}printf("速度测试,编译器优化设为/O2,CPU:Core i7-11800H \n");clock_t start = clock();double sum = 0;double x1 = 0.01, x2 =1000, dx = 1e-6;for (double x = x1; x < x2; x += dx) {sum += myln(x) / x;}printf("sum=%lf, myln_Time: %fs\n", sum, (double)(clock() - start) / CLOCKS_PER_SEC);start = clock();sum = 0;for (double x = x1; x < x2; x += dx) {sum += log(x) / x;}printf("sum=%lf,   ln_Time: %fs\n", sum, (double)(clock() - start) / CLOCKS_PER_SEC);
}

 运行结果如下:

double, 精度测试
myln(0.1)=-2.3025850929940459ln(0.1)=-2.3025850929940455
-------
myln(0.2)=-1.6094379124341003ln(0.2)=-1.6094379124341003
-------
myln(0.3)=-1.2039728043259359ln(0.3)=-1.2039728043259359
-------
myln(0.4)=-0.9162907318741550ln(0.4)=-0.9162907318741550
-------
myln(0.5)=-0.6931471805599396ln(0.5)=-0.6931471805599453
-------
myln(0.6)=-0.5108256237659907ln(0.6)=-0.5108256237659907
-------
myln(0.7)=-0.3566749439387324ln(0.7)=-0.3566749439387324
-------
myln(0.8)=-0.2231435513142100ln(0.8)=-0.2231435513142098
-------
myln(0.9)=-0.1053605156578265ln(0.9)=-0.1053605156578264
-------
myln(1.0)=-0.0000000000000002ln(1.0)=-0.0000000000000001
-------
myln(1.1)=0.0953101798043247ln(1.1)=0.0953101798043247
-------
myln(1.2)=0.1823215567939545ln(1.2)=0.1823215567939546
-------
myln(1.3)=0.2623642644674911ln(1.3)=0.2623642644674911
-------
myln(1.4)=0.3364722366212130ln(1.4)=0.3364722366212130
-------
myln(1.5)=0.4054651081081644ln(1.5)=0.4054651081081646
-------
myln(1.6)=0.4700036292457357ln(1.6)=0.4700036292457357
-------
myln(1.7)=0.5306282510621706ln(1.7)=0.5306282510621706
-------
myln(1.8)=0.5877866649021192ln(1.8)=0.5877866649021193
-------
myln(1.9)=0.6418538861723950ln(1.9)=0.6418538861723950
-------
myln(2.0)=0.6931471805599509ln(2.0)=0.6931471805599455
-------
myln(2.1)=0.7419373447293780ln(2.1)=0.7419373447293776
-------
myln(2.2)=0.7884573603642703ln(2.2)=0.7884573603642705
-------
myln(2.3)=0.8329091229351041ln(2.3)=0.8329091229351043
-------
myln(2.4)=0.8754687373539001ln(2.4)=0.8754687373539003
-------
myln(2.5)=0.9162907318741552ln(2.5)=0.9162907318741554
-------
myln(2.6)=0.9555114450274365ln(2.6)=0.9555114450274368
-------
myln(2.7)=0.9932517730102837ln(2.7)=0.9932517730102838
-------
myln(2.8)=1.0296194171811583ln(2.8)=1.0296194171811586
-------
myln(2.9)=1.0647107369924285ln(2.9)=1.0647107369924287
-------
速度测试,编译器优化设为/O2,CPU:Core i7-11800H
sum=13254515.057331, myln_Time: 3.817000s
sum=13254515.057331,   ln_Time: 3.838000s

http://www.lryc.cn/news/475798.html

相关文章:

  • Java中的日期时间
  • 位置编码的表示
  • 0,国产FPGA(紫光同创)-新建PDS工程
  • c++联合
  • Edit Data. Create Cell Editors. Validate User Input 编辑数据。创建 Cell Editors。验证用户输入
  • Java 文件操作与IO流
  • 探索开源MiniMind项目:让大语言模型不再神秘(1)
  • Android 大疆面经
  • 【2024-10-31-2024-11-03】LeetCode刷题——python语法基础题
  • 【算法】二分查找
  • 第十五章 Vue工程化开发及Vue CLI脚手架
  • 【Grafana】Grafana 基础入门
  • 如何获取页面上所有input框
  • 0-ARM Linux驱动开发-字符设备
  • 使用 Faster Whisper 和 Gradio 实现实时语音转文字
  • redis v6.0.16 安装 基于Ubuntu 22.04
  • Milvus - 内存索引类型详解
  • 【STM32】按键控制LED 光敏传感器控制蜂鸣器
  • flutter-防抖
  • 什么是贪心算法
  • YOLOv6-4.0部分代码阅读笔记-effidehead_lite.py
  • 重学SpringBoot3-整合 Elasticsearch 8.x (一)客户端方式
  • 极简实现酷炫动效:Flutter隐式动画指南第三篇自定义Flutter隐式动画
  • 无人机维护保养、部件修理更换技术详解
  • xilinx vitis 更换硬件平台——ZYNQ学习笔记5
  • vscode makfile编译c程序
  • 【学术论文投稿】探索嵌入式硬件设计:揭秘智能设备的心脏
  • JavaScript 概述
  • 2024年10月个人工作生活总结
  • uniapp ,微信小程序,滚动(下滑,上拉)到底部加载下一页内容