16 #if defined(HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_) == \
17 defined(HWY_TARGET_TOGGLE)
18 #ifdef HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
19 #undef HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
21 #define HIGHWAY_HWY_CONTRIB_MATH_MATH_INL_H_
38 template <
class D,
class V>
40 template <
class D,
class V>
53 template <
class D,
class V>
55 template <
class D,
class V>
68 template <
class D,
class V>
70 template <
class D,
class V>
83 template <
class D,
class V>
85 template <
class D,
class V>
98 template <
class D,
class V>
100 template <
class D,
class V>
113 template <
class D,
class V>
115 template <
class D,
class V>
128 template <
class D,
class V>
130 template <
class D,
class V>
143 template <
class D,
class V>
145 template <
class D,
class V>
158 template <
class D,
class V>
160 template <
class D,
class V>
173 template <
class D,
class V>
175 template <
class D,
class V>
188 template <
class D,
class V>
190 template <
class D,
class V>
203 template <
class D,
class V>
205 template <
class D,
class V>
218 template <
class D,
class V>
220 template <
class D,
class V>
233 template <
class D,
class V>
235 template <
class D,
class V>
248 template <
class D,
class V>
250 template <
class D,
class V>
263 template <
class D,
class V>
265 template <
class D,
class V>
334 T c6, T c7, T c8, T c9) {
344 T c6, T c7, T c8, T c9, T c10) {
354 T c6, T c7, T c8, T c9, T c10, T c11) {
364 T c6, T c7, T c8, T c9, T c10, T c11,
370 x8,
MulAdd(x4, c12,
MulAdd(x2,
MulAdd(c11, x, c10),
MulAdd(c9, x, c8))),
376 T c6, T c7, T c8, T c9, T c10, T c11,
389 T c6, T c7, T c8, T c9, T c10, T c11,
390 T c12, T c13, T c14) {
402 T c6, T c7, T c8, T c9, T c10, T c11,
403 T c12, T c13, T c14, T c15) {
415 T c6, T c7, T c8, T c9, T c10, T c11,
416 T c12, T c13, T c14, T c15, T c16) {
431 T c6, T c7, T c8, T c9, T c10, T c11,
432 T c12, T c13, T c14, T c15, T c16, T c17) {
447 T c6, T c7, T c8, T c9, T c10, T c11,
448 T c12, T c13, T c14, T c15, T c16, T c17,
463 template <
class FloatOrDouble>
465 template <
class FloatOrDouble>
467 template <
class FloatOrDouble>
469 template <
class FloatOrDouble>
471 template <
class FloatOrDouble>
477 template <
class D,
class V>
479 const auto k0 =
Set(d, +0.1666677296f);
480 const auto k1 =
Set(d, +0.07495029271f);
481 const auto k2 =
Set(d, +0.04547423869f);
482 const auto k3 =
Set(d, +0.02424046025f);
483 const auto k4 =
Set(d, +0.04197454825f);
485 return Estrin(x2, k0, k1, k2, k3, k4);
489 #if HWY_CAP_FLOAT64 && HWY_CAP_INTEGER64
492 struct AsinImpl<double> {
494 template <
class D,
class V>
496 const auto k0 =
Set(d, +0.1666666666666497543);
497 const auto k1 =
Set(d, +0.07500000000378581611);
498 const auto k2 =
Set(d, +0.04464285681377102438);
499 const auto k3 =
Set(d, +0.03038195928038132237);
500 const auto k4 =
Set(d, +0.02237176181932048341);
501 const auto k5 =
Set(d, +0.01735956991223614604);
502 const auto k6 =
Set(d, +0.01388715184501609218);
503 const auto k7 =
Set(d, +0.01215360525577377331);
504 const auto k8 =
Set(d, +0.006606077476277170610);
505 const auto k9 =
Set(d, +0.01929045477267910674);
506 const auto k10 =
Set(d, -0.01581918243329996643);
507 const auto k11 =
Set(d, +0.03161587650653934628);
509 return Estrin(x2, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11);
518 template <
class D,
class V>
520 const auto k0 =
Set(d, -0.333331018686294555664062f);
521 const auto k1 =
Set(d, +0.199926957488059997558594f);
522 const auto k2 =
Set(d, -0.142027363181114196777344f);
523 const auto k3 =
Set(d, +0.106347933411598205566406f);
524 const auto k4 =
Set(d, -0.0748900920152664184570312f);
525 const auto k5 =
Set(d, +0.0425049886107444763183594f);
526 const auto k6 =
Set(d, -0.0159569028764963150024414f);
527 const auto k7 =
Set(d, +0.00282363896258175373077393f);
529 const auto y =
Mul(x, x);
530 return MulAdd(
Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7),
Mul(y, x), x);
534 #if HWY_CAP_FLOAT64 && HWY_CAP_INTEGER64
537 struct AtanImpl<double> {
539 template <
class D,
class V>
541 const auto k0 =
Set(d, -0.333333333333311110369124);
542 const auto k1 =
Set(d, +0.199999999996591265594148);
543 const auto k2 =
Set(d, -0.14285714266771329383765);
544 const auto k3 =
Set(d, +0.111111105648261418443745);
545 const auto k4 =
Set(d, -0.090908995008245008229153);
546 const auto k5 =
Set(d, +0.0769219538311769618355029);
547 const auto k6 =
Set(d, -0.0666573579361080525984562);
548 const auto k7 =
Set(d, +0.0587666392926673580854313);
549 const auto k8 =
Set(d, -0.0523674852303482457616113);
550 const auto k9 =
Set(d, +0.0466667150077840625632675);
551 const auto k10 =
Set(d, -0.0407629191276836500001934);
552 const auto k11 =
Set(d, +0.0337852580001353069993897);
553 const auto k12 =
Set(d, -0.0254517624932312641616861);
554 const auto k13 =
Set(d, +0.016599329773529201970117);
555 const auto k14 =
Set(d, -0.00889896195887655491740809);
556 const auto k15 =
Set(d, +0.00370026744188713119232403);
557 const auto k16 =
Set(d, -0.00110611831486672482563471);
558 const auto k17 =
Set(d, +0.000209850076645816976906797);
559 const auto k18 =
Set(d, -1.88796008463073496563746e-5);
561 const auto y =
Mul(x, x);
562 return MulAdd(
Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11,
563 k12, k13, k14, k15, k16, k17, k18),
573 template <
class D,
class V>
578 template <
class D,
class V>
580 const auto k0 =
Set(d, -1.66666597127914428710938e-1f);
581 const auto k1 =
Set(d, +8.33307858556509017944336e-3f);
582 const auto k2 =
Set(d, -1.981069071916863322258e-4f);
583 const auto k3 =
Set(d, +2.6083159809786593541503e-6f);
585 const auto y =
Mul(x, x);
589 template <
class D,
class V,
class VI32>
592 const V kHalfPiPart0f =
Set(d, -0.5f * 3.140625f);
593 const V kHalfPiPart1f =
Set(d, -0.5f * 0.0009670257568359375f);
594 const V kHalfPiPart2f =
Set(d, -0.5f * 6.2771141529083251953e-7f);
595 const V kHalfPiPart3f =
Set(d, -0.5f * 1.2154201256553420762e-10f);
599 x =
MulAdd(qf, kHalfPiPart0f, x);
600 x =
MulAdd(qf, kHalfPiPart1f, x);
601 x =
MulAdd(qf, kHalfPiPart2f, x);
602 x =
MulAdd(qf, kHalfPiPart3f, x);
606 template <
class D,
class V,
class VI32>
609 const V kPiPart0f =
Set(d, -3.140625f);
610 const V kPiPart1f =
Set(d, -0.0009670257568359375f);
611 const V kPiPart2f =
Set(d, -6.2771141529083251953e-7f);
612 const V kPiPart3f =
Set(d, -1.2154201256553420762e-10f);
616 x =
MulAdd(qf, kPiPart0f, x);
617 x =
MulAdd(qf, kPiPart1f, x);
618 x =
MulAdd(qf, kPiPart2f, x);
619 x =
MulAdd(qf, kPiPart3f, x);
624 template <
class D,
class VI32>
631 template <
class D,
class VI32>
634 return BitCast(d, ShiftLeft<31>(
And(q, kOne)));
638 #if HWY_CAP_FLOAT64 && HWY_CAP_INTEGER64
641 struct CosSinImpl<double> {
643 template <
class D,
class V>
648 template <
class D,
class V>
650 const auto k0 =
Set(d, -0.166666666666666657414808);
651 const auto k1 =
Set(d, +0.00833333333333332974823815);
652 const auto k2 =
Set(d, -0.000198412698412696162806809);
653 const auto k3 =
Set(d, +2.75573192239198747630416e-6);
654 const auto k4 =
Set(d, -2.50521083763502045810755e-8);
655 const auto k5 =
Set(d, +1.60590430605664501629054e-10);
656 const auto k6 =
Set(d, -7.64712219118158833288484e-13);
657 const auto k7 =
Set(d, +2.81009972710863200091251e-15);
658 const auto k8 =
Set(d, -7.97255955009037868891952e-18);
660 const auto y =
Mul(x, x);
661 return MulAdd(
Estrin(y, k0, k1, k2, k3, k4, k5, k6, k7, k8),
Mul(y, x), x);
664 template <
class D,
class V,
class VI32>
667 const V kHalfPiPart0d =
Set(d, -0.5 * 3.1415926218032836914);
668 const V kHalfPiPart1d =
Set(d, -0.5 * 3.1786509424591713469e-8);
669 const V kHalfPiPart2d =
Set(d, -0.5 * 1.2246467864107188502e-16);
670 const V kHalfPiPart3d =
Set(d, -0.5 * 1.2736634327021899816e-24);
674 x =
MulAdd(qf, kHalfPiPart0d, x);
675 x =
MulAdd(qf, kHalfPiPart1d, x);
676 x =
MulAdd(qf, kHalfPiPart2d, x);
677 x =
MulAdd(qf, kHalfPiPart3d, x);
681 template <
class D,
class V,
class VI32>
684 const V kPiPart0d =
Set(d, -3.1415926218032836914);
685 const V kPiPart1d =
Set(d, -3.1786509424591713469e-8);
686 const V kPiPart2d =
Set(d, -1.2246467864107188502e-16);
687 const V kPiPart3d =
Set(d, -1.2736634327021899816e-24);
691 x =
MulAdd(qf, kPiPart0d, x);
692 x =
MulAdd(qf, kPiPart1d, x);
693 x =
MulAdd(qf, kPiPart2d, x);
694 x =
MulAdd(qf, kPiPart3d, x);
699 template <
class D,
class VI32>
700 HWY_INLINE Vec<Rebind<double, D>> CosSignFromQuadrant(D d, VI32 q) {
701 const VI32 kTwo =
Set(Rebind<int32_t, D>(), 2);
707 template <
class D,
class VI32>
708 HWY_INLINE Vec<Rebind<double, D>> SinSignFromQuadrant(D d, VI32 q) {
709 const VI32 kOne =
Set(Rebind<int32_t, D>(), 1);
711 d, ShiftLeft<63>(
PromoteTo(Rebind<int64_t, D>(),
And(q, kOne))));
720 template <
class D,
class V>
725 template <
class D,
class V>
727 const auto k0 =
Set(d, +0.5f);
728 const auto k1 =
Set(d, +0.166666671633720397949219f);
729 const auto k2 =
Set(d, +0.0416664853692054748535156f);
730 const auto k3 =
Set(d, +0.00833336077630519866943359f);
731 const auto k4 =
Set(d, +0.00139304355252534151077271f);
732 const auto k5 =
Set(d, +0.000198527617612853646278381f);
738 template <
class D,
class VI32>
741 const VI32 kOffset =
Set(di32, 0x7F);
742 return BitCast(d, ShiftLeft<23>(
Add(x, kOffset)));
746 template <
class D,
class V,
class VI32>
748 const VI32 y = ShiftRight<1>(e);
749 return Mul(
Mul(x, Pow2I(d, y)), Pow2I(d,
Sub(e, y)));
752 template <
class D,
class V,
class VI32>
755 const V kLn2Part0f =
Set(d, -0.693145751953125f);
756 const V kLn2Part1f =
Set(d, -1.428606765330187045e-6f);
760 x =
MulAdd(qf, kLn2Part0f, x);
761 x =
MulAdd(qf, kLn2Part1f, x);
768 template <
class D,
class V>
772 const auto kBias =
Set(di32, 0x7F);
777 template <
class D,
class V>
779 const V k0 =
Set(d, 0.66666662693f);
780 const V k1 =
Set(d, 0.40000972152f);
781 const V k2 =
Set(d, 0.28498786688f);
782 const V k3 =
Set(d, 0.24279078841f);
784 const V x2 =
Mul(x, x);
785 const V x4 =
Mul(x2, x2);
790 #if HWY_CAP_FLOAT64 && HWY_CAP_INTEGER64
792 struct ExpImpl<double> {
794 template <
class D,
class V>
799 template <
class D,
class V>
801 const auto k0 =
Set(d, +0.5);
802 const auto k1 =
Set(d, +0.166666666666666851703837);
803 const auto k2 =
Set(d, +0.0416666666666665047591422);
804 const auto k3 =
Set(d, +0.00833333333331652721664984);
805 const auto k4 =
Set(d, +0.00138888888889774492207962);
806 const auto k5 =
Set(d, +0.000198412698960509205564975);
807 const auto k6 =
Set(d, +2.4801587159235472998791e-5);
808 const auto k7 =
Set(d, +2.75572362911928827629423e-6);
809 const auto k8 =
Set(d, +2.75573911234900471893338e-7);
810 const auto k9 =
Set(d, +2.51112930892876518610661e-8);
811 const auto k10 =
Set(d, +2.08860621107283687536341e-9);
813 return MulAdd(
Estrin(x, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10),
818 template <
class D,
class VI32>
820 const Rebind<int32_t, D> di32;
821 const Rebind<int64_t, D> di64;
822 const VI32 kOffset =
Set(di32, 0x3FF);
827 template <
class D,
class V,
class VI32>
828 HWY_INLINE V LoadExpShortRange(D d, V x, VI32 e) {
829 const VI32 y = ShiftRight<1>(e);
830 return Mul(
Mul(x, Pow2I(d, y)), Pow2I(d,
Sub(e, y)));
833 template <
class D,
class V,
class VI32>
836 const V kLn2Part0d =
Set(d, -0.6931471805596629565116018);
837 const V kLn2Part1d =
Set(d, -0.28235290563031577122588448175e-12);
841 x =
MulAdd(qf, kLn2Part0d, x);
842 x =
MulAdd(qf, kLn2Part1d, x);
848 struct LogImpl<double> {
849 template <
class D,
class V>
850 HWY_INLINE Vec<Rebind<int64_t, D>> Log2p1NoSubnormal(D , V x) {
851 const Rebind<int64_t, D> di64;
852 const Rebind<uint64_t, D> du64;
858 template <
class D,
class V>
860 const V k0 =
Set(d, 0.6666666666666735130);
861 const V k1 =
Set(d, 0.3999999999940941908);
862 const V k2 =
Set(d, 0.2857142874366239149);
863 const V k3 =
Set(d, 0.2222219843214978396);
864 const V k4 =
Set(d, 0.1818357216161805012);
865 const V k5 =
Set(d, 0.1531383769920937332);
866 const V k6 =
Set(d, 0.1479819860511658591);
868 const V x2 =
Mul(x, x);
869 const V x4 =
Mul(x2, x2);
877 template <
class D,
class V,
bool kAllowSubnormals = true>
884 constexpr
bool kIsF32 = (
sizeof(
LaneType) == 4);
887 const V kLn2Hi =
Set(d, (kIsF32 ? 0.69313812256f :
888 0.693147180369123816490 ));
889 const V kLn2Lo =
Set(d, (kIsF32 ? 9.0580006145e-6f :
890 1.90821492927058770002e-10));
891 const V kOne =
Set(d, +1.0);
892 const V kMinNormal =
Set(d, (kIsF32 ? 1.175494351e-38f :
893 2.2250738585072014e-308 ));
894 const V kScale =
Set(d, (kIsF32 ? 3.355443200e+7f :
895 1.8014398509481984e+16 ));
899 using VI = decltype(
Zero(di));
900 const VI kLowerBits =
Set(di, (kIsF32 ? 0x00000000L : 0xFFFFFFFFLL));
901 const VI kMagic =
Set(di, (kIsF32 ? 0x3F3504F3L : 0x3FE6A09E00000000LL));
902 const VI kExpMask =
Set(di, (kIsF32 ? 0x3F800000L : 0x3FF0000000000000LL));
903 const VI kExpScale =
Set(di, (kIsF32 ? -25 : -54));
904 const VI kManMask =
Set(di, (kIsF32 ? 0x7FFFFFL : 0xFFFFF00000000LL));
910 if (kAllowSubnormals ==
true) {
911 const auto is_denormal =
Lt(x, kMinNormal);
919 d,
Add(exp_scale, impl.Log2p1NoSubnormal(d,
BitCast(d, exp_bits))));
931 const V ym1 =
Sub(y, kOne);
932 const V z =
Div(ym1,
Add(y, kOne));
941 template <
class D,
class V>
945 const V kZero =
Zero(d);
946 const V kHalf =
Set(d, +0.5);
947 const V kPi =
Set(d, +3.14159265358979323846264);
948 const V kPiOverTwo =
Set(d, +1.57079632679489661923132169);
951 const V abs_x =
Xor(x, sign_x);
952 const auto mask =
Lt(abs_x, kHalf);
958 const V t =
Mul(impl.AsinPoly(d, yy, y),
Mul(y, yy));
960 const V t_plus_y =
Add(t, y);
963 Add(t_plus_y, t_plus_y));
967 template <
class D,
class V>
969 const V kLarge =
Set(d, 268435456.0);
970 const V kLog2 =
Set(d, 0.693147180559945286227);
971 const V kOne =
Set(d, +1.0);
972 const V kTwo =
Set(d, +2.0);
974 const auto is_x_large =
Gt(x, kLarge);
975 const auto is_x_gt_2 =
Gt(x, kTwo);
977 const V x_minus_1 =
Sub(x, kOne);
983 const V z =
impl::Log<D, V,
false>(d, y2);
985 const auto is_pole =
Eq(y2, kOne);
992 template <
class D,
class V>
996 const V kHalf =
Set(d, +0.5);
997 const V kTwo =
Set(d, +2.0);
998 const V kPiOverTwo =
Set(d, +1.57079632679489661923132169);
1001 const V abs_x =
Xor(x, sign_x);
1002 const auto mask =
Lt(abs_x, kHalf);
1008 const V z0 =
MulAdd(impl.AsinPoly(d, yy, y),
Mul(yy, y), y);
1009 const V z1 =
NegMulAdd(z0, kTwo, kPiOverTwo);
1013 template <
class D,
class V>
1015 const V kSmall =
Set(d, 1.0 / 268435456.0);
1016 const V kLarge =
Set(d, 268435456.0);
1017 const V kLog2 =
Set(d, 0.693147180559945286227);
1018 const V kOne =
Set(d, +1.0);
1019 const V kTwo =
Set(d, +2.0);
1022 const V abs_x =
Xor(x, sign_x);
1024 const auto is_x_large =
Gt(abs_x, kLarge);
1025 const auto is_x_lt_2 =
Lt(abs_x, kTwo);
1027 const V x2 =
Mul(x, x);
1028 const V sqrt_x2_plus_1 =
Sqrt(
Add(x2, kOne));
1030 const V y0 =
MulAdd(abs_x, kTwo,
Div(kOne,
Add(sqrt_x2_plus_1, abs_x)));
1031 const V y1 =
Add(
Div(x2,
Add(sqrt_x2_plus_1, kOne)), abs_x);
1034 const V z =
impl::Log<D, V,
false>(d, y2);
1036 const auto is_pole =
Eq(y2, kOne);
1044 template <
class D,
class V>
1048 const V kOne =
Set(d, +1.0);
1049 const V kPiOverTwo =
Set(d, +1.57079632679489661923132169);
1052 const V abs_x =
Xor(x, sign);
1053 const auto mask =
Gt(abs_x, kOne);
1056 const auto divisor =
IfThenElse(mask, abs_x, kOne);
1057 const V y = impl.AtanPoly(d,
IfThenElse(mask,
Div(kOne, divisor), abs_x));
1061 template <
class D,
class V>
1063 const V kHalf =
Set(d, +0.5);
1064 const V kOne =
Set(d, +1.0);
1067 const V abs_x =
Xor(x, sign);
1072 template <
class D,
class V>
1078 const V kOneOverPi =
Set(d, 0.31830988618379067153);
1082 using VI32 = decltype(
Zero(di32));
1083 const VI32 kOne =
Set(di32, 1);
1088 const VI32 q =
Add(ShiftLeft<1>(impl.ToInt32(d,
Mul(y, kOneOverPi))), kOne);
1092 d,
Xor(impl.CosReduce(d, y, q), impl.CosSignFromQuadrant(d, q)));
1095 template <
class D,
class V>
1100 const V kHalf =
Set(d, +0.5);
1101 const V kLowerBound =
Set(d, (
sizeof(
LaneType) == 4 ? -104.0 : -1000.0));
1102 const V kNegZero =
Set(d, -0.0);
1103 const V kOne =
Set(d, +1.0);
1104 const V kOneOverLog2 =
Set(d, +1.442695040888963407359924681);
1111 impl.ToInt32(d,
MulAdd(x, kOneOverLog2,
Or(kHalf,
And(x, kNegZero))));
1114 const V y = impl.LoadExpShortRange(
1115 d,
Add(impl.ExpPoly(d, impl.ExpReduce(d, x, q)), kOne), q);
1119 template <
class D,
class V>
1124 const V kHalf =
Set(d, +0.5);
1125 const V kLowerBound =
Set(d, (
sizeof(
LaneType) == 4 ? -104.0 : -1000.0));
1126 const V kLn2Over2 =
Set(d, +0.346573590279972654708616);
1127 const V kNegOne =
Set(d, -1.0);
1128 const V kNegZero =
Set(d, -0.0);
1129 const V kOne =
Set(d, +1.0);
1130 const V kOneOverLog2 =
Set(d, +1.442695040888963407359924681);
1137 impl.ToInt32(d,
MulAdd(x, kOneOverLog2,
Or(kHalf,
And(x, kNegZero))));
1140 const V y = impl.ExpPoly(d, impl.ExpReduce(d, x, q));
1142 Sub(impl.LoadExpShortRange(d,
Add(y, kOne), q), kOne));
1146 template <
class D,
class V>
1151 template <
class D,
class V>
1153 return Mul(
Log(d, x),
Set(d, 0.4342944819032518276511));
1156 template <
class D,
class V>
1158 const V kOne =
Set(d, +1.0);
1160 const V y =
Add(x, kOne);
1161 const auto is_pole =
Eq(y, kOne);
1163 const auto non_pole =
1164 Mul(impl::Log<D, V, /*kAllowSubnormals=*/false>(d, y),
Div(x, divisor));
1168 template <
class D,
class V>
1170 return Mul(
Log(d, x),
Set(d, 1.44269504088896340735992));
1173 template <
class D,
class V>
1179 const V kOneOverPi =
Set(d, 0.31830988618379067153);
1180 const V kHalf =
Set(d, 0.5);
1184 using VI32 = decltype(
Zero(di32));
1186 const V abs_x =
Abs(x);
1187 const V sign_x =
Xor(abs_x, x);
1190 const VI32 q = impl.ToInt32(d,
MulAdd(abs_x, kOneOverPi, kHalf));
1193 return impl.Poly(d,
Xor(impl.SinReduce(d, abs_x, q),
1194 Xor(impl.SinSignFromQuadrant(d, q), sign_x)));
1197 template <
class D,
class V>
1199 const V kHalf =
Set(d, +0.5);
1200 const V kOne =
Set(d, +1.0);
1201 const V kTwo =
Set(d, +2.0);
1204 const V abs_x =
Xor(x, sign);
1205 const V y =
Expm1(d, abs_x);
1207 return Xor(z, sign);
1210 template <
class D,
class V>
1212 const V kLimit =
Set(d, 18.714973875);
1213 const V kOne =
Set(d, +1.0);
1214 const V kTwo =
Set(d, +2.0);
1217 const V abs_x =
Xor(x, sign);
1218 const V y =
Expm1(d,
Mul(abs_x, kTwo));
1220 return Xor(z, sign);
#define HWY_NOINLINE
Definition: base.h:60
#define HWY_INLINE
Definition: base.h:59
#define HWY_MAYBE_UNUSED
Definition: base.h:70
HWY_INLINE HWY_MAYBE_UNUSED T Estrin(T x, T c0, T c1)
Definition: math-inl.h:280
HWY_INLINE V Log(const D d, V x)
Definition: math-inl.h:878
HWY_API Vec< D > SignBit(D d)
Definition: generic_ops-inl.h:66
HWY_NOINLINE V CallSin(const D d, VecArg< V > x)
Definition: math-inl.h:236
V VecArg
Definition: shared-inl.h:226
svuint16_t Set(Simd< bfloat16_t, N > d, bfloat16_t arg)
Definition: arm_sve-inl.h:299
HWY_NOINLINE V CallAsin(const D d, VecArg< V > x)
Definition: math-inl.h:71
HWY_INLINE V Atan(const D d, V x)
Highway SIMD version of std::atan(x).
Definition: math-inl.h:1045
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5035
HWY_API auto Eq(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5027
HWY_INLINE V Cos(const D d, V x)
Highway SIMD version of std::cos(x).
Definition: math-inl.h:1073
HWY_NOINLINE V CallAcos(const D d, VecArg< V > x)
Definition: math-inl.h:41
HWY_API auto Gt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5040
HWY_API Vec128< float, N > MulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1232
HWY_INLINE V Sin(const D d, V x)
Highway SIMD version of std::sin(x).
Definition: math-inl.h:1174
HWY_API Vec128< int8_t > Abs(const Vec128< int8_t > v)
Definition: arm_neon-inl.h:1529
HWY_INLINE V Exp(const D d, V x)
Highway SIMD version of std::exp(x).
Definition: math-inl.h:1096
HWY_API auto Ge(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5044
HWY_INLINE V Log10(const D d, V x)
Highway SIMD version of std::log10(x).
Definition: math-inl.h:1152
HWY_INLINE V Log1p(const D d, V x)
Highway SIMD version of std::log1p(x).
Definition: math-inl.h:1157
HWY_NOINLINE V CallExpm1(const D d, VecArg< V > x)
Definition: math-inl.h:161
HWY_API Vec128< uint16_t, 4 > DemoteTo(Simd< uint16_t, 4 >, const Vec128< int32_t > v)
Definition: arm_neon-inl.h:2546
HWY_NOINLINE V CallLog1p(const D d, VecArg< V > x)
Definition: math-inl.h:206
HWY_INLINE V Atanh(const D d, V x)
Highway SIMD version of std::atanh(x).
Definition: math-inl.h:1062
HWY_API Vec128< float > ConvertTo(Full128< float >, const Vec128< int32_t > v)
Definition: arm_neon-inl.h:2739
HWY_NOINLINE V CallLog10(const D d, VecArg< V > x)
Definition: math-inl.h:191
HWY_API Vec128< T, N > IfThenElseZero(const Mask128< T, N > mask, const Vec128< T, N > yes)
Definition: arm_neon-inl.h:1642
HWY_API V Add(V a, V b)
Definition: arm_neon-inl.h:5000
HWY_NOINLINE V CallLog2(const D d, VecArg< V > x)
Definition: math-inl.h:221
HWY_NOINLINE V CallExp(const D d, VecArg< V > x)
Definition: math-inl.h:146
HWY_NOINLINE V CallAtanh(const D d, VecArg< V > x)
Definition: math-inl.h:116
HWY_API Vec128< float, N > MulSub(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > sub)
Definition: arm_neon-inl.h:1288
HWY_INLINE V Log2(const D d, V x)
Highway SIMD version of std::log2(x).
Definition: math-inl.h:1169
HWY_INLINE V Acos(const D d, V x)
Highway SIMD version of std::acos(x).
Definition: math-inl.h:942
HWY_NOINLINE V CallAtan(const D d, VecArg< V > x)
Definition: math-inl.h:101
HWY_INLINE V Acosh(const D d, V x)
Highway SIMD version of std::acosh(x).
Definition: math-inl.h:968
HWY_NOINLINE V CallLog(const D d, VecArg< V > x)
Definition: math-inl.h:176
HWY_INLINE V Tanh(const D d, V x)
Highway SIMD version of std::tanh(x).
Definition: math-inl.h:1211
decltype(GetLane(V())) LaneType
Definition: generic_ops-inl.h:24
HWY_INLINE V Log(const D d, V x)
Highway SIMD version of std::log(x).
Definition: math-inl.h:1147
HWY_API Vec128< uint16_t > PromoteTo(Full128< uint16_t >, const Vec128< uint8_t, 8 > v)
Definition: arm_neon-inl.h:2362
HWY_API Vec128< T, N > And(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1384
HWY_API Vec128< T, N > BitCast(Simd< T, N > d, Vec128< FromT, N *sizeof(T)/sizeof(FromT)> v)
Definition: arm_neon-inl.h:687
HWY_INLINE V Asin(const D d, V x)
Highway SIMD version of std::asin(x).
Definition: math-inl.h:993
HWY_INLINE V Asinh(const D d, V x)
Highway SIMD version of std::asinh(x).
Definition: math-inl.h:1014
HWY_NOINLINE V CallAsinh(const D d, VecArg< V > x)
Definition: math-inl.h:86
HWY_API V Sub(V a, V b)
Definition: arm_neon-inl.h:5004
typename D::template Rebind< T > Rebind
Definition: shared-inl.h:144
HWY_INLINE V Expm1(const D d, V x)
Highway SIMD version of std::expm1(x).
Definition: math-inl.h:1120
HWY_API Vec128< T, N > IfThenZeroElse(const Mask128< T, N > mask, const Vec128< T, N > no)
Definition: arm_neon-inl.h:1649
HWY_API Vec128< T, N > Xor(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1430
HWY_API Vec128< float, N > NegMulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1266
HWY_NOINLINE V CallCos(const D d, VecArg< V > x)
Definition: math-inl.h:131
HWY_API Vec128< float, N > Sqrt(const Vec128< float, N > v)
Definition: arm_neon-inl.h:1348
HWY_NOINLINE V CallSinh(const D d, VecArg< V > x)
Definition: math-inl.h:251
HWY_INLINE V Sinh(const D d, V x)
Highway SIMD version of std::sinh(x).
Definition: math-inl.h:1198
HWY_API Vec128< T, N > AndNot(const Vec128< T, N > not_mask, const Vec128< T, N > mask)
Definition: arm_neon-inl.h:1398
HWY_API V Div(V a, V b)
Definition: arm_neon-inl.h:5013
HWY_API V Mul(V a, V b)
Definition: arm_neon-inl.h:5009
HWY_NOINLINE V CallTanh(const D d, VecArg< V > x)
Definition: math-inl.h:266
HWY_API Vec128< T, N > Zero(Simd< T, N > d)
Definition: arm_neon-inl.h:710
HWY_API Vec128< T, N > Or(const Vec128< T, N > a, const Vec128< T, N > b)
Definition: arm_neon-inl.h:1419
decltype(Zero(D())) Vec
Definition: generic_ops-inl.h:31
HWY_API Vec1< T > IfThenElse(const Mask1< T > mask, const Vec1< T > yes, const Vec1< T > no)
Definition: scalar-inl.h:263
HWY_NOINLINE V CallAcosh(const D d, VecArg< V > x)
Definition: math-inl.h:56
Definition: aligned_allocator.h:23
#define HWY_NAMESPACE
Definition: set_macros-inl.h:77
HWY_INLINE V AsinPoly(D d, V x2, V)
Definition: math-inl.h:478
Definition: math-inl.h:464
HWY_INLINE V AtanPoly(D d, V x)
Definition: math-inl.h:519
Definition: math-inl.h:466
HWY_INLINE Vec< Rebind< float, D > > CosSignFromQuadrant(D d, VI32 q)
Definition: math-inl.h:625
HWY_INLINE V SinReduce(D d, V x, VI32 q)
Definition: math-inl.h:607
HWY_INLINE Vec< Rebind< int32_t, D > > ToInt32(D, V x)
Definition: math-inl.h:574
HWY_INLINE V Poly(D d, V x)
Definition: math-inl.h:579
HWY_INLINE Vec< Rebind< float, D > > SinSignFromQuadrant(D d, VI32 q)
Definition: math-inl.h:632
HWY_INLINE V CosReduce(D d, V x, VI32 q)
Definition: math-inl.h:590
Definition: math-inl.h:468
HWY_INLINE V ExpReduce(D d, V x, VI32 q)
Definition: math-inl.h:753
HWY_INLINE Vec< Rebind< int32_t, D > > ToInt32(D, V x)
Definition: math-inl.h:721
HWY_INLINE V ExpPoly(D d, V x)
Definition: math-inl.h:726
HWY_INLINE V LoadExpShortRange(D d, V x, VI32 e)
Definition: math-inl.h:747
HWY_INLINE Vec< D > Pow2I(D d, VI32 x)
Definition: math-inl.h:739
Definition: math-inl.h:470
HWY_INLINE V LogPoly(D d, V x)
Definition: math-inl.h:778
HWY_INLINE Vec< Rebind< int32_t, D > > Log2p1NoSubnormal(D, V x)
Definition: math-inl.h:769
Definition: math-inl.h:472