(Version: 2017/4/18) Intel CPU 1 Intel CPU( AMD CPU) 64bit SIMD Inline Assemler Windows Visual C++ Linux gcc 2 FPU SSE2 Intel CPU do

(Version: 2017/4/18) Intel CPU (kashi@waseda.jp) 1 Intel CPU( AMD CPU) 64bit SIMD Inline Assemler Windows Visual C++ Linux gcc 2 FPU SSE2 Intel CPU double 8087 FPU (floating point number processing unit) SIMD (single instruction multiple data) SSE2 (Streaming SIMD Extensions 2) 2 SSE2 128bit double 2 2 C ( ) 3 32bit 64bit 64bit AMD Intel AMD64 x86 CPU 64bit Intel Itanium (IA-64) CPU 64bit AMD Intel64 AMD64 64bit CPU 32bit 64bit OS 64bit OS 32bit 64bit CPU 64bit OS 32bit OS 64bit 32bit OS 64bit OS binary 64bit CPU SSE2 32bit SSE2 CPU FPU SSE2 1

64bit SSE2 SSE2 FPU Visual C++ 64bit Inline Assembler 4 FPU SSE2 4.1 FPU Control Word FPU 16bit R R R IC RC(2) PC(2) R R PM UM OM ZM DM IM R: reserved IC: infinity control RC(2): rounding control (00: near, 01: down, 10: up, 11:chop) PC(2): precision control (00: 24bit, 01: not used, 10: 53bit, 11:64bit) PM: inexact precision mask UM: underflow mask OM: overflow mask ZM: divide by 0 mask DM: denormals mask IM: invalid numbers mask RC(2) 2bit 00: nearest mode 01: down mode 10: up mode + 11: chop mode 0 2

4.2 MXCSR Control/Status Register SSE2 32bit 0(31-16) FZ RC(2) PM UM OM ZM DM IM 0 PE UE OE ZE DE IE 0: 0 FZ: Flush to Zero mode denormal 0 bit5-0: SIMD RC(2) 2bit FPU 5 Inline Assembler 5.1 FPU Control Word unsigned short int mode1 ; // 16 bit unsigned 16bit linux OS gcc asm volatile (" fnstcw %0" : "=m"( mode1 )); mode1 asm volatile (" fldcw %0" : : "m"( mode1 )); mode1 windows Visual C++ _asm fnstcw mode1 mode1 _asm fldcw mode1 mode1 5.2 MXCSR Control/Status Register 32bit linux OS gcc mode2 mode2 windows Visual C++ 3

_asm stmxcsr mode2 mode2 _asm ldmxcsr mode2 mode2 SSE2 Intrinsic Inline Assembler SSE2 gcc, Visual C++ mode2 mode2 # include < emmintrin.h> include 6 2bit Linux 32bit FPU down _asm volatile (" fnstcw %0" : "=m"( mode1 )); mode1 = 0 x0400 ; asm volatile (" fldcw %0" : : "m"( mode1 )); 2bit 0 (~ bit ) and 2bit 0 down or down /* * Only for Little Endian */ void bit_view ( void *p, int size ) int i, j; unsigned char * p2; for (i=size -1; i >=0; i - -) p2 = ( unsigned char *) p + i; for (j =7; j >=0; j - -) if ((* p2 & (1 << j))!= 0) printf ("1"); else printf ("0"); printf ("\n"); int main () unsigned short int mode1 ; // 16 bit unsigned 4

double x = 1.; double y = 10.; double z; // default // nearest asm volatile (" fnstcw %0" : "=m"( mode1 )); mode1 = 0 x0000 ; asm volatile (" fldcw %0" : : "m"( mode1 )); // down asm volatile (" fnstcw %0" : "=m"( mode1 )); mode1 = 0 x0400 ; asm volatile (" fldcw %0" : : "m"( mode1 )); // up asm volatile (" fnstcw %0" : "=m"( mode1 )); mode1 = 0 x0800 ; asm volatile (" fldcw %0" : : "m"( mode1 )); // chop asm volatile (" fnstcw %0" : "=m"( mode1 )); mode1 = 0 x0c00 ; asm volatile (" fldcw %0" : : "m"( mode1 )); 0.10000000000000001 0011111110111001100110011001100110011001100110011001100110011010 0.10000000000000001 0011111110111001100110011001100110011001100110011001100110011010 0.099999999999999992 0011111110111001100110011001100110011001100110011001100110011001 0.10000000000000001 0011111110111001100110011001100110011001100110011001100110011010 0.099999999999999992 0011111110111001100110011001100110011001100110011001100110011001 Linux 64bit 5

/* * Only for Little Endian */ void bit_view ( void *p, int size ) int i, j; unsigned char * p2; for (i=size -1; i >=0; i - -) p2 = ( unsigned char *) p + i; for (j =7; j >=0; j - -) if ((* p2 & (1 << j))!= 0) printf ("1"); else printf ("0"); printf ("\n"); int main () double x = 1.; double y = 10.; double z; // default // nearest mode2 = 0 x00000000 ; // down mode2 = 0 x00002000 ; // up mode2 = 0 x00004000 ; // chop 6

mode2 = 0 x00006000 ; 32bit 2bit bit Windows 32bit Inline Assemler /* * Only for Little Endian */ void bit_view ( void *p, int size ) int i, j; unsigned char * p2; for (i=size -1; i >=0; i - -) p2 = ( unsigned char *) p + i; for (j =7; j >=0; j - -) if ((* p2 & (1 << j))!= 0) printf ("1"); else printf ("0"); printf ("\n"); int main () unsigned short int mode1 ; // 16 bit unsigned double x = 1.; double y = 10.; double z; // default // nearest _asm fnstcw mode1 mode1 = 0 x0000 ; _asm fldcw mode1 // down _asm fnstcw mode1 mode1 = 0 x0400 ; _asm fldcw mode1 7

// up _asm fnstcw mode1 mode1 = 0 x0800 ; _asm fldcw mode1 // chop _asm fldcw mode1 mode1 = 0 x0c00 ; _asm fldcw mode1 Windows 64bit Inline Assemler Intrinsic # include < emmintrin.h> // for _mm_getcsr, _mm_setcsr /* * Only for Little Endian */ void bit_view ( void *p, int size ) int i, j; unsigned char * p2; for (i=size -1; i >=0; i - -) p2 = ( unsigned char *) p + i; for (j =7; j >=0; j - -) if ((* p2 & (1 << j))!= 0) printf ("1"); else printf ("0"); printf ("\n"); int main () double x = 1.; double y = 10.; double z; // default // nearest mode2 = 0 x00000000 ; 8

// down mode2 = 0 x00002000 ; // up mode2 = 0 x00004000 ; // chop mode2 = 0 x00006000 ; 7 C C99 fenv.h include fesetround # include <fenv.h> /* * Only for Little Endian */ void bit_view ( void *p, int size ) int i, j; unsigned char * p2; for (i=size -1; i >=0; i - -) p2 = ( unsigned char *) p + i; for (j =7; j >=0; j - -) if ((* p2 & (1 << j))!= 0) printf ("1"); else printf ("0"); printf ("\n"); int main () 9

double x = 1.; double y = 10.; double z; // default // nearest fesetround ( FE_TONEAREST ); // down fesetround ( FE_DOWNWARD ); // up fesetround ( FE_UPWARD ); // chop fesetround ( FE_TOWARDZERO ); -lm Intel CPU Visual C++ Visual Studio 2012 C99 fenv.h Visual Studio 2013 C99 fenv.h include fesetround 64bit (2016 7 ) Visual Studio 2013 update5 Visual Studio 2015 update3 Visual C++ fesetround Visual C++ float.h include controlfp # include <float.h> /* * Only for Little Endian */ void bit_view ( void *p, int size ) int i, j; unsigned char * p2; for (i=size -1; i >=0; i - -) p2 = ( unsigned char *) p + i; for (j =7; j >=0; j - -) if ((* p2 & (1 << j))!= 0) printf ("1"); else printf ("0"); 10

printf ("\n"); int main () double x = 1.; double y = 10.; double z; // default // nearest _controlfp ( _RC_NEAR, _MCW_RC ); // down _controlfp ( _RC_DOWN, _MCW_RC ); // up _controlfp ( _RC_UP, _MCW_RC ); // chop _controlfp ( _RC_CHOP, _MCW_RC ); controlfp s # include <float.h> /* * Only for Little Endian */ void bit_view ( void *p, int size ) int i, j; unsigned char * p2; for (i=size -1; i >=0; i - -) p2 = ( unsigned char *) p + i; for (j =7; j >=0; j - -) if ((* p2 & (1 << j))!= 0) printf ("1"); else printf ("0"); printf ("\n"); int main () unsigned int cw = 0; 11

double x = 1.; double y = 10.; double z; // default // nearest _controlfp_s (& cw, _RC_NEAR, _MCW_RC ); // down _controlfp_s (& cw, _RC_DOWN, _MCW_RC ); // up _controlfp_s (& cw, _RC_UP, _MCW_RC ); // chop _controlfp_s (& cw, _RC_CHOP, _MCW_RC ); CPU 32/64bit inline 8 OS 32/64bit 32bit FPU SSE2 _MSC_VER Visual C++ Linux (gcc ) (Visual C++ ) _WIN64 64bit _WIN32 32bit Intel CPU (Linux ) i386 32bit x86_64 64bit Intel CPU (Visual C++ ) _M_IX86_FP 2 SSE2 (Linux ) SSE2_MATH SSE2 12

# if defined ( _WIN32 ) defined ( _WIN64 ) // Windows # if defined ( _WIN64 ) // Windows 64 bit # include < emmintrin.h> // for _mm_getcsr, _mm_setcsr void roundnear () void rounddown () mode2 = 0 x00002000 ; void roundup () mode2 = 0 x00004000 ; void roundchop () mode2 = 0 x00006000 ; # elif defined ( _WIN32 ) // Windows 32 bit # if _M_IX86_FP == 2 # include < emmintrin.h> // for _mm_getcsr, _mm_setcsr # endif void roundnear () unsigned short int mode1 ; // 16 bit unsigned _asm fnstcw mode1 _asm fldcw mode1 # if _M_IX86_FP == 2 # endif void rounddown () unsigned short int mode1 ; // 16 bit unsigned 13

_asm fnstcw mode1 mode1 = 0 x0400 ; _asm fldcw mode1 # if _M_IX86_FP == 2 mode2 = 0 x00002000 ; # endif void roundup () unsigned short int mode1 ; // 16 bit unsigned _asm fnstcw mode1 mode1 = 0 x0800 ; _asm fldcw mode1 # if _M_IX86_FP == 2 mode2 = 0 x00004000 ; # endif void roundchop () unsigned short int mode1 ; // 16 bit unsigned _asm fnstcw mode1 mode1 = 0 x0c00 ; _asm fldcw mode1 # if _M_IX86_FP == 2 mode2 = 0 x00006000 ; # endif # else // Windows other CPU # include <float.h> void roundnear () unsigned int cw = 0; _controlfp_s (& cw, _RC_NEAR, _MCW_RC ); void rounddown () unsigned int cw = 0; _controlfp_s (& cw, _RC_DOWN, _MCW_RC ); void roundup () unsigned int cw = 0; _controlfp_s (& cw, _RC_UP, _MCW_RC ); 14

void roundchop () unsigned int cw = 0; _controlfp_s (& cw, _RC_CHOP, _MCW_RC ); # endif # else // Linux, etc #if defined ( x86_64 ) // Linux 64 bit void roundnear () void rounddown () mode2 = 0 x00002000 ; void roundup () mode2 = 0 x00004000 ; void roundchop () mode2 = 0 x00006000 ; # elif defined ( i386 ) // Linux 32 bit void roundnear () unsigned short int mode1 ; // 16 bit unsigned asm volatile (" fnstcw %0" : "=m"( mode1 )); asm volatile (" fldcw %0" : : "m"( mode1 )); #if defined ( SSE2_MATH ) # endif 15

void rounddown () unsigned short int mode1 ; // 16 bit unsigned asm volatile (" fnstcw %0" : "=m"( mode1 )); mode1 = 0 x0400 ; asm volatile (" fldcw %0" : : "m"( mode1 )); #if defined ( SSE2_MATH ) mode2 = 0 x00002000 ; # endif void roundup () unsigned short int mode1 ; // 16 bit unsigned asm volatile (" fnstcw %0" : "=m"( mode1 )); mode1 = 0 x0800 ; asm volatile (" fldcw %0" : : "m"( mode1 )); #if defined ( SSE2_MATH ) mode2 = 0 x00004000 ; # endif void roundchop () unsigned short int mode1 ; // 16 bit unsigned asm volatile (" fnstcw %0" : "=m"( mode1 )); mode1 = 0 x0c00 ; asm volatile (" fldcw %0" : : "m"( mode1 )); #if defined ( SSE2_MATH ) mode2 = 0 x00006000 ; # endif # else // Linux other CPU # include <fenv.h> void roundnear () fesetround ( FE_TONEAREST ); void rounddown () fesetround ( FE_DOWNWARD ); void roundup () fesetround ( FE_UPWARD ); 16

void roundchop () fesetround ( FE_TOWARDZERO ); # endif # endif 9 # include " roundingmode - universal. c" double div_down ( double x, double y) double r; rounddown (); r = x / y; roundnear (); return r; double div_up ( double x, double y) double r; roundup (); r = x / y; roundnear (); return r; int main () double x = 1.; double y = 10.; double z; z = div_down (x, y); z = div_up (x, y); gcc 4.4 -O0 0.099999999999999992 0.10000000000000001 -O1, -O2, -O3 0.10000000000000001 0.10000000000000001 17

( ) # include " roundingmode - universal. c" double div_down ( double x, double y) # pragma STDC FENV_ACCESS ON double r; rounddown (); r = x / y; roundnear (); return r; double div_up ( double x, double y) # pragma STDC FENV_ACCESS ON double r; roundup (); r = x / y; roundnear (); return r; int main () double x = 1.; double y = 10.; double z; z = div_down (x, y); z = div_up (x, y); C99 FENV_ACCESS gcc gcc 4.4, gcc 4.8, gcc 5.3 # include " roundingmode - universal. c" double div_down ( double x, double y) volatile double r, x1 = x, y1 = y; 18

rounddown (); r = x1 / y1; roundnear (); return r; double div_up ( double x, double y) volatile double r, x1 = x, y1 = y; roundup (); r = x1 / y1; roundnear (); return r; int main () double x = 1.; double y = 10.; double z; z = div_down (x, y); z = div_up (x, y); volatile 10 : 10.1 gcc -m32 32bit -m64 64bit 32bit -msse2 SSE2 -mfpmath=sse SSE2 -mfpmath=387 FPU -mfpmath=sse,387 10.2 Visual C++ 64bit OS VS2013(2015) x64 Native Tools cl 64bit 32bit VS2013(2015) x86 Native Tools 32bit /arch:sse2 SSE2 FPU SSE2 11 19

11.1 Visual C++ fesetround Visual Studio 2012 C99 fenv.h Visual Studio 2013 C99 fenv.h include fesetround 64bit (2016 4 ) Visual Studio 2013 update5 Visual Studio 2015 update3 (2017 4 18 : Visual Studio 2017 ) # include <iostream > # include <cfenv > # include <cmath > int check_rounding () volatile double x, y, z; x = 1; y = pow (2., -55); z = x + y; if (z > 1) return 2; // up z = x - y; if ( z == 1) return 0; // nearest z = - x + y; if ( z == -x) return 1; // down return 3; // chop int main () std :: cout << check_rounding () << "\ n"; fesetround ( FE_TONEAREST ); std :: cout << check_rounding () << "\ n"; fesetround ( FE_DOWNWARD ); std :: cout << check_rounding () << "\ n"; fesetround ( FE_UPWARD ); std :: cout << check_rounding () << "\ n"; fesetround ( FE_TOWARDZERO ); std :: cout << check_rounding () << "\ n"; IEEE754 0 0 1 2 3 Visual C++ 64bit 20

0 0 2 1 3 32bit 11.2 Visual C++ sqrt 32bit Visual C++ sqrt FPU cygwin msys2 32bit Visual C++ sqrt matlab2007b 11.3 double rounding main () volatile double a = 5000000000000001.; volatile double b = 0.499755859375; volatile double c; printf (" %.80 g\n", a); printf (" %.80 g\n", b); c = a + b; printf (" %.80 g\n", c); Linux(64bit) -m32 32bit $ cc doubleround. c $./a. out 5000000000000001 0.499755859375 5000000000000001 $ cc - m32 doubleround. c $./a. out 5000000000000001 0.499755859375 5000000000000002 $ 5000000000000001 5000000000000002 double IEEE754 Intel CPU FPU IEEE754 double ( 80bit, 64bit) FPU ( ) double 21

double rounding 2 5000000000000001(10) = 10001110000110111100100110111111000001000000000000001(2) 0.499755859375(10) = 0.011111111111(2) 10001110000110111100100110111111000001000000000000001.011111111111(2) 53 54 64 65 10001110000110111100100110111111000001000000000000001. 01111111111 1(2) 53 54 0 10001110000110111100100110111111000001000000000000001(2) 64 53 65 1 10001110000110111100100110111111000001000000000000001. 10000000000(2) 64 1 10001110000110111100100110111111000001000000000000010(2) 1.49 1 1 2 1 1.49 1.5 2 Intel CPU double rounding FPU FPU 64bit FPU FPU 53bit (FPU Control Word PC 10 ) PC Visual C++ 10 Linux 11 # include <fenv.h> main () 22

volatile double x = -1; volatile double y = 10; volatile double z; fesetround ( FE_UPWARD ); z = -(x / y); printf (" %.20 g\n", z); 0.1 Linux 32bit 0.1 $ cc - m64 doubleround2. c - lm $./a. out 0.099999999999999991674 $ cc - m32 doubleround2. c - lm $./a. out 0.10000000000000000556 double rounding (1) x / y 0.1 ( 0.1 ) (2) 0.1 (1) x / y 64bit 0.1 ( 0.1 )64bit (2) 0.1 64bit (3) 0.1 64bit 53bit 0.1 gcc 64bit long double # include <fenv.h> main () volatile long double x = -1; volatile long double y = 10; volatile long double z1; volatile double z; fesetround ( FE_UPWARD ); z1 = -(x / y); printf (" %.20 Lg\n", z1); z = z1; printf (" %.20 g\n", z); $ cc - m64 doubleround2a. c - lm $./a. out 0.099999999999999999995 0.10000000000000000556 23

$ cc - m32 doubleround2a. c - lm $./a. out 0.099999999999999999995 0.10000000000000000556 11.4 Intel Intel (icc) SSE 0 2 1074 x < 2 1022 0 /Qftz-(Windows) -ftz-(linux, Mac) 12 fesetround windows Visual C++ controlfp 24