64bit SSE2 SSE2 FPU Visual C++ 64bit Inline Assembler 4 FPU SSE2 4.1 FPU Control Word FPU 16bit R R R IC RC(2) PC(2) R R PM UM OM ZM DM IM R: reserved



Similar documents
64bit SSE2 SSE2 FPU Visual C++ 64bit Inline Assembler 4 FPU SSE2 4.1 FPU Control Word FPU 16bit R R R IC RC(2) PC(2) R R PM UM OM ZM DM IM R: reserved

●70974_100_AC009160_KAPベーシス自動車約款(11.10).indb

FFTSS Library Version 3.0 User's Guide

/* sansu1.c */ #include <stdio.h> main() { int a, b, c; /* a, b, c */ a = 200; b = 1300; /* a 200 */ /* b 200 */ c = a + b; /* a b c */ }

橡Taro9-生徒の活動.PDF

Microsoft Word - C.....u.K...doc

ワードプロセッシングについて


改訂版 :基本的な文字化の原則(Basic Transcription System for Japanese: BTSJ)

1 (bit ) ( ) PC WS CPU IEEE754 standard ( 24bit) ( 53bit)

2

康乘聡子(P105‐121)/康乘聡子 p105‐121

‡Æ‡Ý‡©457_01-12

' % % &! #


76

™ƒŒì„³001†`028.pwd

★分冊3-説明資料PDF用/02-PDF個別


thesis.dvi

ex01.dvi

FS_handbook.indd


ストリーミング SIMD 拡張命令2 (SSE2) を使用した SAXPY/DAXPY


Intel® Compilers Professional Editions

DPD Software Development Products Overview

REALV5_A4…p_Ł\1_4A_OCF

untitled

「都市から地方への人材誘致・移住促進に関する調査」

<91498EE88CA D815B2E786C73>

〔 大 会 役 員 〕

橡本体資料+参考条文.PDF

Lecture on

untitled

インテル(R) Visual Fortran Composer XE 2013 Windows版 入門ガイド


インテル(R) Visual Fortran Composer XE

-34-

Cプログラミング 入門

Compiler Differences on OpenVMS I64

07-二村幸孝・出口大輔.indd

ストリーミング SIMD 拡張命令2 (SSE2) を使用した、倍精度浮動小数点ベクトルの最大/最小要素とそのインデックスの検出

(2 Linux Mozilla [ ] [ ] [ ] [ ] URL 2 qkc, nkc ~/.cshrc (emacs 2 set path=($path /usr/meiji/pub/linux/bin tcsh b

ex01.dvi

23 Fig. 2: hwmodulev2 3. Reconfigurable HPC 3.1 hw/sw hw/sw hw/sw FPGA PC FPGA PC FPGA HPC FPGA FPGA hw/sw hw/sw hw- Module FPGA hwmodule hw/sw FPGA h

untitled

Java

GPU.....

main.dvi

/* do-while */ #include <stdio.h> #include <math.h> int main(void) double val1, val2, arith_mean, geo_mean; printf( \n ); do printf( ); scanf( %lf, &v

Microsoft Word - w_mkl_build_howto.doc

DOPRI5.dvi

270万回再生レポート

main

SystemC 2.0を用いた簡易CPUバスモデルの設計

PC Windows 95, Windows 98, Windows NT, Windows 2000, MS-DOS, UNIX CPU

Battle Ship

44 6 MPI 4 : #LIB=-lmpich -lm 5 : LIB=-lmpi -lm 7 : mpi1: mpi1.c 8 : $(CC) -o mpi1 mpi1.c $(LIB) 9 : 10 : clean: 11 : -$(DEL) mpi1 make mpi1 1 % mpiru

H.264/AVC 2 H.265/HEVC 1 H.265 JCT-VC HM(HEVC Test Model) HM 5 5 SIMD HM 33%

「慰安婦」問題調査報告・1999

(300, 150) 120 getchar() HgBox(x, y, w, h) (x, y), w, h #include <stdio.h> #include <handy.h> int main(void) { int i; double w, h; } HgO

1-18

2. OpenMP OpenMP OpenMP OpenMP #pragma#pragma omp #pragma omp parallel #pragma omp single #pragma omp master #pragma omp for #pragma omp critica

untitled

( CUDA CUDA CUDA CUDA ( NVIDIA CUDA I

main.dvi

Slides: TimeGraph: GPU Scheduling for Real-Time Multi-Tasking Environments

TLS _final

01_OpenMP_osx.indd


(Basic Theory of Information Processing) 1

( ) 1 1: 1 #include <s t d i o. h> 2 #include <GL/ g l u t. h> 3 #include <math. h> 4 #include <s t d l i b. h> 5 #include <time. h>


SQUFOF NTT Shanks SQUFOF SQUFOF Pentium III Pentium 4 SQUFOF 2.03 (Pentium 4 2.0GHz Willamette) N UBASIC 50 / 200 [

Transcription:

(Version: 2013/5/16) Intel CPU (kashi@waseda.jp) 1 Intel CPU( AMD CPU) 64bit SIMD Inline Assembler Windows Visual C++ Linux gcc 2 FPU SSE2 Intel CPU double 8087 FPU (floating point number processing unit) SIMD (single instruction multiple data) SSE2 (Streaming SIMD Extensions 2) 2 SSE2 128bit double 2 2 C ( ) 3 32bit 64bit 64bit AMD Intel AMD64 x86 CPU 64bit Intel Itanium (IA-64) CPU 64bit AMD Intel64 AMD64 64bit CPU 32bit 64bit OS 64bit OS 32bit 64bit CPU 64bit OS 32bit OS 64bit 32bit OS 64bit OS binary 64bit CPU SSE2 32bit SSE2 CPU FPU SSE2 1

64bit SSE2 SSE2 FPU Visual C++ 64bit Inline Assembler 4 FPU SSE2 4.1 FPU Control Word FPU 16bit R R R IC RC(2) PC(2) R R PM UM OM ZM DM IM R: reserved IC: infinity control RC(2): rounding control (00: near, 01: down, 10: up, 11:chop) PC(2): precision control (00: 24bit, 01: not used, 10: 53bit, 11:64bit) PM: inexact precision mask UM: underflow mask OM: overflow mask ZM: divide by 0 mask DM: denormals mask IM: invalid numbers mask RC(2) 2bit 00: nearest mode 01: down mode 10: up mode + 11: chop mode 0 2

4.2 MXCSR Control/Status Register SSE2 32bit 0(31-16) FZ RC(2) PM UM OM ZM DM IM 0 PE UE OE ZE DE IE 0: 0 FZ: Flush to Zero mode denormal 0 bit5-0: SIMD RC(2) 2bit FPU 5 Inline Assembler 5.1 FPU Control Word 16bit linux OS gcc mode1 mode1 windows Visual C++ _asm fnstcw mode1 mode1 _asm fldcw mode1 mode1 5.2 MXCSR Control/Status Register 32bit linux OS gcc mode2 mode2 windows Visual C++ 3

_asm stmxcsr mode2 mode2 _asm ldmxcsr mode2 mode2 SSE2 Intrinsic Inline Assembler SSE2 gcc, Visual C++ mode2 mode2 # include < emmintrin.h> include 6 2bit Linux 32bit FPU down _asm volatile (" fnstcw %0" : "=m"( mode1 )); mode1 = 0 x0400 ; 2bit 0 (~ bit ) and 2bit 0 down or down # include <stdio.h> /* * Only for Little Endian */ void bit_view ( void *p, int size ) int i, j; unsigned char * p2; for (i=size -1; i >=0; i - -) p2 = ( unsigned char *) p + i; for (j =7; j >=0; j - -) if ((* p2 & (1 << j))!= 0) printf ("1"); else printf ("0"); printf ("\n"); int main () 4

double x = 1.; double y = 10.; double z; // default // nearest mode1 = 0 x0000 ; // down mode1 = 0 x0400 ; // up mode1 = 0 x0800 ; // chop mode1 = 0 x0c00 ; 0.10000000000000001 0011111110111001100110011001100110011001100110011001100110011010 0.10000000000000001 0011111110111001100110011001100110011001100110011001100110011010 0.099999999999999992 0011111110111001100110011001100110011001100110011001100110011001 0.10000000000000001 0011111110111001100110011001100110011001100110011001100110011010 0.099999999999999992 0011111110111001100110011001100110011001100110011001100110011001 Linux 64bit 5

# include <stdio.h> /* * Only for Little Endian */ void bit_view ( void *p, int size ) int i, j; unsigned char * p2; for (i=size -1; i >=0; i - -) p2 = ( unsigned char *) p + i; for (j =7; j >=0; j - -) if ((* p2 & (1 << j))!= 0) printf ("1"); else printf ("0"); printf ("\n"); int main () double x = 1.; double y = 10.; double z; // default // nearest mode2 = 0 x00000000 ; // down mode2 = 0 x00002000 ; // up mode2 = 0 x00004000 ; // chop 6

mode2 = 0 x00006000 ; 32bit 2bit bit Windows 32bit Inline Assemler # include <stdio.h> /* * Only for Little Endian */ void bit_view ( void *p, int size ) int i, j; unsigned char * p2; for (i=size -1; i >=0; i - -) p2 = ( unsigned char *) p + i; for (j =7; j >=0; j - -) if ((* p2 & (1 << j))!= 0) printf ("1"); else printf ("0"); printf ("\n"); int main () double x = 1.; double y = 10.; double z; // default // nearest _asm fnstcw mode1 mode1 = 0 x0000 ; _asm fldcw mode1 // down _asm fnstcw mode1 mode1 = 0 x0400 ; _asm fldcw mode1 7

// up _asm fnstcw mode1 mode1 = 0 x0800 ; _asm fldcw mode1 // chop _asm fldcw mode1 mode1 = 0 x0c00 ; _asm fldcw mode1 Windows 64bit Inline Assemler Intrinsic # include <stdio.h> # include < emmintrin.h> // for _mm_getcsr, _mm_setcsr /* * Only for Little Endian */ void bit_view ( void *p, int size ) int i, j; unsigned char * p2; for (i=size -1; i >=0; i - -) p2 = ( unsigned char *) p + i; for (j =7; j >=0; j - -) if ((* p2 & (1 << j))!= 0) printf ("1"); else printf ("0"); printf ("\n"); int main () double x = 1.; double y = 10.; double z; // default // nearest mode2 = 0 x00000000 ; 8

// down mode2 = 0 x00002000 ; // up mode2 = 0 x00004000 ; // chop mode2 = 0 x00006000 ; 7 OS 32/64bit 32bit FPU SSE2 _WIN32 _WIN64 Visual C++ Linux (Visual C++ ) _WIN64 64bit 32bit (Linux ) __x86_64__ 64bit (Visual C++ ) _M_IX86_FP 2 SSE2 (Linux ) __SSE2_MATH__ SSE2 # include <stdio.h> # if defined ( _WIN64 ) || _M_IX86_FP == 2 # include < emmintrin.h> // for _mm_getcsr, _mm_setcsr void roundnear () 9

# if defined ( _WIN32 ) defined ( _WIN64 ) // Windows #if! defined ( _WIN64 ) _asm fnstcw mode1 mode1 = 0 x0000 ; _asm fldcw mode1 # if defined ( _WIN64 ) _M_IX86_FP == 2 mode2 = 0 x00000000 ; # else // Linux, etc #if! defined ( x86_64 ) mode1 = 0 x0000 ; #if defined ( x86_64 ) defined ( SSE2_MATH ) mode2 = 0 x00000000 ; void rounddown () # if defined ( _WIN32 ) defined ( _WIN64 ) // Windows #if! defined ( _WIN64 ) _asm fnstcw mode1 mode1 = 0 x0400 ; _asm fldcw mode1 # if defined ( _WIN64 ) _M_IX86_FP == 2 mode2 = 0 x00002000 ; # else // Linux, etc #if! defined ( x86_64 ) mode1 = 0 x0400 ; #if defined ( x86_64 ) defined ( SSE2_MATH ) mode2 = 0 x00002000 ; 10

void roundup () # if defined ( _WIN32 ) defined ( _WIN64 ) // Windows #if! defined ( _WIN64 ) _asm fnstcw mode1 mode1 = 0 x0800 ; _asm fldcw mode1 # if defined ( _WIN64 ) _M_IX86_FP == 2 mode2 = 0 x00004000 ; # else // Linux, etc #if! defined ( x86_64 ) mode1 = 0 x0800 ; #if defined ( x86_64 ) defined ( SSE2_MATH ) mode2 = 0 x00004000 ; void roundchop () # if defined ( _WIN32 ) defined ( _WIN64 ) // Windows #if! defined ( _WIN64 ) _asm fnstcw mode1 mode1 = 0 x0c00 ; _asm fldcw mode1 # if defined ( _WIN64 ) _M_IX86_FP == 2 mode2 = 0 x00006000 ; # else // Linux, etc #if! defined ( x86_64 ) mode1 = 0 x0c00 ; #if defined ( x86_64 ) defined ( SSE2_MATH ) mode2 = 0 x00006000 ; 11

8 : 8.1 gcc -m32 32bit -m64 64bit 32bit -msse2 SSE2 -mfpmath=sse SSE2 -mfpmath=387 FPU -mfpmath=sse,387 8.2 Visual C++ 64bit OS Visual Studio 2005(2008) x64 Win64 cl 64bit 32bit Visual Studio 2005(2008) 32bit /arch:sse2 SSE2 FPU SSE2 12