2
|
|
- あまめ つちかね
- 5 years ago
- Views:
Transcription
1 ( ) 1
2 2
3 3
4 1.CPU, 2.,,,,,, 3. register, register, 4.L1, L2, (L3), (L4) 4
5 register L1 cache L2 cache Main Memory,, L2, L1 CPU L2, L1, CPU 5
6 , 6
7 dgem2vu 7
8 ? Wiedemann algorithm $u_0$, $w_0$, $s_i$, $s_i = u_0^{\top} A^i w_0$; $t_0 = w_0,\ t_1 = A t_0,\ t_2 = A t_1, \dots$; $s_0 = u_0^{\top} t_0,\ s_1 = u_0^{\top} t_1,\ s_2 = u_0^{\top} t_2, \dots$ 8
9 $s_i = u_0^{\top} A^i w_0$: $s_0 = (u_0^{\top})(w_0) = (u_0)^{\top}(w_0)$, $s_1 = (u_0^{\top})(A w_0) = (u_0)^{\top}(A w_0)$, $s_2 = (u_0^{\top} A)(A w_0) = (A^{\top} u_0)^{\top}(A w_0)$, $s_3 = (u_0^{\top} A)(A^2 w_0) = (A^{\top} u_0)^{\top}(A^2 w_0)$, $s_4 = (u_0^{\top} A^2)(A^2 w_0) = ((A^{\top})^2 u_0)^{\top}(A^2 w_0)$. 2 $t_0 = w_0,\ t_1 = A t_0,\ t_2 = A t_1, \dots$, $v_0 = u_0,\ v_1 = A^{\top} v_0,\ v_2 = A^{\top} v_1, \dots$, 2, 9
10 $s_0 = v_0^{\top} t_0$, $s_1 = v_0^{\top} t_1$, $s_2 = v_1^{\top} t_1$, $s_3 = v_1^{\top} t_2$, $s_4 = v_2^{\top} t_2$. Wilkinson, $Ax$, $A^{\top} y$, 10
11 for (j = 0; j < N; j++) ATY_TMP[j]=0; for (j = 0; j < N; j++){ TMP1 = 0; for (k = 0; k < N; k++){ TMP1 = (ULL) A[j][k] * X[k] + TMP1; ATY_TMP[k]=(ULL) A[j][k] * Y[j] + ATY_TMP[k]; } AX[j] = TMP1 % P; } for (j = 0; j < N; j++) ATY[j]=ATY_TMP[j] % P; Wiedemann algorithm, Z/pZ, % P, 1, 2 11
12 $Ax$, $A^{\top} y$,,? Intel Math Kernel Library, BLAS LAPACK, BLAS ?gem2vu The ?gem2vu routines perform two matrix-vector operations defined as y1 := alpha*A*x1 + beta*y1, and y2 := alpha*A'*x2 + beta*y2 12
13 two-stage algorithm 13
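14 A $n \times n$, $AV = VD$, $V^{\top} V = V V^{\top} = I_n$, $D = \operatorname{diag}(\lambda_1, \dots, \lambda_n)$, $V = (v_1\ \cdots\ v_n)$, $\lambda_i$ $(1 \le i \le n)$, V ($n \times n$) 14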
15 A 1., A 3 T (1)Householder (Dongarra, ) (2)Bischof/Wu + 2. T V = V D, V V = V V = I n 3. V V 15
16 Bischof/Wu + n n A, (Bischof/Wu ), 3 T ( ) A 16
17 Bischof/Wu, O(n 3 )., O(Ln 2 ), L,. Bischof/Wu,, CPU. L,,,,, two-stage algorithm 17
18 two-stage algorithm Z/pZ, N N A, =, N N A, C, C Wiedemann algorithm A 18
19 N N, [1,10] N Wiedemann two-stage Hensel sec 0.579sec 0.263sec sec 1.983sec 2.757sec sec 5.891sec sec sec sec sec sec sec sec Intel(R) Core(TM) i7-4850HQ 2.30GHz, L4 :128MB, Wiedemann algorithm CPU, Mem 16GB, Fedora 20 19
20 dgem2vu? two-stage algorithm, Wiedemann algorithm,, = 1, two-stage algorithm, Wiedemann algorithm 20
21 Hensel,,, ( ), v 0, Q, A k 1 v 0 A n 2 v 0 Av 0 v 0 c k 1. c 0 = A k v 0,, Hensel (p ) v 0 generic, 21
22 3 (1) (2) 1. AVX2 2. AVX bits 22
23 :two-stage algorithm n n A, 0 0 A 2 2,, 23
24 SIMD 24
25 SIMD(single instruction multiple data) IP x y y α x + y, SIMD (SSE,AVX ) SIMD, 25
26 s = min i, SIMD a i s 0 = a 0, s 1 = a 1, s 2 = a 2, s 3 = a 3 s 0 = min(s 0, a 4 ), s 1 = min(s 1, a 5 ), s 2 = min(s 2, a 6 ), s 3 = min(s 3, a 7 ) s 0 = min(s 0, a 8 ), s 1 = min(s 1, a 9 ), s 2 = min(s 2, a 10 ), s 3 = min(s 3, a 11 ) s = min(s 0, s 1, s 2, s 3 ) 26
27 , $s = \min_i a_i$ SIMD? C tmp=a[0]; for(i=step;i<n;i=i+step){ tmp = a[i] < tmp ? a[i] : tmp; },, $s = \min_i a_i$ C, 27
28 FORTRAN Fortran =minval( ( )), SIMD, 28
29 C?, Intel Array Notation = sec reduce min( [ : : ]); Array Notation C
30 GNU minval Fortran gfortran -O3 -mtune=native -march=native -c program min.f program min.o gcc -O3 -mtune=native -march=native -o test test.c program min.o, Fortran C 30
31 2 ( ) 31
32 C89 double A[5][4];, OK, int N=atoi(argv[1]); double A[N][N]; double **A; A=(double **)malloc(sizeof(double *)*N); for(i=0;i<N;i++){ A[i]=(double *)malloc(sizeof(double)*N); }, 32
33 , 2, double *A; A=(double *)malloc(sizeof(double)*N*N);, A[i*N+j],, C99 int N=atoi(argv[1]); double A[N][N];,,, 33
34 , static double A[N][N];,, C99 int N=atoi(argv[1]); double (* restrict A)[N]; A=(double (* restrict)[N])malloc(sizeof(double)*N*N); A, N 1,, A[i][j], 34
35 restrict? Fortran C double precision A(N,N),B(N,N), Fortran, A B,, C, restrict,, 35
36 36
37 X X mod 2 = 1 X mod 3 = 2, X = …, −13, −7, −1, 5, 11, 17, …, X, 37
38 , X 1, X = −7, −1, 5,, 3 X mod 2 = 1 X mod 3 = 2 X mod 5 = 3,, X = −7,, 38
39 ( ) 2 2 : q, q 1 = q 2 = α 1,,α s c(α 1,, α s ), α 1,,α s c(α 1,, α s ) 2,, q = α 1,,α s c(α 1,, α s )x α 1 1 xα s s Z[x 1,, x s ] 39
40 A = (a i,j ) Z N N Hadamard, det(a) min N i=1 N j=1 a i,j 2, N j=1 N i=1 a i,j 2 A = (a i,j ) Z[x 1,, x s ] N N Goldstein Graham, det(a) 2 min N i=1 N j=1 a i,j 2 1, N j=1 N i=1 a i,j
41 B = a 2b 3c 4d + f = 4ad + 1af 6bc G.&G. = min( , ) < 13.1 A f(x) = det(xi A) f 1 (x) = f(x) mod p 1 f 2 (x) = f(x) mod p 2., f(x) 41
42 Z/pZ 42
43 .1:C GNU p, a, b Z, p < 2 63, 0 a, b < p, a b = (a + b) mod p 0 a 0,, a < p, r = a 0 a 1 a unsigned long long a[ ],r; r=0; for(i=0;i< ;i++) r=(r+a[i]) % p; 43
44 r = a 0 a 1 a = (a 0 + a a ) mod p typedef unsigned int uint128_t __attribute__((mode(TI))); unsigned long long a[ ],r; uint128_t t=0; for(i=0;i< ;i++) t+=a[i]; r=t % p; 44
45 .2: Basic Linear Algebra Subprograms BLAS , p 1 2 p 1 2, N,, N p p 2 N + 1 p, BLAS 45
46 .3: SIMD 0 < p, N + 1 p Z/pZ, SIMD typedef unsigned int UI; typedef unsigned long long ULL; UI a[n],b[n],r,p; ULL t=0; for(i=0;i<n;i++) t+=(ULL)a[i]*b[i]; r=t % p; 46
47 .4: c i = a i mod p 0 c i < p, CPU, c i, 0 c i < 2p, 47
48 M = p ( ), M,, 128bits bits 64bits
49 (2) 1.0, M = (2 64 1)/p 1.0/p 2 64 p, M (a i M) 64bits, a i /p,, 1 (, ) c i = a i p (a i M 64bits) 49
50 64bits 64bits, xxx, asm ("mulq %3":"=a"(xxx),"=d"(r):"a"(a1),"g"(a2)) 50
51 Tropical Determinant 51
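52 A:n $A = (a_{ij})$, $\det A$: $\det A = |A| = \sum_{\sigma \in S_n} \operatorname{sgn}(\sigma)\, a_{1\sigma(1)} a_{2\sigma(2)} \cdots a_{n\sigma(n)}$ Permanent A:n $A = (a_{ij})$, $\operatorname{per} A$: $\operatorname{per} A = \sum_{\sigma \in S_n} a_{1\sigma(1)} a_{2\sigma(2)} \cdots a_{n\sigma(n)}$ 52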
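53 ultradiscretization, $a + b \to \max(a, b)$, $a \times b \to a + b$, $a / b \to a - b$ ultradiscrete permanent A:n $A = (a_{ij})$, $\operatorname{udper} A$: $\operatorname{udper} A = \max_{\sigma \in S_n} \left( a_{1\sigma(1)} + a_{2\sigma(2)} + \cdots + a_{n\sigma(n)} \right)$ ultradiscrete permanent, Tropical Determinant 53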
54 Tropical Determinant Linear Assignment Problem Tomizawa, N. : On some techniques useful for the solution of transportation problems. Networks 1, (1971). Jonker-Volgenant algorithm(lapjv),
55 Tropical Determinant A = 2x x 3 + 6x x x x 3, B = , B Tropical Determinant, A 55
56 Newton 56
57 Newton 1 n f(x) Newton $f(x) = f[x_0] + (x - x_0) f[x_0, x_1] + \cdots + (x - x_0) \cdots (x - x_{n-1}) f[x_0, x_1, x_2, \dots, x_n]$ $f[x_0, x_1, x_2, \dots, x_n]$, 1, 2, $f[x_0, x_1] = \dfrac{f(x_1) - f(x_0)}{x_1 - x_0}$, $f[x_0, x_1, x_2] = \dfrac{f[x_0, x_2] - f[x_0, x_1]}{x_2 - x_1}$, 57
58 , n. $f[x_0, x_1, \dots, x_n] = \dfrac{f[x_0, x_1, \dots, x_{n-2}, x_n] - f[x_0, x_1, \dots, x_{n-2}, x_{n-1}]}{x_n - x_{n-1}}$, $x_n - x_{n-1}$,, Z/pZ,, 1, 58
59 Newton (1) (x 0, f(x 0 )), (x 1, f(x 1 )),, (x N, f(x N )), f(x) = f 0 + (x x 0 )f 1 + (x x 0 )(x x 1 )f (x x 0 )(x x 1 ) (x x N )f N 59
60 $f(x_0) = f_0$; $f(x_1) = f_0 + (x_1 - x_0) f_1$, $f_1 = \dfrac{f(x_1) - f_0}{x_1 - x_0}$; $f(x_2) = f_0 + (x_2 - x_0) f_1 + (x_2 - x_0)(x_2 - x_1) f_2$, $f_2 = \dfrac{f(x_2) - (f_0 + (x_2 - x_0) f_1)}{(x_2 - x_0)(x_2 - x_1)}$; $f_0 + (x_2 - x_0) f_1$, 60
61 Newton (2) f(x j ) = f 0 + (x j x 0 )f 1 + (x j x 0 )(x j x 1 )f (x j x 0 )(x j x 1 ) (x j x N )f N (x j x 0 ), (x j x 0 )(x j x 1 ),, (x j x 0 )(x j x 1 ) (x j x N ),, f(x j ) = 1 f 0 + { (x j x 0 ) } f 1 + { (xj x 0 )(x j x 1 ) } f { (xj x 0 )(x j x 1 ) (x j x N ) } f N,, SIMD 61
62 Hyper-Threading Technology 62
63 1.x1=(ULL)a[i]*(ULL)b[i]+(ULL)c[i] 2. asm ("mulq %3":"=a"(xxx),"=d"(x2):"a"(x1),"g"(INV_CMX)) 3.z[i]=(UI)x1-(UI)x2*(UI)CM, z[i] = mod(a[i] * b[i] + c[i], p) (ULL=unsigned long long,UI=unsigned int), i, , CPU ( CPU ) Hyper-Threading Technology 63
64 Intel Hyper-Threading Technology(HTT) Intel web page, Hyper-Threading Technology. ( HT ) 1 HT 64
65 E6(a)=a^27+12*p2*a^25+60*p2^2*a^23-48*p1*a^22+(168*p2^3+96*q2)*a^21-336*p2*p1*a^20+(294*p2^4+528*q2*p2+480*p0)*a^19+(-1008*p2^2*p1-1344*q1)*a^18 +(144*p1^2+336*p2^5+1152*q2*p2^2+2304*p0*p2)*a^17 +((-1680*p2^3-768*q2)*p1-5568*q1*p2)*a^16 +(608*p2*p1^2+252*p2^6+1200*q2*p2^3+4768*p0*p2^ *q0-1248*q2^2)*a^15 +((-1680*p2^4-2688*q2*p2+2304*p0)*p1-8832*q1*p2^2)*a^14 +(976*p2^2*p1^2+3264*q1*p1+120*p2^7+480*q2*p2^4+5696*p0*p2^3+ (43776*q0-4800*q2^2)*p *q2*p0)*a^13 +(832*p1^3+(-1008*p2^5-3072*q2*p2^2+5888*p0*p2)*p1-6528*q1*p2^ *q2*q1)*a^12 +((704*p2^3+4224*q2)*p1^2+2688*q1*p2*p1+33*p2^8-144*q2*p2^5+4384*p0*p2^4 +(41472*q0-6720*q2^2)*p2^ *q2*p0*p *p0^2)*a^11 +(2560*p2*p1^3+(-336*p2^6-768*q2*p2^3+3584*p0*p2^ *q0+8448*q2^2)*p1-2112*q1*p2^ *q2*q1*p *p0*q1)*a^10 +((176*p2^4+8960*q2*p *p0)*p1^2-5504*q1*p2^2*p1+4*p2^9-192*q2*p2^ *p0*p2^5+(22528*q0-3840*q2^2)*p2^ *q2*p0*p2^ *p0^2*p *q2*q *q1^2+5120*q2^3)*a^9 65
66 +(2688*p2^2*p1^3+4608*q1*p1^2+(-48*p2^7+768*q2*p2^4-1536*p0*p2^3 +(82944*q *q2^2)*p *q2*p0)*p1-192*q1*p2^ *q2*q1*p2^ *p0*q1*p2)*a^8 +(-2560*p1^4+(-32*p2^5+5376*q2*p2^ *p0*p2)*p1^2+(-6144*q1*p2^ *q2*q1)*p1-48*q2*p2^7+608*p0*p2^6+(9600*q0-480*q2^2)*p2^ *q2*p0*p2^ *p0^2*p2^2+(156672*q2*q *q1^2+9984*q2^3)*p *p0*q *q2^2*p0)*a^7 +((1024*p2^ *q2)*p1^ *q1*p2*p1^2+(384*q2*p2^5-1792*p0*p2^4 +(21504*q0+6912*q2^2)*p2^ *q2*p0*p *p0^2)*p1+1536*q2*q1*p2^ *p0*q1*p2^ *q1*q *q2^2*q1)*a^6 +(-1536*p2*p1^4+(-16*p2^6+768*q2*p2^3-4608*p0*p2^ *q *q2^2)*p1^2 +(-1344*q1*p2^ *q2*q1*p2-9216*p0*q1)*p1+64*p0*p2^7 +(2304*q0+192*q2^2)*p2^5-3072*p0^2*p2^3+(55296*q2*q *q1^ *q2^3)*p2^2+( *p0*q *q2^2*p0)*p *q2*p0^2)*a^5 +((64*p2^4-4096*q2*p2+8192*p0)*p1^3-512*q1*p2^2*p1^2+(-256*p0*p2^5+ (3072*q0-768*q2^2)*p2^3-8192*q2*p0*p2^ *p0^2*p *q2*q *q1^ *q2^3)*p1-1024*p0*q1*p2^3+(-36864*q1*q0-3072*q2^2*q1)*p *q2*p0*q1)*a^4 +(256*p2^2*p1^ *q1*p1^3+(128*q2*p2^4-1024*p0*p2^3+(-6144*q0
67 -2560*q2^2)*p2+8192*q2*p0)*p1^2+(-128*q1*p2^5+2048*q2*q1*p2^ *p0*q1*p2)*p1+256*q0*p2^6-256*q2*p0*p2^5+256*p0^2*p2^4+(9216*q2*q0-2560*q1^2-256*q2^3)*p2^3+(-18432*p0*q0-7680*q2^2*p0)*p2^ *q2*p0^2*p *q0^ *q2^2*q *q2*q1^ *p0^3-6912*q2^4)*a^3 +(-1024*p1^5+4096*p0*p2*p1^ *q2*q1*p1^ *q1^2*p2*p1)*a^2 +(-2048*q2*p1^4+2048*q1*p2*p1^3+((-3072*q0-256*q2^2)*p2^2+4096*q2*p0*p2-4096*p0^2)*p1^2+(512*q2*q1*p2^3-1024*p0*q1*p2^ *q1*q *q2^2*q1)*p1-256*q1^2*p2^4-6144*q2*q1^2*p *p0*q1^2)*a +(4096*q0-1024*q2^2)*p1^3+(2048*q2*q1*p2-4096*p0*q1)*p1^2-1024*q1^2*p2^2*p1-4096*q1^3 E6(a)
68 E6 k (a) = E6(a) mod a k+1,, CPU:Intel Core i7 980X(6 Core), Mem:24G, OS:Fedora 13 GNU GCC compiler Option:-O3 -mtune=native -march=native -fopenmp Kimura Parallel Kimura Parallel k Kimura Serial without HTT with HTT 7 5m46.000s 1m13.400s s 66
69 Intel C++ compiler Option:-fast -openmp Kimura Parallel Kimura Parallel k Kimura Serial without HTT with HTT 7 6m11.804s 1m11.837s s 6 Core CPU, 6 (super-linear) 67
70 E6(a) CPU:Intel Core i7 980X(6Core) Mem:24G Compiler:GCC Option:-O3 -mtune=native -march=native -fopenmp : (txt :2.5G) Serial: 10913m45.857s Parallel: 1773m28.272s Speed Up: 6.15 superlinear 68
untitled
A = QΛQ T A n n Λ Q A = XΛX 1 A n n Λ X GPGPU A 3 T Q T AQ = T (Q: ) T u i = λ i u i T {λ i } {u i } QR MR 3 v i = Q u i A {v i } A n = 9000 Quad Core Xeon 2 LAPACK (4/3) n 3 O(n 2 ) O(n 3 ) A {v i }
More informationuntitled
A = QΛQ T A n n Λ Q A = XΛX 1 A n n Λ X GPGPU A 3 T Q T AQ = T (Q: ) T u i = λ i u i T {λ i } {u i } QR MR 3 v i = Q u i A {v i } A n = 9000 Quad Core Xeon 2 LAPACK (4/3) n 3 O(n 2 ) O(n 3 ) A {v i }
More informationCPU Levels in the memory hierarchy Level 1 Level 2... Increasing distance from the CPU in access time Level n Size of the memory at each level 1: 2.2
FFT 1 Fourier fast Fourier transform FFT FFT FFT 1 FFT FFT 2 Fourier 2.1 Fourier FFT Fourier discrete Fourier transform DFT DFT n 1 y k = j=0 x j ω jk n, 0 k n 1 (1) x j y k ω n = e 2πi/n i = 1 (1) n DFT
More information(Basic Theory of Information Processing) 1
(Basic Theory of Information Processing) 1 10 (p.178) Java a[0] = 1; 1 a[4] = 7; i = 2; j = 8; a[i] = j; b[0][0] = 1; 2 b[2][3] = 10; b[i][j] = a[2] * 3; x = a[2]; a[2] = b[i][3] * x; 2 public class Array0
More informationインテル(R) Visual Fortran Composer XE
Visual Fortran Composer XE 1. 2. 3. 4. 5. Visual Studio 6. Visual Studio 7. 8. Compaq Visual Fortran 9. Visual Studio 10. 2 https://registrationcenter.intel.com/regcenter/ w_fcompxe_all_jp_2013_sp1.1.139.exe
More informationII 2 3.,, A(B + C) = AB + AC, (A + B)C = AC + BC. 4. m m A, m m B,, m m B, AB = BA, A,, I. 5. m m A, m n B, AB = B, A I E, 4 4 I, J, K
II. () 7 F 7 = { 0,, 2, 3, 4, 5, 6 }., F 7 a, b F 7, a b, F 7,. (a) a, b,,. (b) 7., 4 5 = 20 = 2 7 + 6, 4 5 = 6 F 7., F 7,., 0 a F 7, ab = F 7 b F 7. (2) 7, 6 F 6 = { 0,, 2, 3, 4, 5 },,., F 6., 0 0 a F
More informationad bc A A A = ad bc ( d ) b c a n A n A n A A det A A ( ) a b A = c d det A = ad bc σ {,,,, n} {,,, } {,,, } {,,, } ( ) σ = σ() = σ() = n sign σ sign(
I n n A AX = I, YA = I () n XY A () X = IX = (YA)X = Y(AX) = YI = Y X Y () XY A A AB AB BA (AB)(B A ) = A(BB )A = AA = I (BA)(A B ) = B(AA )B = BB = I (AB) = B A (BA) = A B A B A = B = 5 5 A B AB BA A
More informationMBLAS¤ÈMLAPACK; ¿ÇÜĹÀºÅÙÈǤÎBLAS/LAPACK¤ÎºîÀ®
MBLAS MLAPACK; BLAS/LAPACK maho@riken.jp February 23, 2009 MPACK(MBLAS/MLAPACK) ( ) (2007 ) ( ) http://accc.riken.jp/maho/ BLAS/LAPACK http://mplapack.sourceforge.net/ BLAS (Basic Linear Algebra Subprograms)
More informationx, y x 3 y xy 3 x 2 y + xy 2 x 3 + y 3 = x 3 y xy 3 x 2 y + xy 2 x 3 + y 3 = 15 xy (x y) (x + y) xy (x y) (x y) ( x 2 + xy + y 2) = 15 (x y)
x, y x 3 y xy 3 x 2 y + xy 2 x 3 + y 3 = 15 1 1977 x 3 y xy 3 x 2 y + xy 2 x 3 + y 3 = 15 xy (x y) (x + y) xy (x y) (x y) ( x 2 + xy + y 2) = 15 (x y) ( x 2 y + xy 2 x 2 2xy y 2) = 15 (x y) (x + y) (xy
More informationSO(2)
TOP URL http://amonphys.web.fc2.com/ 1 12 3 12.1.................................. 3 12.2.......................... 4 12.3............................. 5 12.4 SO(2).................................. 6
More informationLINEAR ALGEBRA I Hiroshi SUZUKI Department of Mathematics International Christian University
LINEAR ALGEBRA I Hiroshi SUZUKI Department of Mathematics International Christian University 2002 2 2 2 2 22 2 3 3 3 3 3 4 4 5 5 6 6 7 7 8 8 9 Cramer 9 0 0 E-mail:hsuzuki@icuacjp 0 3x + y + 2z 4 x + y
More informationMicrosoft Word - Sample_CQS-Report_English_backslant.doc
***** Corporation ANSI C compiler test system System test report 2005/11/16 Japan Novel Corporation *****V43/NQP-DS-501-1 Contents Contents......2 1. Evaluated compiler......3 1.1. smp-compiler compiler...3
More informationインテル(R) Visual Fortran Composer XE 2013 Windows版 入門ガイド
Visual Fortran Composer XE 2013 Windows* エクセルソフト株式会社 www.xlsoft.com Rev. 1.1 (2012/12/10) Copyright 1998-2013 XLsoft Corporation. All Rights Reserved. 1 / 53 ... 3... 4... 4... 5 Visual Studio... 9...
More information1 n A a 11 a 1n A =.. a m1 a mn Ax = λx (1) x n λ (eigenvalue problem) x = 0 ( x 0 ) λ A ( ) λ Ax = λx x Ax = λx y T A = λy T x Ax = λx cx ( 1) 1.1 Th
1 n A a 11 a 1n A = a m1 a mn Ax = λx (1) x n λ (eigenvalue problem) x = ( x ) λ A ( ) λ Ax = λx x Ax = λx y T A = λy T x Ax = λx cx ( 1) 11 Th9-1 Ax = λx λe n A = λ a 11 a 12 a 1n a 21 λ a 22 a n1 a n2
More information2001 年度 『数学基礎 IV』 講義録
4 A 95 96 4 1 n {1, 2,,n} n n σ ( ) 1 2 n σ(1) σ(2) σ(n) σ σ 2 1 n 1 2 {1, 2,,n} n n! n S n σ, τ S n {1, 2,,n} τ σ {1, 2,,n} n τ σ σ, τ τσ σ n σ 1 n σ 1 ( σ σ ) 1 σ = σσ 1 = ι 1 2 n ι 1 2 n 4.1. 4 σ =
More information1 (bit ) ( ) PC WS CPU IEEE754 standard ( 24bit) ( 53bit)
GNU MP BNCpack tkouya@cs.sist.ac.jp 2002 9 20 ( ) Linux Conference 2002 1 1 (bit ) ( ) PC WS CPU IEEE754 standard ( 24bit) ( 53bit) 10 2 2 3 4 5768:9:; = %? @BADCEGFH-I:JLKNMNOQP R )TSVU!" # %$ & " #
More information01_OpenMP_osx.indd
OpenMP* / 1 1... 2 2... 3 3... 5 4... 7 5... 9 5.1... 9 5.2 OpenMP* API... 13 6... 17 7... 19 / 4 1 2 C/C++ OpenMP* 3 Fortran OpenMP* 4 PC 1 1 9.0 Linux* Windows* Xeon Itanium OS 1 2 2 WEB OS OS OS 1 OS
More informationnumb.dvi
11 Poisson kanenko@mbkniftycom alexeikanenko@docomonejp http://wwwkanenkocom/ , u = f, ( u = u+f u t, u = f t ) 1 D R 2 L 2 (D) := {f(x,y) f(x,y) 2 dxdy < )} D D f,g L 2 (D) (f,g) := f(x,y)g(x,y)dxdy (L
More information1 return main() { main main C 1 戻り値の型 関数名 引数 関数ブロックをあらわす中括弧 main() 関数の定義 int main(void){ printf("hello World!!\n"); return 0; 戻り値 1: main() 2.2 C main
C 2007 5 29 C 1 11 2 2.1 main() 1 FORTRAN C main() main main() main() 1 return 1 1 return main() { main main C 1 戻り値の型 関数名 引数 関数ブロックをあらわす中括弧 main() 関数の定義 int main(void){ printf("hello World!!\n"); return
More informationD 24 D D D
5 Paper I.R. 2001 5 Paper HP Paper 5 3 5.1................................................... 3 5.2.................................................... 4 5.3.......................................... 6
More informationセアラの暗号
1 Cayley-Purser 1 Sarah Flannery 16 1 [1] [1] [1]314 www.cayley-purser.ie http://cryptome.org/flannery-cp.htm [2] Cryptography: An Investigation of a New Algorithm vs. the RSA(1999 RSA 1999 9 11 2 (17
More information, = = 7 6 = 42, =
http://www.ss.u-tokai.ac.jp/~mahoro/2016autumn/alg_intro/ 1 1 2016.9.26, http://www.ss.u-tokai.ac.jp/~mahoro/2016autumn/alg_intro/ 1.1 1 214 132 = 28258 2 + 1 + 4 1 + 3 + 2 = 7 6 = 42, 4 + 2 = 6 2 + 8
More information() x + y + y + x dy dx = 0 () dy + xy = x dx y + x y ( 5) ( s55906) 0.7. (). 5 (). ( 6) ( s6590) 0.8 m n. 0.9 n n A. ( 6) ( s6590) f A (λ) = det(a λi)
0. A A = 4 IC () det A () A () x + y + z = x y z X Y Z = A x y z ( 5) ( s5590) 0. a + b + c b c () a a + b + c c a b a + b + c 0 a b c () a 0 c b b c 0 a c b a 0 0. A A = 7 5 4 5 0 ( 5) ( s5590) () A ()
More information連載講座 : 高生産並列言語を使いこなす (5) 分子動力学シミュレーション 田浦健次朗 東京大学大学院情報理工学系研究科, 情報基盤センター 目次 1 問題の定義 17 2 逐次プログラム 分子 ( 粒子 ) セル 系の状態 ステップ 18
連載講座 : 高生産並列言語を使いこなす (5) 分子動力学シミュレーション 田浦健次朗 東京大学大学院情報理工学系研究科, 情報基盤センター 目次 1 問題の定義 17 2 逐次プログラム 17 2.1 分子 ( 粒子 ) 17 2.2 セル 17 2.3 系の状態 18 2.4 1ステップ 18 2.5 力の計算 19 2.6 速度と位置の更新 20 2.7 セル間の分子の移動 21 3 OpenMP
More informationストリーミング SIMD 拡張命令2 (SSE2) を使用した SAXPY/DAXPY
SIMD 2(SSE2) SAXPY/DAXPY 2.0 2000 7 : 248600J-001 01/12/06 1 305-8603 115 Fax: 0120-47-8832 * Copyright Intel Corporation 1999, 2000 01/12/06 2 1...5 2 SAXPY DAXPY...5 2.1 SAXPY DAXPY...6 2.1.1 SIMD C++...6
More information211 年ハイパフォーマンスコンピューティングと計算科学シンポジウム Computing Symposium 211 HPCS /1/18 a a 1 a 2 a 3 a a GPU Graphics Processing Unit GPU CPU GPU GPGPU G
211 年ハイパフォーマンスコンピューティングと計算科学シンポジウム Computing Symposium 211 HPCS211 211/1/18 GPU 4 8 BLAS 4 8 BLAS Basic Linear Algebra Subprograms GPU Graphics Processing Unit 4 8 double 2 4 double-double DD 4 4 8 quad-double
More informationRaVioli SIMD
RaVioli SIMD 17 17115074 i RaVioli SIMD PC PC PC PC CPU RaVioli RaVioli CPU RaVioli CPU SIMD RaVioli RaVioli SIMD RaVioli SIMD RaVioli SIMD 1 1 2 RaVioli 2 2.1 RaVioli.......................................
More information6 2 2 x y x y t P P = P t P = I P P P ( ) ( ) ,, ( ) ( ) cos θ sin θ cos θ sin θ, sin θ cos θ sin θ cos θ y x θ x θ P
6 x x 6.1 t P P = P t P = I P P P 1 0 1 0,, 0 1 0 1 cos θ sin θ cos θ sin θ, sin θ cos θ sin θ cos θ x θ x θ P x P x, P ) = t P x)p ) = t x t P P ) = t x = x, ) 6.1) x = Figure 6.1 Px = x, P=, θ = θ P
More informationA µ : A A A µ(x, y) x y (x y) z = x (y z) A x, y, z x y = y x A x, y A e x e = e x = x A x e A e x A xy = yx = e y x x x y y = x A (1)
7 2 2.1 A µ : A A A µ(x, y) x y (x y) z = x (y z) A x, y, z x y = y x A x, y A e x e = e x = x A x e A e x A xy = yx = e y x x x y y = x 1 2.1.1 A (1) A = R x y = xy + x + y (2) A = N x y = x y (3) A =
More information1 [1, 2, 3, 4, 5, 8, 9, 10, 12, 15] The Boston Public Schools system, BPS (Deferred Acceptance system, DA) (Top Trading Cycles system, TTC) cf. [13] [
Vol.2, No.x, April 2015, pp.xx-xx ISSN xxxx-xxxx 2015 4 30 2015 5 25 253-8550 1100 Tel 0467-53-2111( ) Fax 0467-54-3734 http://www.bunkyo.ac.jp/faculty/business/ 1 [1, 2, 3, 4, 5, 8, 9, 10, 12, 15] The
More informationSecond-semi.PDF
PC 2000 2 18 2 HPC Agenda PC Linux OS UNIX OS Linux Linux OS HPC 1 1CPU CPU Beowulf PC (PC) PC CPU(Pentium ) Beowulf: NASA Tomas Sterling Donald Becker 2 (PC ) Beowulf PC!! Linux Cluster (1) Level 1:
More information20 9 19 1 3 11 1 3 111 3 112 1 4 12 6 121 6 122 7 13 7 131 8 132 10 133 10 134 12 14 13 141 13 142 13 143 15 144 16 145 17 15 19 151 1 19 152 20 2 21 21 21 211 21 212 1 23 213 1 23 214 25 215 31 22 33
More information[1] [2] [3] (RTT) 2. Android OS Android OS Google OS 69.7% [4] 1 Android Linux [5] Linux OS Android Runtime Dalvik Dalvik UI Application(Home,T
LAN Android Transmission-Control Middleware on multiple Android Terminals in a WLAN Environment with consideration of Round Trip Time Ai HAYAKAWA, Saneyasu YAMAGUCHI, and Masato OGUCHI Ochanomizu University
More information.5 z = a + b + c n.6 = a sin t y = b cos t dy d a e e b e + e c e e e + e 3 s36 3 a + y = a, b > b 3 s363.7 y = + 3 y = + 3 s364.8 cos a 3 s365.9 y =,
[ ] IC. r, θ r, θ π, y y = 3 3 = r cos θ r sin θ D D = {, y ; y }, y D r, θ ep y yddy D D 9 s96. d y dt + 3dy + y = cos t dt t = y = e π + e π +. t = π y =.9 s6.3 d y d + dy d + y = y =, dy d = 3 a, b
More informationJanuary 27, 2015
e-mail : kigami@i.kyoto-u.ac.jp January 27, 205 Contents 2........................ 2.2....................... 3.3....................... 6.4......................... 2 6 2........................... 6
More information4 倍精度基本線形代数ルーチン群 QPBLAS の紹介 [index] 1. Introduction 2. Double-double algorithm 3. QPBLAS 4. QPBLAS-GPU 5. Summary 佐々成正 1, 山田進 1, 町田昌彦 1, 今村俊幸 2, 奥田洋司
4 倍精度基本線形代数ルーチン群 QPBLAS の紹介 [index] 1. Introduction 2. Double-double algorithm 3. QPBLAS 4. QPBLAS-GPU 5. Summary 佐々成正 1, 山田進 1, 町田昌彦 1, 今村俊幸 2, 奥田洋司 3 1 1 日本原子力研究開発機構システム計算科学センター 2 理科学研究所計算科学研究機構 3 東京大学新領域創成科学研究科
More informationJacobson Prime Avoidance
2016 2017 2 22 1 1 3 2 4 2.1 Jacobson................. 4 2.2.................... 5 3 6 3.1 Prime Avoidance....................... 7 3.2............................. 8 3.3..............................
More information20 4 20 i 1 1 1.1............................ 1 1.2............................ 4 2 11 2.1................... 11 2.2......................... 11 2.3....................... 19 3 25 3.1.............................
More informationII Time-stamp: <05/09/30 17:14:06 waki> ii
II waki@cc.hirosaki-u.ac.jp 18 1 30 II Time-stamp: ii 1 1 1.1.................................................. 1 1.2................................................... 3 1.3..................................................
More informationA A = a 41 a 42 a 43 a 44 A (7) 1 (3) A = M 12 = = a 41 (8) a 41 a 43 a 44 (3) n n A, B a i AB = A B ii aa
1 2 21 2 2 [ ] a 11 a 12 A = a 21 a 22 (1) A = a 11 a 22 a 12 a 21 (2) 3 3 n n A A = n ( 1) i+j a ij M ij i =1 n (3) j=1 M ij A i j (n 1) (n 1) 2-1 3 3 A A = a 11 a 12 a 13 a 21 a 22 a 23 a 31 a 32 a 33
More information/* sansu1.c */ #include <stdio.h> main() { int a, b, c; /* a, b, c */ a = 200; b = 1300; /* a 200 */ /* b 200 */ c = a + b; /* a b c */ }
C 2: A Pedestrian Approach to the C Programming Language 2 2-1 2.1........................... 2-1 2.1.1.............................. 2-1 2.1.2......... 2-4 2.1.3..................................... 2-6
More information76 3 B m n AB P m n AP : PB = m : n A P B P AB m : n m < n n AB Q Q m A B AQ : QB = m : n (m n) m > n m n Q AB m : n A B Q P AB Q AB 3. 3 A(1) B(3) C(
3 3.1 3.1.1 1 1 A P a 1 a P a P P(a) a P(a) a P(a) a a 0 a = a a < 0 a = a a < b a > b A a b a B b B b a b A a 3.1 A() B(5) AB = 5 = 3 A(3) B(1) AB = 3 1 = A(a) B(b) AB AB = b a 3.1 (1) A(6) B(1) () A(
More information2012年度HPCサマーセミナー_多田野.pptx
! CCS HPC! I " tadano@cs.tsukuba.ac.jp" " 1 " " " " " " " 2 3 " " Ax = b" " " 4 Ax = b" A = a 11 a 12... a 1n a 21 a 22... a 2n...... a n1 a n2... a nn, x = x 1 x 2. x n, b = b 1 b 2. b n " " 5 Gauss LU
More informationQD library! Feature! Easy to use high precision! Easy to understand the structure of arithmetic! 2 type high precision arithmetic! Double-Double precision (pseudo quadruple precision)! Quad-Double precision
More information,2,4
2005 12 2006 1,2,4 iii 1 Hilbert 14 1 1.............................................. 1 2............................................... 2 3............................................... 3 4.............................................
More informationUntitled
VASP 2703 2006 3 VASP 100 PC 3,4 VASP VASP VASP FFT. (LAPACK,BLAS,FFT), CPU VASP. 1 C LAPACK,BLAS VASP VASP VASP VASP bench.hg VASP CPU CPU CPU northwood LAPACK lmkl lapack64, BLAS lmkl p4 LA- PACK liblapack,
More information.1 A cos 2π 3 sin 2π 3 sin 2π 3 cos 2π 3 T ra 2 deta T ra 2 deta T ra 2 deta a + d 2 ad bc a 2 + d 2 + ad + bc A 3 a b a 2 + bc ba + d c d ca + d bc +
.1 n.1 1 A T ra A A a b c d A 2 a b a b c d c d a 2 + bc ab + bd ac + cd bc + d 2 a 2 + bc ba + d ca + d bc + d 2 A a + d b c T ra A T ra A 2 A 2 A A 2 A 2 A n A A n cos 2π sin 2π n n A k sin 2π cos 2π
More information5 / / $\mathrm{p}$ $\mathrm{r}$ 8 7 double 4 22 / [10][14][15] 23 P double 1 $\mathrm{m}\mathrm{p}\mathrm{f}\mathrm{u}\mathrm{n}/\mathrm{a
double $\mathrm{j}\mathrm{s}\mathrm{t}$ $\mathrm{q}$ 1505 2006 1-13 1 / (Kinji Kimura) Japan Science and Technology Agency Faculty of Science Rikkyo University 1 / / 6 1 2 3 4 5 Kronecker 6 2 21 $\mathrm{p}$
More information21 20 20413525 22 2 4 i 1 1 2 4 2.1.................................. 4 2.1.1 LinuxOS....................... 7 2.1.2....................... 10 2.2........................ 15 3 17 3.1.................................
More information1 8, : 8.1 1, 2 z = ax + by + c ax by + z c = a b +1 x y z c = 0, (0, 0, c), n = ( a, b, 1). f = n i=1 a ii x 2 i + i<j 2a ij x i x j = ( x, A x), f =
1 8, : 8.1 1, z = ax + by + c ax by + z c = a b +1 x y z c = 0, (0, 0, c), n = ( a, b, 1). f = a ii x i + i
More information連載講座 : 高生産並列言語を使いこなす (3) ゲーム木探索問題 田浦健次朗 東京大学大学院情報理工学系研究科, 情報基盤センター 目次 1 概要 17 2 ゲーム木探索 必勝 必敗 引き分け 盤面の評価値 αβ 法 指し手の順序付け (mo
連載講座 : 高生産並列言語を使いこなす (3) ゲーム木探索問題 田浦健次朗 東京大学大学院情報理工学系研究科, 情報基盤センター 目次 1 概要 17 2 ゲーム木探索 17 2.1 必勝 必敗 引き分け 17 2.2 盤面の評価値 18 2.3 αβ 法 19 2.4 指し手の順序付け (move ordering) 20 3 Andersson の詰み探索およびその並列化 21 3.1 Andersson
More information( ) a C n ( R n ) R a R C n. a C n (or R n ) a 0 2. α C( R ) a C n αa = α a 3. a, b C n a + b a + b ( ) p 8..2 (p ) a = [a a n ] T C n p n a
9 8 m n mn N.J.Nigham, Accuracy and Stability of Numerical Algorithms 2nd ed., (SIAM) x x = x2 + y 2 = x + y = max( x, y ) x y x () (norm) (condition number) 8. R C a, b C a b 0 a, b a = a 0 0 0 n C n
More informationn ( (
1 2 27 6 1 1 m-mat@mathscihiroshima-uacjp 2 http://wwwmathscihiroshima-uacjp/~m-mat/teach/teachhtml 2 1 3 11 3 111 3 112 4 113 n 4 114 5 115 5 12 7 121 7 122 9 123 11 124 11 125 12 126 2 2 13 127 15 128
More informationHPC146
2 3 4 5 6 int array[16]; #pragma xmp nodes p(4) #pragma xmp template t(0:15) #pragma xmp distribute t(block) on p #pragma xmp align array[i] with t(i) array[16] 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 Node
More information16 B
16 B (1) 3 (2) (3) 5 ( ) 3 : 2 3 : 3 : () 3 19 ( ) 2 ax 2 + bx + c = 0 (a 0) x = b ± b 2 4ac 2a 3, 4 5 1824 5 Contents 1. 1 2. 7 3. 13 4. 18 5. 22 6. 25 7. 27 8. 31 9. 37 10. 46 11. 50 12. 56 i 1 1. 1.1..
More information数理計画法入門 サンプルページ この本の定価 判型などは, 以下の URL からご覧いただけます. このサンプルページの内容は, 初版 1 刷発行時のものです.
数理計画法入門 サンプルページ この本の定価 判型などは, 以下の URL からご覧いただけます. http://www.morikita.co.jp/books/mid/092181 このサンプルページの内容は, 初版 1 刷発行時のものです. i 1947 G.B. Dantzig 3 (1) (2) (3) (4) Microsoft Excel Web 4 1 2 2 2 2 3 ii 4
More informationexample2_time.eps
Google (20/08/2 ) ( ) Random Walk & Google Page Rank Agora on Aug. 20 / 67 Introduction ( ) Random Walk & Google Page Rank Agora on Aug. 20 2 / 67 Introduction Google ( ) Random Walk & Google Page Rank
More information64bit SSE2 SSE2 FPU Visual C++ 64bit Inline Assembler 4 FPU SSE2 4.1 FPU Control Word FPU 16bit R R R IC RC(2) PC(2) R R PM UM OM ZM DM IM R: reserved
(Version: 2013/5/16) Intel CPU (kashi@waseda.jp) 1 Intel CPU( AMD CPU) 64bit SIMD Inline Assemler Windows Visual C++ Linux gcc 2 FPU SSE2 Intel CPU double 8087 FPU (floating point number processing unit)
More informationPowerPoint プレゼンテーション
応用数理概論 準備 端末上で cd ~/ mkdir cppwork cd cppwork wget http://271.jp/gairon/main.cpp wget http://271.jp/gairon/matrix.hpp とコマンドを記入. ls とコマンドをうち,main.cppとmatrix.hppがダウンロードされていることを確認. 1 準備 コンパイル c++ -I. -std=c++0x
More informationx V x x V x, x V x = x + = x +(x+x )=(x +x)+x = +x = x x = x x = x =x =(+)x =x +x = x +x x = x ( )x = x =x =(+( ))x =x +( )x = x +( )x ( )x = x x x R
V (I) () (4) (II) () (4) V K vector space V vector K scalor K C K R (I) x, y V x + y V () (x + y)+z = x +(y + z) (2) x + y = y + x (3) V x V x + = x (4) x V x + x = x V x x (II) x V, α K αx V () (α + β)x
More information数学Ⅱ演習(足助・09夏)
II I 9/4/4 9/4/2 z C z z z z, z 2 z, w C zw z w 3 z, w C z + w z + w 4 t R t C t t t t t z z z 2 z C re z z + z z z, im z 2 2 3 z C e z + z + 2 z2 + 3! z3 + z!, I 4 x R e x cos x + sin x 2 z, w C e z+w
More information23_33.indd
23 2TB 1TB 6TB 3TB 2TB 3TB 3TB 2TB 2TB 1TB 1TB 500GB 4TB 1TB 1TB 500GB 2TB 2TB 1TB 1TB RT RT RT RT RT RT RT MAC 10. 10. 10.6 10.5 MAC 10. 10. 10.6 10.5 MAC 10. 10.6 10.5 MAC 10. 10. 10.6 10.5 MAC 10. 10.6
More information倍々精度RgemmのnVidia C2050上への実装と応用
.. maho@riken.jp http://accc.riken.jp/maho/,,, 2011/2/16 1 - : GPU : SDPA-DD 10 1 - Rgemm : 4 (32 ) nvidia C2050, GPU CPU 150, 24GFlops 25 20 GFLOPS 15 10 QuadAdd Cray, QuadMul Sloppy Kernel QuadAdd Cray,
More information31 4 MATLAB A, B R 3 3 A = , B = mat_a, mat_b >> mat_a = [-1, -2, -3; -4, -5, -6; -7, -8, -9] mat_a =
3 4 MATLAB 3 4. A, B R 3 3 2 3 4 5 6 7 8 9, B = mat_a, mat_b >> mat_a = [-, -2, -3; -4, -5, -6; -7, -8, -9] 9 8 7 6 5 4 3 2 mat_a = - -2-3 -4-5 -6-7 -8-9 >> mat_b = [-9, -8, -7; -6, -5, -4; -3, -2, -]
More informationAppendix A BASIC BASIC Beginner s All-purpose Symbolic Instruction Code FORTRAN COBOL C JAVA PASCAL (NEC N88-BASIC Windows BASIC (1) (2) ( ) BASIC BAS
Appendix A BASIC BASIC Beginner s All-purpose Symbolic Instruction Code FORTRAN COBOL C JAVA PASCAL (NEC N88-BASIC Windows BASIC (1 (2 ( BASIC BASIC download TUTORIAL.PDF http://hp.vector.co.jp/authors/va008683/
More information4 4 θ X θ P θ 4. 0, 405 P 0 X 405 X P 4. () 60 () 45 () 40 (4) 765 (5) 40 B 60 0 P = 90, = ( ) = X
4 4. 4.. 5 5 0 A P P P X X X X +45 45 0 45 60 70 X 60 X 0 P P 4 4 θ X θ P θ 4. 0, 405 P 0 X 405 X P 4. () 60 () 45 () 40 (4) 765 (5) 40 B 60 0 P 0 0 + 60 = 90, 0 + 60 = 750 0 + 60 ( ) = 0 90 750 0 90 0
More informationインテル(R) C++ Composer XE 2011 Windows版 入門ガイド
C++ Composer XE 2011 Windows* エクセルソフト株式会社 www.xlsoft.com Rev. 1.2 (2011/05/03) Copyright 1998-2011 XLsoft Corporation. All Rights Reserved. 1 / 70 ... 4... 5... 6... 8 /... 8... 10 /... 11... 11 /... 13
More information並列計算の数理とアルゴリズム サンプルページ この本の定価 判型などは, 以下の URL からご覧いただけます. このサンプルページの内容は, 初版 1 刷発行時のものです.
並列計算の数理とアルゴリズム サンプルページ この本の定価 判型などは, 以下の URL からご覧いただけます. http://www.morikita.co.jp/books/mid/080711 このサンプルページの内容は, 初版 1 刷発行時のものです. Calcul scientifique parallèle by Frédéric Magoulès and François-Xavier
More informationGauss
15 1 LU LDL T 6 : 1g00p013-5 1 6 1.1....................................... 7 1.2.................................. 8 1.3.................................. 8 2 Gauss 9 2.1.....................................
More information(I) GotoBALS, http://www-is.amp.i.kyoto-u.ac.jp/ kkimur/charpoly.html 2
sdmp Maple - (Ver.2) ( ) September 27, 2011 1 (I) GotoBALS, http://www-is.amp.i.kyoto-u.ac.jp/ kkimur/charpoly.html 2 (II) Nehalem CPU GotoBLAS Intel CPU Nehalem CPU, GotoBLAS, Hyper-Thread technology
More informationR R 16 ( 3 )
(017 ) 9 4 7 ( ) ( 3 ) ( 010 ) 1 (P3) 1 11 (P4) 1 1 (P4) 1 (P15) 1 (P16) (P0) 3 (P18) 3 4 (P3) 4 3 4 31 1 5 3 5 4 6 5 9 51 9 5 9 6 9 61 9 6 α β 9 63 û 11 64 R 1 65 13 66 14 7 14 71 15 7 R R 16 http://wwwecoosaka-uacjp/~tazak/class/017
More informationver Web
ver201723 Web 1 4 11 4 12 5 13 7 2 9 21 9 22 10 23 10 24 11 3 13 31 n 13 32 15 33 21 34 25 35 (1) 27 4 30 41 30 42 32 43 36 44 (2) 38 45 45 46 45 5 46 51 46 52 48 53 49 54 51 55 54 56 58 57 (3) 61 2 3
More informationスパコンに通じる並列プログラミングの基礎
2018.06.04 2018.06.04 1 / 62 2018.06.04 2 / 62 Windows, Mac Unix 0444-J 2018.06.04 3 / 62 Part I Unix GUI CUI: Unix, Windows, Mac OS Part II 2018.06.04 4 / 62 0444-J ( : ) 6 4 ( ) 6 5 * 6 19 SX-ACE * 6
More informationMicrosoft PowerPoint - sales2.ppt
最適化とは何? CPU アーキテクチャに沿った形で最適な性能を抽出できるようにする技法 ( 性能向上技法 ) コンパイラによるプログラム最適化 コンパイラメーカの技量 経験量に依存 最適化ツールによるプログラム最適化 KAP (Kuck & Associates, Inc. ) 人によるプログラム最適化 アーキテクチャのボトルネックを知ること 3 使用コンパイラによる性能の違い MFLOPS 90
More information2 1 x 1.1: v mg x (t) = v(t) mv (t) = mg 0 x(0) = x 0 v(0) = v 0 x(t) = x 0 + v 0 t 1 2 gt2 v(t) = v 0 gt t x = x 0 + v2 0 2g v2 2g 1.1 (x, v) θ
1 1 1.1 (Isaac Newton, 1642 1727) 1. : 2. ( ) F = ma 3. ; F a 2 t x(t) v(t) = x (t) v (t) = x (t) F 3 3 3 3 3 3 6 1 2 6 12 1 3 1 2 m 2 1 x 1.1: v mg x (t) = v(t) mv (t) = mg 0 x(0) = x 0 v(0) = v 0 x(t)
More informationDecember 28, 2018
e-mail : kigami@i.kyoto-u.ac.jp December 28, 28 Contents 2............................. 3.2......................... 7.3..................... 9.4................ 4.5............. 2.6.... 22 2 36 2..........................
More information1 OpenCL OpenCL 1 OpenCL GPU ( ) 1 OpenCL Compute Units Elements OpenCL OpenCL SPMD (Single-Program, Multiple-Data) SPMD OpenCL work-item work-group N
GPU 1 1 2 1, 3 2, 3 (Graphics Unit: GPU) GPU GPU GPU Evaluation of GPU Computing Based on An Automatic Program Generation Technology Makoto Sugawara, 1 Katsuto Sato, 1 Kazuhiko Komatsu, 2 Hiroyuki Takizawa
More information2012 A, N, Z, Q, R, C
2012 A, N, Z, Q, R, C 1 2009 9 2 2011 2 3 2012 9 1 2 2 5 3 11 4 16 5 22 6 25 7 29 8 32 1 1 1.1 3 1 1 1 1 1 1? 3 3 3 3 3 3 3 1 1, 1 1 + 1 1 1+1 2 2 1 2+1 3 2 N 1.2 N (i) 2 a b a 1 b a < b a b b a a b (ii)
More informationItanium2ベンチマーク
HPC CPU mhori@ile.osaka-u.ac.jp Special thanks Timur Esirkepov HPC 2004 2 25 1 1. CPU 2. 3. Itanium 2 HPC 2 1 Itanium2 CPU CPU 3 ( ) Intel Itanium2 NEC SX-6 HP Alpha Server ES40 PRIMEPOWER SR8000 Intel
More informationuntitled
16 4 1 17 1 50 -1- -2- -3- -4- -5- -6- -7- 1 2-8- -9- -10- -11- Web -12- (1) (2)(1) (3) (4) (1)()(2) (3)(4) -13- -14- -15- -16- -17- -18- -19- -20- -21- -22- -23- (2)(1) (3) -24- -25- -26- -27- -28- -29-
More information1 n 1 1 2 2 3 3 3.1............................ 3 3.2............................. 6 3.2.1.............. 6 3.2.2................. 7 3.2.3........................... 10 4 11 4.1..........................
More informationA11 (1993,1994) 29 A12 (1994) 29 A13 Trefethen and Bau Numerical Linear Algebra (1997) 29 A14 (1999) 30 A15 (2003) 30 A16 (2004) 30 A17 (2007) 30 A18
2013 8 29y, 2016 10 29 1 2 2 Jordan 3 21 3 3 Jordan (1) 3 31 Jordan 4 32 Jordan 4 33 Jordan 6 34 Jordan 8 35 9 4 Jordan (2) 10 41 x 11 42 x 12 43 16 44 19 441 19 442 20 443 25 45 25 5 Jordan 26 A 26 A1
More information1 5 13 4 1 41 1 411 1 412 2 413 3 414 3 415 4 42 6 43 LU 7 431 LU 10 432 11 433 LU 11 44 12 441 13 442 13 443 SOR ( ) 14 444 14 445 15 446 16 447 SOR 16 448 16 45 17 4 41 n x 1,, x n a 11 x 1 + a 1n x
More information( )
18 10 01 ( ) 1 2018 4 1.1 2018............................... 4 1.2 2018......................... 5 2 2017 7 2.1 2017............................... 7 2.2 2017......................... 8 3 2016 9 3.1 2016...............................
More information直交座標系の回転
b T.Koama x l x, Lx i ij j j xi i i i, x L T L L, L ± x L T xax axx, ( a a ) i, j ij i j ij ji λ λ + λ + + λ i i i x L T T T x ( L) L T xax T ( T L T ) A( L) T ( LAL T ) T ( L AL) λ ii L AL Λ λi i axx
More informationindex calculus
index calculus 2008 3 8 1 generalized Weil descent p :, E/F p 3 : Y 2 = f(x), where f(x) = X 3 + AX + B, A F p, B F p 3 E(F p 3) 3 : Generalized Weil descent E(F p 4) 2 Index calculus Plain version Double-large-prime
More informationall.dvi
5,, Euclid.,..,... Euclid,.,.,, e i (i =,, ). 6 x a x e e e x.:,,. a,,. a a = a e + a e + a e = {e, e, e } a (.) = a i e i = a i e i (.) i= {a,a,a } T ( T ),.,,,,. (.),.,...,,. a 0 0 a = a 0 + a + a 0
More information,4) 1 P% P%P=2.5 5%!%! (1) = (2) l l Figure 1 A compilation flow of the proposing sampling based architecture simulation
1 1 1 1 SPEC CPU 2000 EQUAKE 1.6 50 500 A Parallelizing Compiler Cooperative Multicore Architecture Simulator with Changeover Mechanism of Simulation Modes GAKUHO TAGUCHI 1 YOUICHI ABE 1 KEIJI KIMURA 1
More information福岡大学人文論叢47-3
679 pp. 1 680 2 681 pp. 3 682 4 683 5 684 pp. 6 685 7 686 8 687 9 688 pp. b 10 689 11 690 12 691 13 692 pp. 14 693 15 694 a b 16 695 a b 17 696 a 18 697 B 19 698 A B B B A B B A A 20 699 pp. 21 700 pp.
More information2 2 MATHEMATICS.PDF 200-2-0 3 2 (p n ), ( ) 7 3 4 6 5 20 6 GL 2 (Z) SL 2 (Z) 27 7 29 8 SL 2 (Z) 35 9 2 40 0 2 46 48 2 2 5 3 2 2 58 4 2 6 5 2 65 6 2 67 7 2 69 2 , a 0 + a + a 2 +... b b 2 b 3 () + b n a
More information[ ] 0.1 lim x 0 e 3x 1 x IC ( 11) ( s114901) 0.2 (1) y = e 2x (x 2 + 1) (2) y = x/(x 2 + 1) 0.3 dx (1) 1 4x 2 (2) e x sin 2xdx (3) sin 2 xdx ( 11) ( s
[ ]. lim e 3 IC ) s49). y = e + ) ) y = / + ).3 d 4 ) e sin d 3) sin d ) s49) s493).4 z = y z z y s494).5 + y = 4 =.6 s495) dy = 3e ) d dy d = y s496).7 lim ) lim e s49).8 y = e sin ) y = sin e 3) y =
More informationlinearal1.dvi
19 4 30 I 1 1 11 1 12 2 13 3 131 3 132 4 133 5 134 6 14 7 2 9 21 9 211 9 212 10 213 13 214 14 22 15 221 15 222 16 223 17 224 20 3 21 31 21 32 21 33 22 34 23 341 23 342 24 343 27 344 29 35 31 351 31 352
More informationスパコンに通じる並列プログラミングの基礎
2016.06.06 2016.06.06 1 / 60 2016.06.06 2 / 60 Windows, Mac Unix 0444-J 2016.06.06 3 / 60 Part I Unix GUI CUI: Unix, Windows, Mac OS Part II 0444-J 2016.06.06 4 / 60 ( : ) 6 6 ( ) 6 10 6 16 SX-ACE 6 17
More information140 120 100 80 60 40 20 0 115 107 102 99 95 97 95 97 98 100 64 72 37 60 50 53 50 36 32 18 H18 H19 H20 H21 H22 H23 H24 H25 H26 H27 1 100 () 80 60 40 20 0 1 19 16 10 11 6 8 9 5 10 35 76 83 73 68 46 44 H11
More informationC による数値計算法入門 ( 第 2 版 ) 新装版 サンプルページ この本の定価 判型などは, 以下の URL からご覧いただけます. このサンプルページの内容は, 新装版 1 刷発行時のものです.
C による数値計算法入門 ( 第 2 版 ) 新装版 サンプルページ この本の定価 判型などは, 以下の URL からご覧いただけます. http://www.morikita.co.jp/books/mid/009383 このサンプルページの内容は, 新装版 1 刷発行時のものです. i 2 22 2 13 ( ) 2 (1) ANSI (2) 2 (3) Web http://www.morikita.co.jp/books/mid/009383
More informationx x x 2, A 4 2 Ax.4 A A A A λ λ 4 λ 2 A λe λ λ2 5λ + 6 0,...λ 2, λ 2 3 E 0 E 0 p p Ap λp λ 2 p 4 2 p p 2 p { 4p 2 2p p + 2 p, p 2 λ {
K E N Z OU 2008 8. 4x 2x 2 2 2 x + x 2. x 2 2x 2, 2 2 d 2 x 2 2.2 2 3x 2... d 2 x 2 5 + 6x 0 2 2 d 2 x 2 + P t + P 2tx Qx x x, x 2 2 2 x 2 P 2 tx P tx 2 + Qx x, x 2. d x 4 2 x 2 x x 2.3 x x x 2, A 4 2
More informationDVIOUT-HYOU
() P. () AB () AB ³ ³, BA, BA ³ ³ P. A B B A IA (B B)A B (BA) B A ³, A ³ ³ B ³ ³ x z ³ A AA w ³ AA ³ x z ³ x + z +w ³ w x + z +w ½ x + ½ z +w x + z +w x,,z,w ³ A ³ AA I x,, z, w ³ A ³ ³ + + A ³ A A P.
More informationXcalableMP入門
XcalableMP 1 HPC-Phys@, 2018 8 22 XcalableMP XMP XMP Lattice QCD!2 XMP MPI MPI!3 XMP 1/2 PCXMP MPI Fortran CCoarray C++ MPIMPI XMP OpenMP http://xcalablemp.org!4 XMP 2/2 SPMD (Single Program Multiple Data)
More information@@ ;; QQ a @@@@ ;;;; QQQQ @@@@ ;;;; QQQQ a a @@@ ;;; QQQ @@@ ;;; QQQ a a a ; ; ; @ @ @ ; ; ; Q Q Q ;; ;; @@ @@ ;; ;; QQ QQ ;; @@ ;; QQ a a a a @@@ ;;; QQQ @@@ ;;; QQQ ;;; ;;; @@@ @@@ ;;; ;;; QQQ QQQ
More information