XMPによる並列化実装2

Similar documents
XcalableMP入門

XACC講習会

HPC146

1.overview

XACCの概要

Microsoft PowerPoint - sps14_kogi6.pptx

研究背景 大規模な演算を行うためには 分散メモリ型システムの利用が必須 Message Passing Interface MPI 並列プログラムの大半はMPIを利用 様々な実装 OpenMPI, MPICH, MVAPICH, MPI.NET プログラミングコストが高いため 生産性が悪い 新しい並

comment.dvi

ex01.dvi

[1] #include<stdio.h> main() { printf("hello, world."); return 0; } (G1) int long int float ± ±

/* do-while */ #include <stdio.h> #include <math.h> int main(void) double val1, val2, arith_mean, geo_mean; printf( \n ); do printf( ); scanf( %lf, &v

DKA ( 1) 1 n i=1 α i c n 1 = 0 ( 1) 2 n i 1 <i 2 α i1 α i2 c n 2 = 0 ( 1) 3 n i 1 <i 2 <i 3 α i1 α i2 α i3 c n 3 = 0. ( 1) n 1 n i 1 <i 2 < <i


Krylov (b) x k+1 := x k + α k p k (c) r k+1 := r k α k Ap k ( := b Ax k+1 ) (d) β k := r k r k 2 2 (e) : r k 2 / r 0 2 < ε R (f) p k+1 :=

sim98-8.dvi

r07.dvi

ohp07.dvi

115 9 MPIBNCpack 9.1 BNCpack 1CPU X = , B =

tuat1.dvi

ex01.dvi

1 1.1 C 2 1 double a[ ][ ]; 1 3x x3 ( ) malloc() malloc 2 #include <stdio.h> #include

Gauss

Microsoft Word - 計算科学演習第1回3.doc

PowerPoint プレゼンテーション

Cプログラミング入門 (2018) - 第5回 ─ 制御構造:条件分岐 ─

01_OpenMP_osx.indd

1 1.1 C 2 1 double a[ ][ ]; 1 3x x3 ( ) malloc() 2 double *a[ ]; double 1 malloc() dou

WinHPC ppt

II 3 yacc (2) 2005 : Yacc 0 ~nakai/ipp2 1 C main main 1 NULL NULL for 2 (a) Yacc 2 (b) 2 3 y

j x j j j + 1 l j l j = x j+1 x j, n x n x 1 = n 1 l j j=1 H j j + 1 l j l j E

:30 12:00 I. I VI II. III. IV. a d V. VI

C言語によるアルゴリズムとデータ構造

I. Backus-Naur BNF : N N 0 N N N N N N 0, 1 BNF N N 0 11 (parse tree) 11 (1) (2) (3) (4) II. 0(0 101)* (

untitled

:30 12:00 I. I VI II. III. IV. a d V. VI

nakao

PC Windows 95, Windows 98, Windows NT, Windows 2000, MS-DOS, UNIX CPU

II ( ) prog8-1.c s1542h017%./prog8-1 1 => 35 Hiroshi 2 => 23 Koji 3 => 67 Satoshi 4 => 87 Junko 5 => 64 Ichiro 6 => 89 Mari 7 => 73 D

AHPを用いた大相撲の新しい番付編成

HPC143

練習&演習問題

QR

1 4 2 EP) (EP) (EP)

卒 業 研 究 報 告.PDF

I. Backus-Naur BNF S + S S * S S x S +, *, x BNF S (parse tree) : * x + x x S * S x + S S S x x (1) * x x * x (2) * + x x x (3) + x * x + x x (4) * *

第7章 有限要素法のプログラミング

Microsoft Word - C.....u.K...doc

file:///D|/C言語の擬似クラス.txt

joho07-1.ppt

新版明解C言語 実践編

Cプログラミング(2018) - 第11回 – 宿題2の解説,演習2 –

1.ppt

untitled

lexex.dvi

C言語による数値計算プログラミング演習

実際の株価データを用いたオプション料の計算

PowerPoint プレゼンテーション

02_C-C++_osx.indd

9 8 7 (x-1.0)*(x-1.0) *(x-1.0) (a) f(a) (b) f(a) Figure 1: f(a) a =1.0 (1) a 1.0 f(1.0)

#define N1 N+1 double x[n1] =.5, 1., 2.; double hokan[n1] = 1.65, 2.72, 7.39 ; double xx[]=.2,.4,.6,.8,1.2,1.4,1.6,1.8; double lagrng(double xx); main

パッケージ Rhpc の状況

program7app.ppt

I ASCII ( ) NUL 16 DLE SP P p 1 SOH 17 DC1! 1 A Q a q STX 2 18 DC2 " 2 B R b

r08.dvi

file"a" file"b" fp = fopen("a", "r"); while(fgets(line, BUFSIZ, fp)) {... fclose(fp); fp = fopen("b", "r"); while(fgets(line, BUFSIZ, fp)) {... fclose

1.3 ( ) ( ) C

Intel® Compilers Professional Editions

1 return main() { main main C 1 戻り値の型 関数名 引数 関数ブロックをあらわす中括弧 main() 関数の定義 int main(void){ printf("hello World!!\n"); return 0; 戻り値 1: main() 2.2 C main

Microsoft PowerPoint - KHPCSS pptx

/ SCHEDULE /06/07(Tue) / Basic of Programming /06/09(Thu) / Fundamental structures /06/14(Tue) / Memory Management /06/1

演習2:MPI初歩

C ( ) C ( ) C C C C C 1 Fortran Character*72 name Integer age Real income 3 1 C mandata mandata ( ) name age income mandata ( ) mandat

第9回 配列(array)型の変数

ex14.dvi

OpenMP (1) 1, 12 1 UNIX (FUJITSU GP7000F model 900), 13 1 (COMPAQ GS320) FUJITSU VPP5000/64 1 (a) (b) 1: ( 1(a))

kiso2-09.key

ohp03.dvi

スライド 1

PowerPoint プレゼンテーション

Microsoft Word - openmp-txt.doc

PowerPoint Presentation

main

橡Pro PDF

OHP.dvi

ohp08.dvi

I117 II I117 PROGRAMMING PRACTICE II 2 SOFTWARE DEVELOPMENT ENV. 2 Research Center for Advanced Computing Infrastructure (RCACI) / Yasuhiro Ohara yasu

Microsoft Word - no206.docx

C

AutoTuned-RB

2 [1] 7 5 C 2.1 (kikuchi-fem-mac ) input.dat (cat input.dat type input.dat ))

[ 1] 1 Hello World!! 1 #include <s t d i o. h> 2 3 int main ( ) { 4 5 p r i n t f ( H e l l o World!! \ n ) ; 6 7 return 0 ; 8 } 1:

SystemC言語概論

コンピュータ概論

P06.ppt

PowerPoint Presentation

C C UNIX C ( ) 4 1 HTML 1

bioinfo-a10s-4_align

スライド 1

44 6 MPI 4 : #LIB=-lmpich -lm 5 : LIB=-lmpi -lm 7 : mpi1: mpi1.c 8 : $(CC) -o mpi1 mpi1.c $(LIB) 9 : 10 : clean: 11 : -$(DEL) mpi1 make mpi1 1 % mpiru

1) OOP 2) ( ) 3.2) printf Number3-2.cpp #include <stdio.h> class Number Number(); // ~Number(); // void setnumber(float n); float getnumber();

Transcription:

2

3 C Fortran Exercise 1 Exercise 2 Serial init.c init.f90 XMP xmp_init.c xmp_init.f90 Serial laplace.c laplace.f90 XMP xmp_laplace.c xmp_laplace.f90

#include <stdio.h> int a[10]; program init integer :: a(10), i int main(){ for(int i=0;i<10;i++) a[i] = i+1; for(int i=0;i<10;i++) printf("%d\n", a[i]); do i=1,10 a(i)=i end do do i=1,10 print *, a(i) end do } return 0; end program init 4

#pragma xmp nodes p[2] #pragma xmp template t[10] #pragma xmp distribute t[block] onto p int a[10]; [align directive] int main(){ [loop directive] for(int i=0;i<10;i++) a[i] = i+1; [loop directive] for(int i=0;i<10;i++) printf("[%d] %d\n", xmpc_node_num(), a[i]); return 0; } program init !$xmp nodes p(2) !$xmp template t(10) !$xmp distribute t(block) onto p integer :: a(10), i [align directive] [loop directive] do i=1,10 a(i)=i end do [loop directive] do i=1,10 print *, xmp_node_num(), a(i) end do end program init 5

6

7

8

#pragma xmp loop (j,i) on t[j][i] for(int j=0;j<10;j++) for(int i=0;i<10;i++) u[j][i] = uu[j][i]; !$xmp loop (i,j) on t(i,j) do j=1,10 do i=1,10 u(i,j) = uu(i,j) end do end do 9

#pragma xmp shadow uu[1:1][1:1] : #pragma xmp reflect (uu) : #pragma xmp loop (j,i) on t[j][i] for(int j=0;j<10;j++) for(int i=0;i<10;i++) u[j][i] = uu[j-1][i] + uu[j][i-1] +... !$xmp shadow uu(1:1,1:1) : !$xmp reflect (uu) : !$xmp loop (i,j) on t(i,j) do j=1,10 do i=1,10 u(i,j) = uu(i,j-1) + uu(i-1,j) + ... end do end do 10

#pragma xmp loop (j,i) on t[j][i] reduction (+:s) for(int j=0;j<10;j++) for(int i=0;i<10;i++) s += abs(uu[j][i] - u[j][i]);!$xmp loop (i,j) on t(i,j) reduction(+:s) do j=1,10 do i=1,10 s = s + abs(uu(i,j) - u(i,j)) end do end do 11

12

13

14

15 #include <stdio.h> #include <stdlib.h> #include <math.h> #define N1 64 #define N2 64 double u[N2][N1],uu[N2][N1]; #pragma xmp nodes p[*][4] #pragma xmp template t[N2][N1] [distribute directive] [align directive] [align directive] [shadow directive] int main(int argc, char **argv) { int j,i,k,niter = 100; double value = 0.0; #pragma xmp loop (j,i) on t[j][i] for(j = 0; j < N2; j++){ for(i = 0; i < N1; i++){ u[j][i] = 0.0; uu[j][i] = 0.0; } } pattern 1 [loop directive] for(j = 1; j < N2-1; j++) for(i = 1; i < N1-1; i++) u[j][i] = sin((double)i/N1*M_PI) + cos((double)j/N2*M_PI); for(k = 0; k < niter; k++){ /* old <- new */ [loop directive] for(j = 1; j < N2-1; j++) for(i = 1; i < N1-1; i++) uu[j][i] = u[j][i]; [reflect directive] [loop directive] for(j = 1; j < N2-1; j++) for(i = 1; i < N1-1; i++) u[j][i] = (uu[j-1][i] + uu[j+1][i] + uu[j][i-1] + uu[j][i+1])/4.0; } pattern 1 pattern 1 pattern 2 /* check value */ value = 0.0; #pragma xmp loop (j,i) on t[j][i] [reduction clause] for(j = 1; j < N2-1; j++) for(i = 1; i < N1-1; i++) value += fabs(uu[j][i]-u[j][i]); pattern 3

16

17 Coarray 1 : Put/Get 2 : Put/Get 3 :

(Put/Get) int a[10]; int b[10]:[*]; // b is a coarray image 0 image 1 : if(xmpc_this_image() == 0) a[0:3] = b[3:3]:[1] // Get a[10] b[10] a[10] b[10] 3 5 18

base, length, step int Base: 0 Length : Step : 1 19

20 A[10:10] → A[10]〜A[19] (10要素); A[10:] → A[10]〜A[99] (90要素); A[:10] → A[0]〜A[9] (10要素); A[10:5:2] → A[10], A[12], A[14], A[16], A[18] (5要素); A[:] → A全体 (A[0]〜A[99])

(Put/Get) integer :: a(10) integer :: b(10)[*] ! b is a coarray : if(this_image() == 1) then a(1:3) = b(3:5)[2] ! Get image 1 image 2 a(10) a(10) b(10) b(10) 3 5 21

void xmp_sync_all(int *status) image 0 image 1 sync all Put sync_all sync_all 22

23 if(xmpc_this_image() == 0){ tmp = val:[1]; // Get val:[1] = val; // Put } xmp_sync_all(NULL); if(this_image() == 1) then tmp = val[2] ! Get val[2] = val ! Put end if sync all

xmp_sync_all(NULL); if(xmpc_this_image() == 0){ tmp = val:[1]; // Get val:[1] = val; // Put } xmp_sync_all(NULL); val = 1 tmp = 0 Get val:[1] to tmp image 0 image 1 val = 2 tmp = 0 xmp_sync_all [START] My image is 0, val = 1 tmp = 0 [START] My image is 1, val = 2 tmp = 0 [END] My image is 0, val = 1 tmp = 2 [END] My image is 1, val = 1 tmp = 0 tmp = 2 Put val to val:[1] val = 1 xmp_sync_all 24

sync all if(this_image() == 1) then tmp = val[2] ! Get val[2] = val ! Put end if sync all val = 1 tmp = 0 Get val[2] to tmp image 1 image 2 val = 2 tmp = 0 sync all [START] My image is 1, val = 1 tmp = 0 [START] My image is 2, val = 2 tmp = 0 [END] My image is 1, val = 1 tmp = 2 [END] My image is 2, val = 1 tmp = 0 tmp = 2 Put val to val[2] val = 1 sync all 25

[C] 0, [F] 1 a[10] = {0, 1,.., 9}; b[10] = {0, 1,.., 9}; c[10][10] = {{0, 1,.., 9}, {10, 11,.., 19},... {90, 91,.., 99}}; [C] 1, [F] 2 a[10] = {10, 11,.., 19}; b[10] = {10, 11,.., 19}; c[10][10] = {{100, 101,.., 109}, {110, 111,.., 119},... {190, 191,.., 199}}; 26

[C] if(xmpc_this_image() == 0){ a[0:3] = a[5:3]:[1]; // Get } [F] if(this_image() == 1) then a(1:3) = a(6:8)[2]! Get end if [C] a[0] = 15 a[1] = 16 a[2] = 17 a[3] = 3 a[4] = 4 a[5] = 5 a[6] = 6 a[7] = 7 a[8] = 8 a[9] = 9 [F] a(1) = 15 a(2) = 16 a(3) = 17 a(4) = 3 a(5) = 4 a(6) = 5 a(7) = 6 a(8) = 7 a(9) = 8 a(10) = 9 27

[C] if(xmpc_this_image() == 0){ b[0:5:2] = b[0:5:2]:[1]; // Get } [F] if(this_image() == 1) then b(1:10:2) = b(1:10:2)[2]! Get end if [C] b[0] = 10 b[1] = 1 b[2] = 12 b[3] = 3 b[4] = 14 b[5] = 5 b[6] = 16 b[7] = 7 b[8] = 18 b[9] = 9 [F] b(1) = 10 b(2) = 1 b(3) = 12 b(4) = 3 b(5) = 14 b(6) = 5 b(7) = 16 b(8) = 7 b(9) = 18 b(10) = 9 28

[C] if(xmpc_this_image() == 0){ c[0:5][0:5]:[1] = c[0:5][0:5]; // Put } [F] if(this_image() == 1) then c(1:5,1:5)[2] = c(1:5,1:5) ! Put end if 0 1 2 3 4 105 106 107 108 109 10 11 12 13 14 115 116 117 118 119 20 21 22 23 24 125 126 127 128 129 30 31 32 33 34 135 136 137 138 139 40 41 42 43 44 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 29

30 for(i=0;i<n;i++) for(j=0;j<n;j++) for(k=0;k<n;k++) c[i][k] += a[i][j] * b[j][k];

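31 Calculation by sub-matrix $\begin{pmatrix} C_{00} & C_{01} \\ C_{10} & C_{11} \end{pmatrix} = \begin{pmatrix} A_{00} & A_{01} \\ A_{10} & A_{11} \end{pmatrix} \begin{pmatrix} B_{00} & B_{01} \\ B_{10} & B_{11} \end{pmatrix}$, $C_{00} = A_{00} B_{00} + A_{01} B_{10}$, $C_{01} = A_{00} B_{01} + A_{01} B_{11}$, $C_{10} = A_{10} B_{00} + A_{11} B_{10}$, $C_{11} = A_{10} B_{01} + A_{11} B_{11}$ XMP/C XMP/Fortran 0, 1 1, 2 2, 3 3, 4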

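32 Calculation by sub-matrix $\begin{pmatrix} C_{00} & C_{01} \\ C_{10} & C_{11} \end{pmatrix} = \begin{pmatrix} A_{00} & A_{01} \\ A_{10} & A_{11} \end{pmatrix} \begin{pmatrix} B_{00} & B_{01} \\ B_{10} & B_{11} \end{pmatrix}$, $C_{00} = A_{00} B_{00} + A_{01} B_{10}$, $C_{01} = A_{00} B_{01} + A_{01} B_{11}$, $C_{10} = A_{10} B_{00} + A_{11} B_{10}$, $C_{11} = A_{10} B_{01} + A_{11} B_{11}$ XMP/C XMP/Fortran 0, 1 1, 2 2, 3 3, 4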

33 matmul.c or matmul.f90 XMP