6a7c06a10cfab9a046d6bc3cf4e9ea0b71a88b45
[cuda-ada.git] / doc / presentation.lyx
1 #LyX 2.0 created this file. For more info see http://www.lyx.org/
2 \lyxformat 413
3 \begin_document
4 \begin_header
5 \textclass beamer
6 \begin_preamble
7 \usepackage{listings}
8 \usetheme{Frankfurt}
9 % or ...
10 %\usetheme{Antibes}     % tree outline, neat
11 %\usetheme{JuanLesPins} % like Antibes, with shading
12 %\usetheme{Bergen}      % outline on side
13 %\usetheme{Luebeck}     % like Warsaw, square sides
14 %\usetheme{Berkeley}    % interesting left bar outline
15 %\usetheme{Madrid}      % clean, nice.  7/12 page numbers
16 %\usetheme{Berlin}      % dots show slide number
17 %\usetheme{Malmoe}      % OK, plain, unshaded
18 %\usetheme{Boadilla}    % nice, white bg, no top bar
19 %\usetheme{Marburg}     % nice, outline on right
20 %\usetheme{boxes}       % ???
21 %\usetheme{Montpellier} % tree outline on top, plainish white
22 %\usetheme{Copenhagen}  % like Warsaw
23 %\usetheme{PaloAlto}    % looks good
24 %\usetheme{Darmstadt}   % like Warsaw with circle outline
25 %\usetheme{Pittsburgh}
26 %\usetheme{default}
27 %\usetheme{Rochester}   % like boxy, unshaded warsaw
28 %\usetheme{Dresden}     % circle outline on top
29 %\usetheme{Singapore}   % purple gradient top
30 %\usetheme{Frankfurt}   % like Warsaw with circle outline on top
31 %\usetheme{Szeged}
32 %\usetheme{Goettingen}  % light purple right bar outline
33 %\usetheme{Warsaw}
34 %\usetheme{Hannover}    % like Goett with bar on left
35 %\usetheme{compatibility}
36 %\usetheme{Ilmenau}
37
38 \setbeamercovered{transparent}
39 % or whatever (possibly just delete it)
40
41 \useinnertheme{rectangles}
42
43 %\usecolortheme{seahorse}
44 %\usecolortheme{rose}
45
46 % seems to fix typewriter font in outline header:
47 \usepackage{ae,aecompl}
48
49 \definecolor{newyellow}{rgb}{1,1,0.8}
50 \definecolor{colKeys}{rgb}{0,0,1}
51 \definecolor{colIdentifier}{rgb}{0,0,0}
52 \definecolor{colComments}{rgb}{1,0,0}
53 \definecolor{colString}{rgb}{0,0.5,0}
54 \end_preamble
55 \use_default_options false
56 \maintain_unincluded_children false
57 \language english
58 \language_package default
59 \inputencoding auto
60 \fontencoding global
61 \font_roman default
62 \font_sans default
63 \font_typewriter default
64 \font_default_family default
65 \use_non_tex_fonts false
66 \font_sc false
67 \font_osf false
68 \font_sf_scale 100
69 \font_tt_scale 100
70
71 \graphics default
72 \default_output_format default
73 \output_sync 0
74 \bibtex_command default
75 \index_command default
76 \paperfontsize default
77 \spacing single
78 \use_hyperref false
79 \papersize default
80 \use_geometry true
81 \use_amsmath 2
82 \use_esint 0
83 \use_mhchem 1
84 \use_mathdots 1
85 \cite_engine basic
86 \use_bibtopic false
87 \use_indices false
88 \paperorientation portrait
89 \suppress_date false
90 \use_refstyle 0
91 \index Index
92 \shortcut idx
93 \color #008000
94 \end_index
95 \secnumdepth 2
96 \tocdepth 2
97 \paragraph_separation indent
98 \paragraph_indentation default
99 \quotes_language english
100 \papercolumns 1
101 \papersides 1
102 \paperpagestyle default
103 \listings_params "backgroundcolor={\color{newyellow}},basicstyle={\ttfamily\small},breakautoindent=true,breaklines=true,captionpos=b,commentstyle={\color{colComments}},extendedchars=true,frame=single,identifierstyle={\color{colIdentifier}},keywordstyle={\color{colKeys}},language=Ada,numbers=left,numberstyle={\tiny},showspaces=false,showstringspaces=false,stringstyle={\color{colString}},tabsize=4"
104 \tracking_changes false
105 \output_changes false
106 \html_math_output 0
107 \html_css_as_file 0
108 \html_be_strict false
109 \end_header
110
111 \begin_body
112
113 \begin_layout Title
114 CUDA/Ada
115 \begin_inset Argument
116 status open
117
118 \begin_layout Plain Layout
119 CUDA/Ada
120 \end_layout
121
122 \end_inset
123
124
125 \end_layout
126
127 \begin_layout Subtitle
128 An Ada binding to CUDA
129 \end_layout
130
131 \begin_layout Author
132 Reto Bürki, Adrian-Ken Rüegsegger
133 \begin_inset ERT
134 status collapsed
135
136 \begin_layout Plain Layout
137
138
139 \backslash
140
141 \backslash
142
143 \end_layout
144
145 \end_inset
146
147 University of Applied Sciences Rapperswil (HSR), Switzerland
148 \begin_inset Argument
149 status open
150
151 \begin_layout Plain Layout
152 Reto Bürki, Adrian-Ken Rüegsegger
153 \end_layout
154
155 \end_inset
156
157
158 \end_layout
159
160 \begin_layout Date
161 1/16/2012
162 \begin_inset ERT
163 status collapsed
164
165 \begin_layout Plain Layout
166
167
168 \backslash
169
170 \backslash
171
172 \end_layout
173
174 \end_inset
175
176 Master seminar: Program Analysis and Transformation
177 \end_layout
178
179 \begin_layout BeginFrame
180 Outline
181 \end_layout
182
183 \begin_layout Standard
184 \begin_inset CommandInset toc
185 LatexCommand tableofcontents
186
187 \end_inset
188
189
190 \end_layout
191
192 \begin_layout EndFrame
193
194 \end_layout
195
196 \begin_layout Section
197 Introduction
198 \end_layout
199
200 \begin_layout Subsection
201 CUDA
202 \end_layout
203
204 \begin_layout BeginFrame
205 CUDA
206 \end_layout
207
208 \begin_layout Block
209 \begin_inset ERT
210 status collapsed
211
212 \begin_layout Plain Layout
213
214 {
215 \end_layout
216
217 \end_inset
218
219 What is CUDA?
220 \begin_inset ERT
221 status collapsed
222
223 \begin_layout Plain Layout
224
225 }
226 \end_layout
227
228 \end_inset
229
230
231 \end_layout
232
233 \begin_deeper
234 \begin_layout Itemize
235 Parallel computing architecture developed by NVIDIA
236 \end_layout
237
238 \begin_layout Itemize
239 \begin_inset Quotes eld
240 \end_inset
241
242 Compute Unified Device Architecture
243 \begin_inset Quotes erd
244 \end_inset
245
246
247 \end_layout
248
249 \begin_layout Itemize
250 General purpose computation using GPU (GPGPU)
251 \end_layout
252
253 \begin_layout Itemize
254 Use GPU as dedicated massively parallel co-processor
255 \end_layout
256
257 \begin_layout Itemize
258 Tremendous performance improvements possible
259 \end_layout
260
261 \end_deeper
262 \begin_layout EndFrame
263
264 \end_layout
265
266 \begin_layout BeginFrame
267 CUDA Processing
268 \end_layout
269
270 \begin_layout Standard
271 \align center
272 \begin_inset Graphics
273         filename cuda-processing.png
274         scale 33
275
276 \end_inset
277
278
279 \end_layout
280
281 \begin_layout EndFrame
282
283 \end_layout
284
285 \begin_layout Subsection
286 Ada
287 \end_layout
288
289 \begin_layout BeginFrame
290 Ada
291 \end_layout
292
293 \begin_layout Block
294 \begin_inset ERT
295 status collapsed
296
297 \begin_layout Plain Layout
298
299 {
300 \end_layout
301
302 \end_inset
303
304 The Ada programming language
305 \begin_inset ERT
306 status collapsed
307
308 \begin_layout Plain Layout
309
310 }
311 \end_layout
312
313 \end_inset
314
315
316 \end_layout
317
318 \begin_deeper
319 \begin_layout Itemize
320 Structured, strongly typed programming language
321 \end_layout
322
323 \begin_layout Itemize
324 Initiated by the US Department of Defense (DoD)
325 \end_layout
326
327 \begin_layout Itemize
328 First standardized high-level programming language
329 \end_layout
330
331 \begin_layout Itemize
332 Emphasizes safety and security
333 \end_layout
334
335 \begin_layout Itemize
336 Supports all modern programming paradigms
337 \end_layout
338
339 \begin_layout Itemize
340 Current language standard is Ada 2005, next release 2012
341 \end_layout
342
343 \begin_layout Itemize
344 Mostly used in aviation, railway systems, banking, military and space technology
345 \end_layout
346
347 \end_deeper
348 \begin_layout EndFrame
349
350 \end_layout
351
352 \begin_layout BeginFrame
353 Ada Compiler
354 \end_layout
355
356 \begin_layout Block
357 \begin_inset ERT
358 status collapsed
359
360 \begin_layout Plain Layout
361
362 {
363 \end_layout
364
365 \end_inset
366
367 GNAT
368 \begin_inset ERT
369 status collapsed
370
371 \begin_layout Plain Layout
372
373 }
374 \end_layout
375
376 \end_inset
377
378
379 \end_layout
380
381 \begin_deeper
382 \begin_layout Itemize
383 Free-software compiler for Ada
384 \end_layout
385
386 \begin_layout Itemize
387 Part of the GNU Compiler Collection (GCC)
388 \end_layout
389
390 \begin_layout Itemize
391 Supports all versions of Ada (83, 95, 2005)
392 \end_layout
393
394 \begin_layout Itemize
395 Available on most operating systems
396 \end_layout
397
398 \begin_layout Itemize
399 100% compliant with Ada Conformity Assessment Test Suite (ACATS)
400 \end_layout
401
402 \end_deeper
403 \begin_layout EndFrame
404
405 \end_layout
406
407 \begin_layout Subsection
408 Motivation
409 \end_layout
410
411 \begin_layout BeginFrame
412 Motivation
413 \end_layout
414
415 \begin_layout Block
416 \begin_inset ERT
417 status collapsed
418
419 \begin_layout Plain Layout
420
421 {
422 \end_layout
423
424 \end_inset
425
426 Why CUDA/Ada?
427 \begin_inset ERT
428 status collapsed
429
430 \begin_layout Plain Layout
431
432 }
433 \end_layout
434
435 \end_inset
436
437
438 \end_layout
439
440 \begin_deeper
441 \begin_layout Itemize
442 CUDA wrappers exist for many languages but not for Ada
443 \end_layout
444
445 \begin_layout Itemize
446 Make CUDA accessible for Ada developers
447 \end_layout
448
449 \begin_layout Itemize
450 Benefit from the advantages and processing power of a GPU in Ada applications
451 \end_layout
452
453 \begin_layout Itemize
454 Curiosity
455 \end_layout
456
457 \begin_deeper
458 \begin_layout Itemize
459 How well do CUDA and Ada match up?
460 \end_layout
461
462 \begin_layout Itemize
463 Can it be done in a nice way?
464 \end_layout
465
466 \begin_layout Itemize
467 Possible performance penalties from Ada runtime?
468 \end_layout
469
470 \end_deeper
471 \end_deeper
472 \begin_layout EndFrame
473
474 \end_layout
475
476 \begin_layout Section
477 Bindings in Ada
478 \end_layout
479
480 \begin_layout Subsection
481 pragma Import
482 \end_layout
483
484 \begin_layout BeginFrame
485 Bindings in Ada
486 \end_layout
487
488 \begin_layout Block
489 \begin_inset ERT
490 status collapsed
491
492 \begin_layout Plain Layout
493
494 {
495 \end_layout
496
497 \end_inset
498
499
500 \family typewriter
501 Import
502 \family default
503  pragma
504 \begin_inset ERT
505 status collapsed
506
507 \begin_layout Plain Layout
508
509 }
510 \end_layout
511
512 \end_inset
513
514
515 \end_layout
516
517 \begin_deeper
518 \begin_layout Itemize
519 Used to access functionality which is written in another programming language
520 \end_layout
521
522 \begin_layout Itemize
523 Pragmas are directives which control the compiler
524 \end_layout
525
526 \begin_layout Itemize
527 General form: 
528 \family typewriter
529 pragma Name (Parameter_List);
530 \end_layout
531
532 \begin_layout Itemize
533 Predefined packages that make it easier to interface with C
534 \end_layout
535
536 \begin_deeper
537 \begin_layout Itemize
538
539 \family typewriter
540 Interfaces.C
541 \end_layout
542
543 \begin_layout Itemize
544
545 \family typewriter
546 System
547 \end_layout
548
549 \end_deeper
550 \end_deeper
551 \begin_layout EndFrame
552
553 \end_layout
554
555 \begin_layout Standard
556 \begin_inset ERT
557 status open
558
559 \begin_layout Plain Layout
560
561
562 \backslash
563 begin{frame}[fragile]
564 \backslash
565 frametitle{Importing a C function}
566 \end_layout
567
568 \end_inset
569
570
571 \end_layout
572
573 \begin_layout Standard
574
575 \family typewriter
576 \series bold
577 C function
578 \end_layout
579
580 \begin_layout Standard
581 \begin_inset listings
582 lstparams "language=C"
583 inline false
584 status open
585
586 \begin_layout Plain Layout
587
588 int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen);
589 \end_layout
590
591 \end_inset
592
593
594 \end_layout
595
596 \begin_layout Standard
597
598 \family typewriter
599 \series bold
600 Ada import
601 \end_layout
602
603 \begin_layout Standard
604 \begin_inset listings
605 inline false
606 status open
607
608 \begin_layout Plain Layout
609
610 function C_Bind
611 \end_layout
612
613 \begin_layout Plain Layout
614
615  (S       : Interfaces.C.int;
616 \end_layout
617
618 \begin_layout Plain Layout
619
620   Name    : System.Address;
621 \end_layout
622
623 \begin_layout Plain Layout
624
625   Namelen : Interfaces.C.int)
626 \end_layout
627
628 \begin_layout Plain Layout
629
630   return Interfaces.C.int; 
631 \end_layout
632
633 \begin_layout Plain Layout
634
635 pragma Import (C, C_Bind, "bind"); 
636 \end_layout
637
638 \end_inset
639
640
641 \end_layout
642
643 \begin_layout Standard
644 \begin_inset ERT
645 status open
646
647 \begin_layout Plain Layout
648
649
650 \backslash
651 end{frame}
652 \end_layout
653
654 \end_inset
655
656
657 \end_layout
658
659 \begin_layout Subsection
660 Definition
661 \end_layout
662
663 \begin_layout BeginFrame
664
665 \family typewriter
666 Definition
667 \end_layout
668
669 \begin_layout AlertBlock
670 \begin_inset ERT
671 status collapsed
672
673 \begin_layout Plain Layout
674
675 {
676 \end_layout
677
678 \end_inset
679
680
681 \family typewriter
682 Binding
683 \family default
684
685 \begin_inset ERT
686 status collapsed
687
688 \begin_layout Plain Layout
689
690 }
691 \end_layout
692
693 \end_inset
694
695
696 \end_layout
697
698 \begin_deeper
699 \begin_layout Quote
700 A library which wraps another library not written in Ada is called a 
701 \begin_inset Quotes eld
702 \end_inset
703
704 binding
705 \begin_inset Quotes erd
706 \end_inset
707
708 .
709  Other programming languages also use the term 
710 \begin_inset Quotes eld
711 \end_inset
712
713 wrapper
714 \begin_inset Quotes erd
715 \end_inset
716
717  or 
718 \begin_inset Quotes eld
719 \end_inset
720
721 library wrapper
722 \begin_inset Quotes erd
723 \end_inset
724
725 .
726  
727 \end_layout
728
729 \end_deeper
730 \begin_layout EndFrame
731
732 \end_layout
733
734 \begin_layout Subsection
735 Thin- and Thick-Binding
736 \end_layout
737
738 \begin_layout BeginFrame
739 Thin-Binding
740 \end_layout
741
742 \begin_layout Block
743 \begin_inset ERT
744 status collapsed
745
746 \begin_layout Plain Layout
747
748 {
749 \end_layout
750
751 \end_inset
752
753
754 \family typewriter
755 What is a Thin-Binding
756 \family default
757 ?
758 \begin_inset ERT
759 status collapsed
760
761 \begin_layout Plain Layout
762
763 }
764 \end_layout
765
766 \end_inset
767
768
769 \end_layout
770
771 \begin_deeper
772 \begin_layout Itemize
773 One-to-one mapping of the foreign library interface to Ada
774 \end_layout
775
776 \begin_layout Itemize
777 Straightforward to create but time-consuming and error-prone
778 \end_layout
779
780 \begin_layout Itemize
781 Cumbersome to work with (because of direct mapping)
782 \end_layout
783
784 \begin_layout Itemize
785 No protection normally guaranteed by Ada
786 \end_layout
787
788 \end_deeper
789 \begin_layout EndFrame
790
791 \end_layout
792
793 \begin_layout BeginFrame
794 Thick-Binding
795 \end_layout
796
797 \begin_layout Block
798 \begin_inset ERT
799 status collapsed
800
801 \begin_layout Plain Layout
802
803 {
804 \end_layout
805
806 \end_inset
807
808
809 \family typewriter
810 What is a Thick-Binding
811 \family default
812 ?
813 \begin_inset ERT
814 status collapsed
815
816 \begin_layout Plain Layout
817
818 }
819 \end_layout
820
821 \end_inset
822
823
824 \end_layout
825
826 \begin_deeper
827 \begin_layout Itemize
828 Provides a more abstract, Ada-like view of foreign library
829 \end_layout
830
831 \begin_layout Itemize
832 Provides proper Ada types and operations
833 \end_layout
834
835 \begin_layout Itemize
836 Ensures safe usage of wrapper library
837 \end_layout
838
839 \begin_layout Itemize
840 Easier to work with but takes more work and time to create
841 \end_layout
842
843 \end_deeper
844 \begin_layout EndFrame
845
846 \end_layout
847
848 \begin_layout BeginFrame
849 Thin- and Thick-Bindings
850 \end_layout
851
852 \begin_layout AlertBlock
853 \begin_inset ERT
854 status collapsed
855
856 \begin_layout Plain Layout
857
858 {
859 \end_layout
860
861 \end_inset
862
863 Using both
864 \begin_inset ERT
865 status collapsed
866
867 \begin_layout Plain Layout
868
869 }
870 \end_layout
871
872 \end_inset
873
874
875 \end_layout
876
877 \begin_deeper
878 \begin_layout Itemize
879 Thin- and Thick-Binding layers are often used in conjunction
880 \end_layout
881
882 \begin_layout Itemize
883 Thin-Binding wraps foreign language library (low-level)
884 \end_layout
885
886 \begin_layout Itemize
887 Thick-Binding abstracts Thin-Binding to provide an Ada-like 
888 \begin_inset Quotes eld
889 \end_inset
890
891 look and feel
892 \begin_inset Quotes erd
893 \end_inset
894
895
896 \end_layout
897
898 \begin_layout Itemize
899 Separation improves maintainability because each layer can be adapted
900  independently when needed
901 \end_layout
902
903 \end_deeper
904 \begin_layout EndFrame
905
906 \end_layout
907
908 \begin_layout Section
909 CUDA/Ada Design
910 \end_layout
911
912 \begin_layout Subsection
913 Design Goals
914 \end_layout
915
916 \begin_layout BeginFrame
917 Design Goals
918 \end_layout
919
920 \begin_layout Block
921 \begin_inset ERT
922 status collapsed
923
924 \begin_layout Plain Layout
925
926 {
927 \end_layout
928
929 \end_inset
930
931
932 \family typewriter
933 Inspiration
934 \family default
935
936 \begin_inset ERT
937 status collapsed
938
939 \begin_layout Plain Layout
940
941 }
942 \end_layout
943
944 \end_inset
945
946
947 \end_layout
948
949 \begin_deeper
950 \begin_layout Itemize
951 CUDA/Ada heavily inspired by PyCUDA
952 \end_layout
953
954 \begin_layout Itemize
955 Great binding for Python from Andreas Klöckner
956 \end_layout
957
958 \end_deeper
959 \begin_layout EndFrame
960
961 \end_layout
962
963 \begin_layout BeginFrame
964 Design Goals II
965 \end_layout
966
967 \begin_layout Block
968 \begin_inset ERT
969 status collapsed
970
971 \begin_layout Plain Layout
972
973 {
974 \end_layout
975
976 \end_inset
977
978
979 \family typewriter
980 Goals
981 \family default
982
983 \begin_inset ERT
984 status collapsed
985
986 \begin_layout Plain Layout
987
988 }
989 \end_layout
990
991 \end_inset
992
993
994 \end_layout
995
996 \begin_deeper
997 \begin_layout Itemize
998 Seamless access to CUDA from Ada
999 \end_layout
1000
1001 \begin_layout Itemize
1002 High abstraction
1003 \end_layout
1004
1005 \begin_layout Itemize
1006 Auto-initialization
1007 \end_layout
1008
1009 \begin_layout Itemize
1010 JIT-Compilation of CUDA kernels
1011 \end_layout
1012
1013 \begin_layout Itemize
1014 Convenient argument handling
1015 \end_layout
1016
1017 \begin_layout Itemize
1018 Error-handling using Ada Exceptions
1019 \end_layout
1020
1021 \begin_layout Itemize
1022 Speed
1023 \end_layout
1024
1025 \begin_layout Itemize
1026 Automatically generated Thin-Binding
1027 \end_layout
1028
1029 \end_deeper
1030 \begin_layout EndFrame
1031
1032 \end_layout
1033
1034 \begin_layout Section
1035 CUDA Binding
1036 \end_layout
1037
1038 \begin_layout Subsection
1039 Thin-Binding
1040 \end_layout
1041
1042 \begin_layout Standard
1043 \begin_inset ERT
1044 status open
1045
1046 \begin_layout Plain Layout
1047
1048
1049 \backslash
1050 begin{frame}[fragile]
1051 \backslash
1052 frametitle{Thin-Binding to CUDA}
1053 \end_layout
1054
1055 \end_inset
1056
1057
1058 \end_layout
1059
1060 \begin_layout Block
1061 \begin_inset ERT
1062 status collapsed
1063
1064 \begin_layout Plain Layout
1065
1066 {
1067 \end_layout
1068
1069 \end_inset
1070
1071
1072 \family typewriter
1073 Creation
1074 \family default
1075
1076 \begin_inset ERT
1077 status collapsed
1078
1079 \begin_layout Plain Layout
1080
1081 }
1082 \end_layout
1083
1084 \end_inset
1085
1086
1087 \end_layout
1088
1089 \begin_deeper
1090 \begin_layout Itemize
1091 Auto-generated using 
1092 \family typewriter
1093 -fdump-ada-spec
1094 \family default
1095  of GNAT
1096 \end_layout
1097
1098 \begin_layout Itemize
1099 CUDA/Ada imports CUDA driver and runtime API 
1100 \end_layout
1101
1102 \begin_deeper
1103 \begin_layout Itemize
1104
1105 \family typewriter
1106 cuda.h
1107 \end_layout
1108
1109 \begin_layout Itemize
1110
1111 \family typewriter
1112 cuda_runtime.h
1113 \end_layout
1114
1115 \end_deeper
1116 \begin_layout Itemize
1117 Support for i686 and x86_64
1118 \end_layout
1119
1120 \end_deeper
1121 \begin_layout Standard
1122 \begin_inset ERT
1123 status open
1124
1125 \begin_layout Plain Layout
1126
1127
1128 \backslash
1129 begin{verbatim}
1130 \end_layout
1131
1132 \begin_layout Plain Layout
1133
1134 gcc -c -fdump-ada-spec cuda.h
1135 \end_layout
1136
1137 \begin_layout Plain Layout
1138
1139 gcc -c -fdump-ada-spec cuda_runtime.h
1140 \end_layout
1141
1142 \begin_layout Plain Layout
1143
1144
1145 \backslash
1146 end{verbatim}
1147 \end_layout
1148
1149 \end_inset
1150
1151
1152 \end_layout
1153
1154 \begin_layout Standard
1155 \begin_inset ERT
1156 status open
1157
1158 \begin_layout Plain Layout
1159
1160
1161 \backslash
1162 end{frame}
1163 \end_layout
1164
1165 \end_inset
1166
1167
1168 \end_layout
1169
1170 \begin_layout Standard
1171 \begin_inset ERT
1172 status open
1173
1174 \begin_layout Plain Layout
1175
1176
1177 \backslash
1178 begin{frame}[fragile]
1179 \backslash
1180 frametitle{Architecture (build logic)}
1181 \end_layout
1182
1183 \end_inset
1184
1185
1186 \end_layout
1187
1188 \begin_layout Standard
1189 \begin_inset ERT
1190 status open
1191
1192 \begin_layout Plain Layout
1193
1194
1195 \backslash
1196 begin{verbatim}
1197 \end_layout
1198
1199 \begin_layout Plain Layout
1200
1201 ARCH ?= $(shell uname -m)
1202 \end_layout
1203
1204 \begin_layout Plain Layout
1205
1206 gnatmake -Pcuda -XARCH=$(ARCH)
1207 \end_layout
1208
1209 \begin_layout Plain Layout
1210
1211
1212 \backslash
1213 end{verbatim}
1214 \end_layout
1215
1216 \end_inset
1217
1218
1219 \end_layout
1220
1221 \begin_layout Standard
1222 \begin_inset listings
1223 inline false
1224 status open
1225
1226 \begin_layout Plain Layout
1227
1228 with "thin/binding";
1229 \end_layout
1230
1231 \end_inset
1232
1233
1234 \end_layout
1235
1236 \begin_layout Standard
1237 \begin_inset listings
1238 inline false
1239 status open
1240
1241 \begin_layout Plain Layout
1242
1243 type Arch_Type is ("x86_64", "i686"); 
1244 \end_layout
1245
1246 \begin_layout Plain Layout
1247
1248 Arch : Arch_Type := external ("ARCH", "x86_64");
1249 \end_layout
1250
1251 \begin_layout Plain Layout
1252
1253 \end_layout
1254
1255 \begin_layout Plain Layout
1256
1257 for Source_Dirs use (".", ARCH);
1258 \end_layout
1259
1260 \end_inset
1261
1262
1263 \end_layout
1264
1265 \begin_layout Standard
1266 \begin_inset ERT
1267 status open
1268
1269 \begin_layout Plain Layout
1270
1271
1272 \backslash
1273 end{frame}
1274 \end_layout
1275
1276 \end_inset
1277
1278
1279 \end_layout
1280
1281 \begin_layout Subsection
1282 Thick-Binding
1283 \end_layout
1284
1285 \begin_layout Standard
1286 \begin_inset ERT
1287 status open
1288
1289 \begin_layout Plain Layout
1290
1291
1292 \backslash
1293 begin{frame}[fragile]
1294 \backslash
1295 frametitle{Autoinit}
1296 \end_layout
1297
1298 \end_inset
1299
1300
1301 \end_layout
1302
1303 \begin_layout Block
1304 \begin_inset ERT
1305 status collapsed
1306
1307 \begin_layout Plain Layout
1308
1309 {
1310 \end_layout
1311
1312 \end_inset
1313
1314
1315 \family typewriter
1316 Functionality
1317 \family default
1318
1319 \begin_inset ERT
1320 status collapsed
1321
1322 \begin_layout Plain Layout
1323
1324 }
1325 \end_layout
1326
1327 \end_inset
1328
1329
1330 \end_layout
1331
1332 \begin_deeper
1333 \begin_layout Itemize
1334 Takes care of CUDA initialisation tasks
1335 \end_layout
1336
1337 \begin_deeper
1338 \begin_layout Itemize
1339 cuInit
1340 \end_layout
1341
1342 \begin_layout Itemize
1343 cuDeviceGet
1344 \end_layout
1345
1346 \begin_layout Itemize
1347 cuCtxCreate
1348 \end_layout
1349
1350 \end_deeper
1351 \begin_layout Itemize
1352 Handles release of CUDA context
1353 \end_layout
1354
1355 \begin_deeper
1356 \begin_layout Itemize
1357 cuCtxDestroy
1358 \end_layout
1359
1360 \end_deeper
1361 \end_deeper
1362 \begin_layout Standard
1363 \begin_inset listings
1364 inline false
1365 status open
1366
1367 \begin_layout Plain Layout
1368
1369 with CUDA.Autoinit;
1370 \end_layout
1371
1372 \end_inset
1373
1374
1375 \end_layout
1376
1377 \begin_layout Standard
1378 \begin_inset ERT
1379 status open
1380
1381 \begin_layout Plain Layout
1382
1383
1384 \backslash
1385 end{frame}
1386 \end_layout
1387
1388 \end_inset
1389
1390
1391 \end_layout
1392
1393 \begin_layout Standard
1394 \begin_inset ERT
1395 status open
1396
1397 \begin_layout Plain Layout
1398
1399
1400 \backslash
1401 begin{frame}[fragile]
1402 \backslash
1403 frametitle{Source-Modules}
1404 \end_layout
1405
1406 \end_inset
1407
1408
1409 \end_layout
1410
1411 \begin_layout Block
1412 \begin_inset ERT
1413 status collapsed
1414
1415 \begin_layout Plain Layout
1416
1417 {
1418 \end_layout
1419
1420 \end_inset
1421
1422
1423 \family typewriter
1424 Functionality
1425 \family default
1426
1427 \begin_inset ERT
1428 status collapsed
1429
1430 \begin_layout Plain Layout
1431
1432 }
1433 \end_layout
1434
1435 \end_inset
1436
1437
1438 \end_layout
1439
1440 \begin_deeper
1441 \begin_layout Itemize
1442 Used to define CUDA kernels 
1443 \begin_inset Quotes eld
1444 \end_inset
1445
1446 inline
1447 \begin_inset Quotes erd
1448 \end_inset
1449
1450
1451 \end_layout
1452
1453 \end_deeper
1454 \begin_layout Standard
1455 \begin_inset listings
1456 inline false
1457 status open
1458
1459 \begin_layout Plain Layout
1460
1461 N   : constant                    := 32 * 1024;
1462 \end_layout
1463
1464 \begin_layout Plain Layout
1465
1466 Src : Compiler.Source_Module_Type :=
1467 \end_layout
1468
1469 \begin_layout Plain Layout
1470
1471  Compiler.Create
1472 \end_layout
1473
1474 \begin_layout Plain Layout
1475
1476   (Preamble  => "#define N" & N'Img,
1477 \end_layout
1478
1479 \begin_layout Plain Layout
1480
1481    Operation => "__global__ void add(float *a, float *b) {" 
1482 \end_layout
1483
1484 \begin_layout Plain Layout
1485
1486     & "  int tid = blockIdx.x;"                         
1487 \end_layout
1488
1489 \begin_layout Plain Layout
1490
1491     & "  while (tid < N) {"
1492 \end_layout
1493
1494 \begin_layout Plain Layout
1495
1496     & "    b[tid] = a[tid] + 10;"                       
1497 \end_layout
1498
1499 \begin_layout Plain Layout
1500
1501     & "    tid += gridDim.x;"
1502 \end_layout
1503
1504 \begin_layout Plain Layout
1505
1506     & "}}");
1507 \end_layout
1508
1509 \end_inset
1510
1511
1512 \end_layout
1513
1514 \begin_layout Standard
1515 \begin_inset ERT
1516 status open
1517
1518 \begin_layout Plain Layout
1519
1520
1521 \backslash
1522 end{frame}
1523 \end_layout
1524
1525 \end_inset
1526
1527
1528 \end_layout
1529
1530 \begin_layout Standard
1531 \begin_inset ERT
1532 status open
1533
1534 \begin_layout Plain Layout
1535
1536
1537 \backslash
1538 begin{frame}[fragile]
1539 \backslash
1540 frametitle{JIT-Compiler}
1541 \end_layout
1542
1543 \end_inset
1544
1545
1546 \end_layout
1547
1548 \begin_layout Block
1549 \begin_inset ERT
1550 status collapsed
1551
1552 \begin_layout Plain Layout
1553
1554 {
1555 \end_layout
1556
1557 \end_inset
1558
1559
1560 \family typewriter
1561 Functionality
1562 \family default
1563
1564 \begin_inset ERT
1565 status collapsed
1566
1567 \begin_layout Plain Layout
1568
1569 }
1570 \end_layout
1571
1572 \end_inset
1573
1574
1575 \end_layout
1576
1577 \begin_deeper
1578 \begin_layout Itemize
1579 Compile CUDA kernels at runtime
1580 \end_layout
1581
1582 \begin_layout Itemize
1583 Uses 
1584 \family typewriter
1585 nvcc
1586 \family default
1587  to generate CUBIN binary code
1588 \end_layout
1589
1590 \begin_layout Itemize
1591 Upload modules to GPU
1592 \end_layout
1593
1594 \begin_layout Itemize
1595 Caching of compiled modules
1596 \end_layout
1597
1598 \end_deeper
1599 \begin_layout Standard
1600 \begin_inset listings
1601 inline false
1602 status open
1603
1604 \begin_layout Plain Layout
1605
1606    ...
1607 \end_layout
1608
1609 \begin_layout Plain Layout
1610
1611 is
1612 \end_layout
1613
1614 \begin_layout Plain Layout
1615
1616    Module : Compiler.Module_Type; 
1617 \end_layout
1618
1619 \begin_layout Plain Layout
1620
1621 begin
1622 \end_layout
1623
1624 \begin_layout Plain Layout
1625
1626    Module := Compiler.Compile (Source => Src); 
1627 \end_layout
1628
1629 \begin_layout Plain Layout
1630
1631    ...
1632  
1633 \end_layout
1634
1635 \end_inset
1636
1637
1638 \end_layout
1639
1640 \begin_layout Standard
1641 \begin_inset ERT
1642 status open
1643
1644 \begin_layout Plain Layout
1645
1646
1647 \backslash
1648 end{frame}
1649 \end_layout
1650
1651 \end_inset
1652
1653
1654 \end_layout
1655
1656 \begin_layout BeginFrame
1657 JIT Workflow
1658 \end_layout
1659
1660 \begin_layout Standard
1661 \align center
1662 \begin_inset Graphics
1663         filename jit-compiler.eps
1664         scale 53
1665
1666 \end_inset
1667
1668
1669 \end_layout
1670
1671 \begin_layout EndFrame
1672
1673 \end_layout
1674
1675 \begin_layout Standard
1676 \begin_inset ERT
1677 status open
1678
1679 \begin_layout Plain Layout
1680
1681
1682 \backslash
1683 begin{frame}[fragile]
1684 \backslash
1685 frametitle{Calling a function}
1686 \end_layout
1687
1688 \end_inset
1689
1690
1691 \end_layout
1692
1693 \begin_layout Standard
1694 \begin_inset listings
1695 inline false
1696 status open
1697
1698 \begin_layout Plain Layout
1699
1700    ...
1701 \end_layout
1702
1703 \begin_layout Plain Layout
1704
1705    Func   : Compiler.Function_Type;
1706 \end_layout
1707
1708 \begin_layout Plain Layout
1709
1710    Module : Compiler.Module_Type; 
1711 \end_layout
1712
1713 \begin_layout Plain Layout
1714
1715 begin
1716 \end_layout
1717
1718 \begin_layout Plain Layout
1719
1720    Module := Compiler.Compile (Source => Src); 
1721 \end_layout
1722
1723 \begin_layout Plain Layout
1724
1725    Func   := Compiler.Get_Function
1726 \end_layout
1727
1728 \begin_layout Plain Layout
1729
1730      (Module => Module,
1731 \end_layout
1732
1733 \begin_layout Plain Layout
1734
1735       Name   => "add");
1736 \end_layout
1737
1738 \begin_layout Plain Layout
1739
1740 \end_layout
1741
1742 \begin_layout Plain Layout
1743
1744    Func.Call 
1745 \end_layout
1746
1747 \begin_layout Plain Layout
1748
1749      (Args =>
1750 \end_layout
1751
1752 \begin_layout Plain Layout
1753
1754         (1 => In_Arg (Data => A),
1755 \end_layout
1756
1757 \begin_layout Plain Layout
1758
1759          2 => In_Arg (Data => B),
1760 \end_layout
1761
1762 \begin_layout Plain Layout
1763
1764          3 => Out_Arg (Data => C'Access)));
1765 \end_layout
1766
1767 \end_inset
1768
1769
1770 \end_layout
1771
1772 \begin_layout Standard
1773 \begin_inset ERT
1774 status open
1775
1776 \begin_layout Plain Layout
1777
1778
1779 \backslash
1780 end{frame}
1781 \end_layout
1782
1783 \end_inset
1784
1785
1786 \end_layout
1787
1788 \begin_layout BeginFrame
1789 Kernel arguments
1790 \end_layout
1791
1792 \begin_layout Block
1793 \begin_inset ERT
1794 status collapsed
1795
1796 \begin_layout Plain Layout
1797
1798 {
1799 \end_layout
1800
1801 \end_inset
1802
1803
1804 \family typewriter
1805 Functionality
1806 \family default
1807
1808 \begin_inset ERT
1809 status collapsed
1810
1811 \begin_layout Plain Layout
1812
1813 }
1814 \end_layout
1815
1816 \end_inset
1817
1818
1819 \end_layout
1820
1821 \begin_deeper
1822 \begin_layout Itemize
1823 Take care of device memory handling (allocation / freeing)
1824 \end_layout
1825
1826 \begin_layout Itemize
1827 Copy data from host to device and from device back to host
1828 \end_layout
1829
1830 \begin_layout Itemize
1831 Completely transparent to users of CUDA/Ada
1832 \end_layout
1833
1834 \begin_layout Itemize
1835 Implemented using Ada generics
1836 \end_layout
1837
1838 \begin_layout Itemize
1839 Three different argument types: 
1840 \end_layout
1841
1842 \begin_deeper
1843 \begin_layout Itemize
1844
1845 \shape italic
1846 In
1847 \end_layout
1848
1849 \begin_layout Itemize
1850
1851 \shape italic
1852 Out
1853 \end_layout
1854
1855 \begin_layout Itemize
1856
1857 \shape italic
1858 InOut
1859 \end_layout
1860
1861 \end_deeper
1862 \begin_layout Itemize
1863 Similar to Ada's formal parameter modes (in, out, in out)
1864 \end_layout
1865
1866 \end_deeper
1867 \begin_layout EndFrame
1868
1869 \end_layout
1870
1871 \begin_layout Standard
1872 \begin_inset ERT
1873 status open
1874
1875 \begin_layout Plain Layout
1876
1877
1878 \backslash
1879 begin{frame}[fragile]
1880 \backslash
1881 frametitle{Kernel arguments II}
1882 \end_layout
1883
1884 \end_inset
1885
1886
1887 \end_layout
1888
1889 \begin_layout Standard
1890 \begin_inset listings
1891 inline false
1892 status open
1893
1894 \begin_layout Plain Layout
1895
1896 generic
1897 \end_layout
1898
1899 \begin_layout Plain Layout
1900
1901    type Data_Type (<>) is private;
1902 \end_layout
1903
1904 \begin_layout Plain Layout
1905
1906 package Arg_Creators is
1907 \end_layout
1908
1909 \begin_layout Plain Layout
1910
1911    function In_Arg (Data : Data_Type) return Arg_Type;
1912 \end_layout
1913
1914 \begin_layout Plain Layout
1915
1916 \end_layout
1917
1918 \begin_layout Plain Layout
1919
1920    function Out_Arg (Data : not null access Data_Type) return Arg_Type;
1921 \end_layout
1922
1923 \begin_layout Plain Layout
1924
1925 \end_layout
1926
1927 \begin_layout Plain Layout
1928
1929    function In_Out_Arg (Data : not null access Data_Type) return Arg_Type;
1930 \end_layout
1931
1932 \begin_layout Plain Layout
1933
1934 end Arg_Creators;
1935 \end_layout
1936
1937 \end_inset
1938
1939
1940 \end_layout
1941
1942 \begin_layout Standard
1943 \begin_inset ERT
1944 status open
1945
1946 \begin_layout Plain Layout
1947
1948
1949 \backslash
1950 end{frame}
1951 \end_layout
1952
1953 \end_inset
1954
1955
1956 \end_layout
1957
1958 \begin_layout Standard
1959 \begin_inset ERT
1960 status open
1961
1962 \begin_layout Plain Layout
1963
1964
1965 \backslash
1966 begin{frame}[fragile]
1967 \backslash
1968 frametitle{Kernel arguments III}
1969 \end_layout
1970
1971 \end_inset
1972
1973
1974 \end_layout
1975
1976 \begin_layout Standard
1977 \begin_inset listings
1978 inline false
1979 status open
1980
1981 \begin_layout Plain Layout
1982
1983    ...
1984 \end_layout
1985
1986 \begin_layout Plain Layout
1987
1988    package Matrix_Args is new CUDA.Compiler.Arg_Creators
1989 \end_layout
1990
1991 \begin_layout Plain Layout
1992
1993      (Data_Type => Ada.Numerics.Real_Arrays.Real_Matrix);
1994 \end_layout
1995
1996 \begin_layout Plain Layout
1997
1998    use Matrix_Args;
1999 \end_layout
2000
2001 \begin_layout Plain Layout
2002
2003 \end_layout
2004
2005 \begin_layout Plain Layout
2006
2007    Matrix : Ada.Numerics.Real_Arrays.Real_Matrix
2008 \end_layout
2009
2010 \begin_layout Plain Layout
2011
2012      := (1 ..
2013  N => (1 ..
2014  N => 0.0));
2015 \end_layout
2016
2017 \begin_layout Plain Layout
2018
2019    Arg    : CUDA.Compiler.Arg_Type
2020 \end_layout
2021
2022 \begin_layout Plain Layout
2023
2024      := In_Arg (Data => Matrix);
2025 \end_layout
2026
2027 \begin_layout Plain Layout
2028
2029 begin
2030 \end_layout
2031
2032 \begin_layout Plain Layout
2033
2034    ...
2035 \end_layout
2036
2037 \end_inset
2038
2039
2040 \end_layout
2041
2042 \begin_layout Standard
2043 \begin_inset ERT
2044 status open
2045
2046 \begin_layout Plain Layout
2047
2048
2049 \backslash
2050 end{frame}
2051 \end_layout
2052
2053 \end_inset
2054
2055
2056 \end_layout
2057
2058 \begin_layout Standard
2059 \begin_inset ERT
2060 status open
2061
2062 \begin_layout Plain Layout
2063
2064
2065 \backslash
2066 begin{frame}[fragile]
2067 \backslash
2068 frametitle{Kernel arguments IV}
2069 \end_layout
2070
2071 \end_inset
2072
2073
2074 \end_layout
2075
2076 \begin_layout Standard
2077
2078 \family typewriter
2079 \series bold
2080 Kernel signature
2081 \end_layout
2082
2083 \begin_layout Standard
2084 \begin_inset listings
2085 lstparams "language=C"
2086 inline false
2087 status open
2088
2089 \begin_layout Plain Layout
2090
2091 void mul(float* A, float* B, float* C)
2092 \end_layout
2093
2094 \end_inset
2095
2096
2097 \end_layout
2098
2099 \begin_layout Standard
2100
2101 \family typewriter
2102 \series bold
2103 CUDA/Ada call
2104 \end_layout
2105
2106 \begin_layout Standard
2107 \begin_inset listings
2108 inline false
2109 status open
2110
2111 \begin_layout Plain Layout
2112
2113    Func.Call 
2114 \end_layout
2115
2116 \begin_layout Plain Layout
2117
2118      (Args =>
2119 \end_layout
2120
2121 \begin_layout Plain Layout
2122
2123         (1 => In_Arg  (Data => A),
2124 \end_layout
2125
2126 \begin_layout Plain Layout
2127
2128          2 => In_Arg  (Data => B),
2129 \end_layout
2130
2131 \begin_layout Plain Layout
2132
2133          3 => Out_Arg (Data => C'Access)));
2134 \end_layout
2135
2136 \end_inset
2137
2138
2139 \end_layout
2140
2141 \begin_layout Standard
2142 \begin_inset ERT
2143 status open
2144
2145 \begin_layout Plain Layout
2146
2147
2148 \backslash
2149 end{frame}
2150 \end_layout
2151
2152 \end_inset
2153
2154
2155 \end_layout
2156
2157 \begin_layout Standard
2158 \begin_inset ERT
2159 status open
2160
2161 \begin_layout Plain Layout
2162
2163
2164 \backslash
2165 begin{frame}[fragile]
2166 \backslash
2167 frametitle{Error handling}
2168 \end_layout
2169
2170 \end_inset
2171
2172
2173 \end_layout
2174
2175 \begin_layout Block
2176 \begin_inset ERT
2177 status collapsed
2178
2179 \begin_layout Plain Layout
2180
2181 {
2182 \end_layout
2183
2184 \end_inset
2185
2186
2187 \family typewriter
2188 Functionality
2189 \family default
2190
2191 \begin_inset ERT
2192 status collapsed
2193
2194 \begin_layout Plain Layout
2195
2196 }
2197 \end_layout
2198
2199 \end_inset
2200
2201
2202 \end_layout
2203
2204 \begin_deeper
2205 \begin_layout Itemize
2206 Translation of CUDA errors to Ada exceptions
2207 \end_layout
2208
2209 \begin_layout Itemize
2210 Automated error checking of all low-level Thin-Binding calls
2211 \end_layout
2212
2213 \begin_layout Itemize
2214 Resolution of error code to error message 
2215 \end_layout
2216
2217 \end_deeper
2218 \begin_layout ExampleBlock
2219 \begin_inset ERT
2220 status collapsed
2221
2222 \begin_layout Plain Layout
2223
2224 {
2225 \end_layout
2226
2227 \end_inset
2228
2229 Requesting a nonexistent kernel 'Matrix_Mul'
2230 \begin_inset ERT
2231 status collapsed
2232
2233 \begin_layout Plain Layout
2234
2235 }
2236 \end_layout
2237
2238 \end_inset
2239
2240
2241 \end_layout
2242
2243 \begin_deeper
2244 \begin_layout Standard
2245 \begin_inset ERT
2246 status open
2247
2248 \begin_layout Plain Layout
2249
2250
2251 \backslash
2252 begin{verbatim}
2253 \end_layout
2254
2255 \begin_layout Plain Layout
2256
2257 Execution terminated by unhandled exception
2258 \end_layout
2259
2260 \begin_layout Plain Layout
2261
2262 Exception name: CUDA.CUDA_ERROR
2263 \end_layout
2264
2265 \begin_layout Plain Layout
2266
2267 Message: Could not get function Matrix_Mul (Not found)
2268 \end_layout
2269
2270 \begin_layout Plain Layout
2271
2272 Call stack traceback locations:
2273 \end_layout
2274
2275 \begin_layout Plain Layout
2276
2277 0x4079b6 0x40bc3a 0x406a4b 0x406299
2278 \end_layout
2279
2280 \begin_layout Plain Layout
2281
2282 0x7f159e4afc4b 0x405bd7
2283 \end_layout
2284
2285 \begin_layout Plain Layout
2286
2287
2288 \backslash
2289 end{verbatim}
2290 \end_layout
2291
2292 \end_inset
2293
2294
2295 \end_layout
2296
2297 \end_deeper
2298 \begin_layout Standard
2299 \begin_inset ERT
2300 status open
2301
2302 \begin_layout Plain Layout
2303
2304
2305 \backslash
2306 end{frame}
2307 \end_layout
2308
2309 \end_inset
2310
2311
2312 \end_layout
2313
2314 \begin_layout Standard
2315 \begin_inset ERT
2316 status open
2317
2318 \begin_layout Plain Layout
2319
2320
2321 \backslash
2322 begin{frame}[fragile]
2323 \backslash
2324 frametitle{Querying CUDA devices}
2325 \end_layout
2326
2327 \end_inset
2328
2329
2330 \end_layout
2331
2332 \begin_layout Block
2333 \begin_inset ERT
2334 status collapsed
2335
2336 \begin_layout Plain Layout
2337
2338 {
2339 \end_layout
2340
2341 \end_inset
2342
2343
2344 \family typewriter
2345 Functionality
2346 \family default
2347
2348 \begin_inset ERT
2349 status collapsed
2350
2351 \begin_layout Plain Layout
2352
2353 }
2354 \end_layout
2355
2356 \end_inset
2357
2358
2359 \end_layout
2360
2361 \begin_deeper
2362 \begin_layout Itemize
2363 Enumerate all available CUDA devices
2364 \end_layout
2365
2366 \begin_layout Itemize
2367 Query device properties like name, compute capability, etc.
2368 \end_layout
2369
2370 \end_deeper
2371 \begin_layout Separator
2372
2373 \end_layout
2374
2375 \begin_layout Standard
2376 \begin_inset listings
2377 inline false
2378 status open
2379
2380 \begin_layout Plain Layout
2381
2382 with Ada.Text_IO;
2383 \end_layout
2384
2385 \begin_layout Plain Layout
2386
2387 with CUDA.Driver; use CUDA.Driver;
2388 \end_layout
2389
2390 \begin_layout Plain Layout
2391
2392 \end_layout
2393
2394 \begin_layout Plain Layout
2395
2396 procedure Enum_Devices is
2397 \end_layout
2398
2399 \begin_layout Plain Layout
2400
2401    procedure Print_Name (Dev : Device_Type) is
2402 \end_layout
2403
2404 \begin_layout Plain Layout
2405
2406    begin
2407 \end_layout
2408
2409 \begin_layout Plain Layout
2410
2411       Ada.Text_IO.Put_Line (Name (Dev));
2412 \end_layout
2413
2414 \begin_layout Plain Layout
2415
2416    end Print_Name;
2417 \end_layout
2418
2419 \begin_layout Plain Layout
2420
2421 begin
2422 \end_layout
2423
2424 \begin_layout Plain Layout
2425
2426    Iterate (Process => Print_Name'Access);
2427 \end_layout
2428
2429 \begin_layout Plain Layout
2430
2431 end Enum_Devices;
2432 \end_layout
2433
2434 \end_inset
2435
2436
2437 \end_layout
2438
2439 \begin_layout Standard
2440 \begin_inset ERT
2441 status open
2442
2443 \begin_layout Plain Layout
2444
2445
2446 \backslash
2447 end{frame}
2448 \end_layout
2449
2450 \end_inset
2451
2452
2453 \end_layout
2454
2455 \begin_layout Section
2456 Conclusion
2457 \end_layout
2458
2459 \begin_layout Subsection
2460 Performance
2461 \end_layout
2462
2463 \begin_layout BeginFrame
2464 Performance
2465 \end_layout
2466
2467 \begin_layout Block
2468 \begin_inset ERT
2469 status collapsed
2470
2471 \begin_layout Plain Layout
2472
2473 {
2474 \end_layout
2475
2476 \end_inset
2477
2478
2479 \family typewriter
2480 Ambition
2481 \family default
2482
2483 \begin_inset ERT
2484 status collapsed
2485
2486 \begin_layout Plain Layout
2487
2488 }
2489 \end_layout
2490
2491 \end_inset
2492
2493
2494 \end_layout
2495
2496 \begin_deeper
2497 \begin_layout Quote
2498 Programs using CUDA/Ada should reach 
2499 \begin_inset Quotes eld
2500 \end_inset
2501
2502 native
2503 \begin_inset Quotes erd
2504 \end_inset
2505
2506  CUDA speed.
2507  The overhead imposed by Ada should be kept minimal.
2508 \end_layout
2509
2510 \end_deeper
2511 \begin_layout Separator
2512
2513 \end_layout
2514
2515 \begin_layout Block
2516 \begin_inset ERT
2517 status collapsed
2518
2519 \begin_layout Plain Layout
2520
2521 {
2522 \end_layout
2523
2524 \end_inset
2525
2526
2527 \family typewriter
2528 Measurement
2529 \family default
2530  methodology
2531 \begin_inset ERT
2532 status collapsed
2533
2534 \begin_layout Plain Layout
2535
2536 }
2537 \end_layout
2538
2539 \end_inset
2540
2541
2542 \end_layout
2543
2544 \begin_deeper
2545 \begin_layout Itemize
2546 Cumulative runtime of 20 matrix multiplications (512 x 512)
2547 \end_layout
2548
2549 \begin_layout Itemize
2550 Comparison of different implementations:
2551 \end_layout
2552
2553 \begin_deeper
2554 \begin_layout Itemize
2555 Ada (CPU)
2556 \end_layout
2557
2558 \begin_layout Itemize
2559 CUDA Runtime API
2560 \end_layout
2561
2562 \begin_layout Itemize
2563 CUDA Driver API
2564 \end_layout
2565
2566 \begin_layout Itemize
2567 CUDA/Ada
2568 \end_layout
2569
2570 \end_deeper
2571 \begin_layout Itemize
2572 Same grid and block size for all CUDA implementations
2573 \end_layout
2574
2575 \end_deeper
2576 \begin_layout BeginFrame
2577 Performance II
2578 \end_layout
2579
2580 \begin_layout Standard
2581 \begin_inset Float table
2582 wide false
2583 sideways false
2584 status open
2585
2586 \begin_layout Plain Layout
2587 \align center
2588 \begin_inset Tabular
2589 <lyxtabular version="3" rows="7" columns="2">
2590 <features tabularvalignment="middle">
2591 <column alignment="center" valignment="top" width="0">
2592 <column alignment="center" valignment="top" width="0pt">
2593 <row>
2594 <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
2595 \begin_inset Text
2596
2597 \begin_layout Plain Layout
2598 Processor
2599 \end_layout
2600
2601 \end_inset
2602 </cell>
2603 <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
2604 \begin_inset Text
2605
2606 \begin_layout Plain Layout
2607 AMD Phenom II X4 940
2608 \end_layout
2609
2610 \end_inset
2611 </cell>
2612 </row>
2613 <row>
2614 <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
2615 \begin_inset Text
2616
2617 \begin_layout Plain Layout
2618 Graphics Card
2619 \end_layout
2620
2621 \end_inset
2622 </cell>
2623 <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
2624 \begin_inset Text
2625
2626 \begin_layout Plain Layout
2627 GeForce GTX 560 Ti
2628 \end_layout
2629
2630 \end_inset
2631 </cell>
2632 </row>
2633 <row>
2634 <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
2635 \begin_inset Text
2636
2637 \begin_layout Plain Layout
2638 Operating System
2639 \end_layout
2640
2641 \end_inset
2642 </cell>
2643 <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
2644 \begin_inset Text
2645
2646 \begin_layout Plain Layout
2647 Debian Linux 6.0
2648 \end_layout
2649
2650 \end_inset
2651 </cell>
2652 </row>
2653 <row>
2654 <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
2655 \begin_inset Text
2656
2657 \begin_layout Plain Layout
2658 Kernel
2659 \end_layout
2660
2661 \end_inset
2662 </cell>
2663 <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
2664 \begin_inset Text
2665
2666 \begin_layout Plain Layout
2667  2.6.32-5-amd64
2668 \end_layout
2669
2670 \end_inset
2671 </cell>
2672 </row>
2673 <row>
2674 <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
2675 \begin_inset Text
2676
2677 \begin_layout Plain Layout
2678 Ada Compiler
2679 \end_layout
2680
2681 \end_inset
2682 </cell>
2683 <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
2684 \begin_inset Text
2685
2686 \begin_layout Plain Layout
2687 FSF GNAT 4.4.5
2688 \end_layout
2689
2690 \end_inset
2691 </cell>
2692 </row>
2693 <row>
2694 <cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none">
2695 \begin_inset Text
2696
2697 \begin_layout Plain Layout
2698 NVIDIA Graphics Driver
2699 \end_layout
2700
2701 \end_inset
2702 </cell>
2703 <cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none">
2704 \begin_inset Text
2705
2706 \begin_layout Plain Layout
2707 270.41.19, Linux 64-bit
2708 \end_layout
2709
2710 \end_inset
2711 </cell>
2712 </row>
2713 <row>
2714 <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" usebox="none">
2715 \begin_inset Text
2716
2717 \begin_layout Plain Layout
2718 NVIDIA CUDA Toolkit
2719 \end_layout
2720
2721 \end_inset
2722 </cell>
2723 <cell alignment="center" valignment="top" topline="true" bottomline="true" leftline="true" rightline="true" usebox="none">
2724 \begin_inset Text
2725
2726 \begin_layout Plain Layout
2727 4.0.17, Linux 64-bit
2728 \end_layout
2729
2730 \end_inset
2731 </cell>
2732 </row>
2733 </lyxtabular>
2734
2735 \end_inset
2736
2737
2738 \begin_inset Caption
2739
2740 \begin_layout Plain Layout
2741 Test system specs
2742 \end_layout
2743
2744 \end_inset
2745
2746
2747 \end_layout
2748
2749 \end_inset
2750
2751
2752 \end_layout
2753
2754 \begin_layout EndFrame
2755
2756 \end_layout
2757
2758 \begin_layout BeginFrame
2759 Benchmarking results
2760 \end_layout
2761
2762 \begin_layout Standard
2763 \align center
2764 \begin_inset Graphics
2765         filename performance-chart.pdf
2766         scale 76
2767
2768 \end_inset
2769
2770
2771 \end_layout
2772
2773 \begin_layout EndFrame
2774
2775 \end_layout
2776
2777 \begin_layout BeginFrame
2778 Performance III
2779 \end_layout
2780
2781 \begin_layout Block
2782 \begin_inset ERT
2783 status collapsed
2784
2785 \begin_layout Plain Layout
2786
2787 {
2788 \end_layout
2789
2790 \end_inset
2791
2792
2793 \family typewriter
2794 Interpretation
2795 \family default
2796
2797 \begin_inset ERT
2798 status collapsed
2799
2800 \begin_layout Plain Layout
2801
2802 }
2803 \end_layout
2804
2805 \end_inset
2806
2807
2808 \end_layout
2809
2810 \begin_deeper
2811 \begin_layout Itemize
2812 Native Ada (CPU) is slow: CPU vs.
2813  GPU/CUDA
2814 \end_layout
2815
2816 \begin_layout Itemize
2817 CUDA/Ada is faster than CUDA Runtime API:
2818 \end_layout
2819
2820 \begin_deeper
2821 \begin_layout Itemize
2822 CUDA Runtime API generates management as well as kernel launch code, etc.
2823 \end_layout
2824
2825 \begin_layout Itemize
2826 CUDA/Ada performs the bare minimum of management operations
2827 \end_layout
2828
2829 \end_deeper
2830 \begin_layout Itemize
2831 CUDA/Ada is negligibly slower than CUDA Driver API
2832 \end_layout
2833
2834 \begin_layout Itemize
2835 No visible performance penalty
2836 \end_layout
2837
2838 \end_deeper
2839 \begin_layout EndFrame
2840
2841 \end_layout
2842
2843 \begin_layout Subsection
2844 Review
2845 \end_layout
2846
2847 \begin_layout BeginFrame
2848 Review
2849 \end_layout
2850
2851 \begin_layout Block
2852 \begin_inset ERT
2853 status collapsed
2854
2855 \begin_layout Plain Layout
2856
2857 {
2858 \end_layout
2859
2860 \end_inset
2861
2862 Assessment of results
2863 \begin_inset ERT
2864 status collapsed
2865
2866 \begin_layout Plain Layout
2867
2868 }
2869 \end_layout
2870
2871 \end_inset
2872
2873
2874 \end_layout
2875
2876 \begin_deeper
2877 \begin_layout Itemize
2878 Thick-Binding provides easy usage of CUDA from Ada
2879 \end_layout
2880
2881 \begin_layout Itemize
2882 High-level abstractions go well with other Ada language constructs
2883 \end_layout
2884
2885 \begin_layout Itemize
2886 Simple auto-initialization of CUDA via 
2887 \family typewriter
2888 Autoinit
2889 \family default
2890  package
2891 \end_layout
2892
2893 \begin_layout Itemize
2894 JIT compilation of kernels provides flexibility and agility
2895 \end_layout
2896
2897 \begin_layout Itemize
2898 Speed of CUDA/Ada is excellent
2899 \end_layout
2900
2901 \begin_layout Itemize
2902 Automated Thin-Binding generation allows for easy integration of future
2903  CUDA API changes
2904 \end_layout
2905
2906 \begin_layout Itemize
2907 Paper documents how to write a modern, maintainable language binding for
2908  Ada in general
2909 \end_layout
2910
2911 \end_deeper
2912 \begin_layout EndFrame
2913
2914 \end_layout
2915
2916 \begin_layout BeginFrame
2917 Review II
2918 \end_layout
2919
2920 \begin_layout Block
2921 \begin_inset ERT
2922 status collapsed
2923
2924 \begin_layout Plain Layout
2925
2926 {
2927 \end_layout
2928
2929 \end_inset
2930
2931 Deliverables
2932 \begin_inset ERT
2933 status collapsed
2934
2935 \begin_layout Plain Layout
2936
2937 }
2938 \end_layout
2939
2940 \end_inset
2941
2942
2943 \end_layout
2944
2945 \begin_deeper
2946 \begin_layout Itemize
2947 Paper
2948 \end_layout
2949
2950 \begin_layout Itemize
2951 Slides
2952 \end_layout
2953
2954 \begin_layout Itemize
2955 Source code (GPLv3+)
2956 \family typewriter
2957
2958 \begin_inset Newline newline
2959 \end_inset
2960
2961
2962 \family default
2963
2964 \begin_inset Flex URL
2965 status collapsed
2966
2967 \begin_layout Plain Layout
2968
2969 http://git.codelabs.ch/?p=cuda-ada.git
2970 \end_layout
2971
2972 \end_inset
2973
2974
2975 \family typewriter
2976
2977 \begin_inset Newline newline
2978 \end_inset
2979
2980 git clone http://git.codelabs.ch/git/cuda-ada.git
2981 \end_layout
2982
2983 \begin_layout Itemize
2984 Website with all documents and additional information: 
2985 \begin_inset Flex URL
2986 status collapsed
2987
2988 \begin_layout Plain Layout
2989
2990 http://www.codelabs.ch/cuda-ada/
2991 \end_layout
2992
2993 \end_inset
2994
2995
2996 \end_layout
2997
2998 \end_deeper
2999 \begin_layout EndFrame
3000
3001 \end_layout
3002
3003 \begin_layout Subsection
3004 Outlook
3005 \end_layout
3006
3007 \begin_layout BeginFrame
3008 Outlook
3009 \end_layout
3010
3011 \begin_layout Block
3012 \begin_inset ERT
3013 status collapsed
3014
3015 \begin_layout Plain Layout
3016
3017 {
3018 \end_layout
3019
3020 \end_inset
3021
3022 CUDA/Ada
3023 \begin_inset ERT
3024 status collapsed
3025
3026 \begin_layout Plain Layout
3027
3028 }
3029 \end_layout
3030
3031 \end_inset
3032
3033
3034 \end_layout
3035
3036 \begin_deeper
3037 \begin_layout Itemize
3038 Implement additional features:
3039 \end_layout
3040
3041 \begin_deeper
3042 \begin_layout Itemize
3043 Abstractions for kernel generation (e.g.
3044  element-wise)
3045 \end_layout
3046
3047 \begin_layout Itemize
3048 Kernel grid and block sizes depending on device capabilities
3049 \end_layout
3050
3051 \begin_layout Itemize
3052 Multi-Device support
3053 \end_layout
3054
3055 \begin_layout Itemize
3056 ...
3057 \end_layout
3058
3059 \end_deeper
3060 \begin_layout Itemize
3061 Announce project to Ada community
3062 \end_layout
3063
3064 \end_deeper
3065 \begin_layout Separator
3066
3067 \end_layout
3068
3069 \begin_layout Block
3070 \begin_inset ERT
3071 status collapsed
3072
3073 \begin_layout Plain Layout
3074
3075 {
3076 \end_layout
3077
3078 \end_inset
3079
3080 Ada and CUDA
3081 \begin_inset ERT
3082 status collapsed
3083
3084 \begin_layout Plain Layout
3085
3086 }
3087 \end_layout
3088
3089 \end_inset
3090
3091
3092 \end_layout
3093
3094 \begin_deeper
3095 \begin_layout Itemize
3096 Study NVIDIA's LLVM-based CUDA compiler (announced, not yet released)
3097 \end_layout
3098
3099 \end_deeper
3100 \begin_layout EndFrame
3101
3102 \end_layout
3103
3104 \begin_layout BeginFrame
3105 Questions
3106 \end_layout
3107
3108 \begin_layout Standard
3109 \align center
3110
3111 \size giant
3112 Thank you for your attention!
3113 \end_layout
3114
3115 \begin_layout EndFrame
3116
3117 \end_layout
3118
3119 \end_body
3120 \end_document