multiplication in Ada, CUDA/Ada and native CUDA C. The benchmarks can be run by
issuing the following command:
- $ make perf COUNT=10
+ $ make perf COUNT=20
-This will print the cumulated execution times of ten successive matrix
-operations for the three different implementations.
+This will print the cumulative execution times of twenty successive matrix
+operations for the different implementations.
-html/*.html
-html/*.css
+html
-a disable-javascript \
-o -
-all: $(DESTDIR)/asciidoc.css $(DESTDIR)/index.html
+all: $(DESTDIR)/asciidoc.css $(DESTDIR)/index.html \
+ $(DESTDIR)/performance-chart.png
$(DESTDIR)/asciidoc.css: asciidoc.css
cp $< $@
$(DESTDIR)/index.html: index page.conf ../README ../src/add.adb
asciidoc $(ASCIIDOC_OPTS) $< > $@
+$(DESTDIR)/performance-chart.png: performance-chart.png
+ cp $< $@
+
article: $(DOCNAME).pdf
$(DOCNAME).pdf: $(DOC)
clean:
@rm -f $(DESTDIR)/*.html
@rm -f $(DESTDIR)/*.css
+ @rm -f $(DESTDIR)/*.png
.PHONY: clean
include::../README[]
+image:performance-chart.png[alt="CUDA/Ada performance"]
+
+The chart shows the cumulative execution times for performing a 512 by 512
+matrix multiplication 20 times. All CUDA implementations used the same
+kernel, a grid size of 32 and a block size of 16.
+
Example
-------
[source,ada]