The following example program tpa1 is used in the sections above to show the basic performance statistics that can be collected and displayed without instrumentation of the code.
// file tpa1.c - error checking removed to improve readability
...
// the value of nr_spus is 3
// Run the tpa1_spu program once for every SPE index in [0, nr_spus).
// Each iteration creates a context, loads the SPE image into it, runs it,
// and releases both the context and the image before the next iteration.
for (i = 0; i < nr_spus; i++) {
    spe_context_ptr_t spe;
    spe_program_handle_t *tpa1_spu;
    unsigned int entry = SPE_DEFAULT_ENTRY;
    // Carries the loop index to the SPE program through the pointer-typed
    // argument of spe_context_run().
    union {
        void *ptr;
        unsigned int spe_num;
    } t_info;

    spe = spe_context_create(0, NULL);
    tpa1_spu = spe_image_open("tpa1_spu");
    // spe_program_load() expects the program handle itself, not its address.
    (void)spe_program_load(spe, tpa1_spu);

    printf("Spawning thread: %d\n", i);

    // Clear the whole union first so that no byte of ptr is read
    // uninitialized when a pointer is wider than unsigned int.
    // NOTE(review): passing the index this way assumes ptr and spe_num have
    // the same size (32-bit PPU build) - confirm before building as 64-bit.
    t_info.ptr = NULL;
    t_info.spe_num = i;

    (void)spe_context_run(spe, &entry, 0, t_info.ptr, NULL, NULL);

    (void)spe_context_destroy(spe);
    // Release the image obtained from spe_image_open() above.
    (void)spe_image_close(tpa1_spu);
}
// file tpa1_spu.c
// SPE-side entry point of tpa1_spu.
//
// id      - first argument handed to the SPE program; not used here.
// spe_num - second argument; the PPU side stores the SPE index in it.
//
// Prints spe_num * 3 lines through sim_printf(), numbered from 0, so an
// spe_num of 0 prints nothing. Always returns 0.
int main(unsigned long long id, unsigned long long spe_num)
{
    int i;

    // i is converted to unsigned long long for the comparison; it never
    // goes negative, so the conversion does not change the result.
    for (i = 0; i < spe_num * 3; i++) {
        sim_printf("SPE#: %lld, Count: %d\n", spe_num, i);
    }

    return 0;
}