The example illustrates the performance gain of asynchronous I/O compared with synchronous I/O. In the example, 5.6 MB of data is written asynchronously while the main program performs its computation, which is the scalar multiplication of two vectors with some normalization.
C-source file executing a scalar multiplication:
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
 
double do_compute(double A, double B, int arr_len)
{
  int i;
  double   res = 0;
  double  *xA = malloc(arr_len * sizeof(double));
  double  *xB = malloc(arr_len * sizeof(double));
  if ( !xA || !xB )
   abort();
   for (i = 0; i < arr_len; i++) {
    xA[i] = sin(A);
    xB[i] = cos(B);
    res = res + xA[i]*xA[i];
   }
  free(xA);
  free(xB);
 return res;
}
C-main-source file using asynchronous I/O implementation:
/* DIM_X: number of compute-then-write iterations in main(); it is also the
   vector length passed to do_compute(). */
#define DIM_X   123/*123*/
/* DIM_Y: number of double values produced and written per iteration. */
#define DIM_Y   70000
/* aio_dat:     buffer that main() fills with freshly computed values.
   aio_dat_tmp: snapshot of aio_dat that is handed to the asynchronous write,
                so aio_dat can be refilled while the write is in flight. */
double  aio_dat[DIM_Y /* DIM_Y * sizeof(double) bytes */] = {0};
double  aio_dat_tmp[DIM_Y /* DIM_Y * sizeof(double) bytes */];
#include <stdio.h>
#include <aio.h>
typedef struct aiocb  aiocb_t;
   /* Single control block reused for every asynchronous write. */
   aiocb_t   my_aio;
   /* One-element request list in the form expected by aio_suspend(). */
   aiocb_t  *my_aio_list[1] = {&my_aio};
   
int main()
{
  double do_compute(double A, double B, int arr_len);
  int i, j;
  HANDLE fd = CreateFile("aio.dat",
  GENERIC_READ | GENERIC_WRITE,
  FILE_SHARE_READ,
  NULL,
  OPEN_ALWAYS,
  FILE_ATTRIBUTE_NORMAL,
  NULL);
/* Do some complex computation */
for (i = 0; i < DIM_X; i++) {
 for ( j = 0; j < DIM_Y; j++ )
 aio_dat[j] = do_compute(i, j, DIM_X);
 
  if (i) aio_suspend(my_aio_list, 1, 0);
  my_aio.aio_fildes = fd;
  my_aio.aio_buf    = memcpy(aio_dat_tmp, aio_dat, sizeof(aio_dat_tmp));
  my_aio.aio_nbytes = sizeof(aio_dat_tmp);
  my_aio.aio_offset = (intptr_t)-1;
  my_aio.aio_sigevent.sigev_notify = SIGEV_NONE;
  
  if ( aio_write((void*)&my_aio) == -1 ) {
  printf("ERROR!!! %s\n", "aio_write()==-1");
  abort();}
  }
 aio_suspend(my_aio_list, 1, 0);
 return 0;
}
C-main-source file example 2 using asynchronous I/O implementation:
// icx (for C++) dpcpp (for DPC++) -c do_compute.c
// icx (for C++) dpcpp (for DPC++) aio_sample2.c do_compute.obj
// aio_sample2.exe
/* DIM_X: number of write iterations and of read iterations in main(); it is
   also the vector length passed to do_compute(). */
#define DIM_X   123
/* DIM_Y: number of double values per record written to / read from the file. */
#define DIM_Y    70
/* aio_dat:     values computed by main().
   aio_dat_tmp: buffer handed to the asynchronous write and read requests. */
double  aio_dat[DIM_Y] = {0};
double  aio_dat_tmp[DIM_Y];
/* Completion flag: set to 1 by aio_CompletionRoutine() and reset to 0 by
   WAIT. It starts at 1, so the first WAIT does not block.
   NOTE(review): the flag is shared with the notification callback using only
   'volatile'; confirm this is sufficient for the target runtime. */
static volatile int aio_flg = 1;
#include <aio.h>
typedef struct aiocb  aiocb_t;
/* Single control block reused for every write, fsync and read request. */
aiocb_t               my_aio;
/* Busy-wait until the completion flag is set, then clear it for the next
   request. Expands to a brace-enclosed block, not a single statement. */
#define WAIT { while (!aio_flg); aio_flg = 0; }
/* Open _fname for reading and writing (OPEN_ALWAYS, shared for reading) and
   evaluate to the handle returned by CreateFile. */
#define aio_OPEN(_fname )\
CreateFile(_fname,                       \
           GENERIC_READ | GENERIC_WRITE, \
           FILE_SHARE_READ,              \
           NULL,                         \
           OPEN_ALWAYS,                  \
           FILE_ATTRIBUTE_NORMAL,        \
           NULL)
/*
 * Completion callback installed in my_aio.aio_sigevent by main().
 * Raises aio_flg so that a pending WAIT in main() can proceed.
 *
 * sigval  notification value supplied by the caller of the callback; unused.
 */
static void aio_CompletionRoutine(sigval_t sigval)
{
    (void)sigval;   /* the notification payload is not needed here */
    aio_flg = 1;
}
int main()
{
    double do_compute(double A, double B, int arr_len);
    int      i, j, res;
    char    *fname = "aio_sample2.dat";
    HANDLE   aio_fildes = aio_OPEN(fname);
    my_aio.aio_fildes = aio_fildes;
    my_aio.aio_nbytes = sizeof(aio_dat_tmp);
    my_aio.aio_sigevent.sigev_notify          = SIGEV_THREAD;
    my_aio.aio_sigevent.sigev_notify_function = aio_CompletionRoutine;
    /*
    ** writing
    */
    my_aio.aio_offset = -1;
    printf("Writing\n");
    for (i = 0; i < DIM_X; i++) {
        for (j = 0; j < DIM_Y; j++)
            aio_dat[j] = do_compute(i, j, DIM_X);
        WAIT;
        my_aio.aio_buf = memcpy(aio_dat_tmp, aio_dat, sizeof(aio_dat_tmp));
        res = aio_write(&my_aio);
        if (res) {printf("res!=0\n");abort();}
    }
    //
    // flushing
    //
    printf("Flushing\n");
    WAIT;
    res = aio_fsync(O_SYNC, &my_aio);
    if (res) {printf("res!=0\n");abort();}
    WAIT;
    //
    // reading
    //
    printf("Reading\n");
    my_aio.aio_offset = 0;
    my_aio.aio_buf    = (volatile char*)aio_dat_tmp;
    for (i = 0; i < DIM_X; i++) {
        aio_read(&my_aio);
        for (j = 0; j < DIM_Y; j++)
            aio_dat[j] = do_compute(i, j, DIM_X);
        WAIT;
        res = aio_return(&my_aio);
        if (res != sizeof(aio_dat)) {
            printf("aio_read() did read %d bytes, expecting %d bytes\n", res, sizeof(aio_dat));
        }
        for (j = 0; j < DIM_Y; j++)
            if ( aio_dat[j] != aio_dat_tmp[j] )
                {printf("ERROR: aio_dat[j] != aio_dat_tmp[j]\n I=%d J=%d\n", i, j); abort();}
        my_aio.aio_offset += my_aio.aio_nbytes;
    }
    CloseHandle(aio_fildes);
    printf("\nDone\n");
return 0;
}