c – Program segfaults halfway through when trying to use Duff’s device vs original code

Original code

int i,j,k;
/* loop over all diagonal (pivot) elements */
for (k = 0; k < N; k++) {
    /* for all elements in pivot row and right of pivot element */
    for (j = k + 1; j < N; j++) {
        /* divide by pivot element */
        A[k][j] = A[k][j] / A[k][k];
    }
    /* set pivot element to 1.0 */
    A[k][k] = 1.0; 

    // This nested loop is not relevant to what I am trying to do.
    /* for all elements below pivot row and right of pivot column */
    for (i = k + 1; i < N; i++) {
        for (j = k + 1; j < N; j++) {
            A[i][j] = A[i][j] - A[i][k] * A[k][j];
        }
        A[i][k] = 0.0;
    }
}

“Pointerized” version that works and is faster

float *pivot = &A[0][0];
float *pivot_end = &A[24][24];
float *nlp_end = &A[24][0];
int k = 0;

// Loop over all pivot elements (diagonal elements)
while (pivot != pivot_end)
{
    // For all elements in pivot row and right of pivot element
    float *row_start = pivot + 1;
    float *row_end = pivot + 24 - k;
    float *nlp = row_end;

    float akk = 1.0f / *pivot;
    *pivot = 1.0;
    while (row_start != row_end)
        *row_start++ *= akk;

    // For all elements below pivot row and right of pivot column
    while (nlp != nlp_end)
    {
        nlp += k;
        float *col_start = pivot + 1;
        float aik = *nlp;
        *nlp = 0.0;
        while (col_start != row_end)
        {
            nlp += 1;
            *nlp -= aik * *col_start++;
        }
        nlp += 1;
    }

    pivot += 25;
    k++;
}

Now I am trying to make use of a “Duff’s device” to unroll the inner while-loop and this has been my attempt which segfaults halfway into the algorithm. This code segfaults when k=23see edit below.

#include <stdio.h>

float A[24][24] = {
    {92.00,43.00,86.00,87.00,100.00,21.00,36.00,84.00,30.00,60.00,52.00,69.00,40.00,56.00,104.00,100.00,69.00,78.00,15.00,66.00,1.00,26.00,15.00,88.00},
    {17.00,44.00,14.00,11.00,109.00,24.00,56.00,92.00,67.00,32.00,70.00,57.00,54.00,107.00,32.00,84.00,57.00,84.00,44.00,98.00,31.00,38.00,88.00,101.00},
    {7.00,104.00,57.00,9.00,21.00,72.00,97.00,38.00,7.00,2.00,50.00,6.00,26.00,106.00,99.00,93.00,29.00,59.00,41.00,83.00,56.00,73.00,58.00,4.00},
    {48.00,102.00,102.00,79.00,31.00,81.00,70.00,38.00,75.00,18.00,48.00,96.00,91.00,36.00,25.00,98.00,38.00,75.00,105.00,64.00,72.00,94.00,48.00,101.00},
    {43.00,89.00,75.00,100.00,53.00,23.00,104.00,101.00,16.00,96.00,70.00,47.00,68.00,30.00,86.00,33.00,49.00,24.00,20.00,30.00,61.00,45.00,18.00,99.00},
    {11.00,13.00,54.00,83.00,108.00,102.00,75.00,42.00,82.00,40.00,32.00,25.00,64.00,26.00,16.00,80.00,13.00,87.00,18.00,81.00,8.00,104.00,5.00,57.00},
    {19.00,26.00,87.00,80.00,72.00,106.00,70.00,83.00,10.00,14.00,57.00,8.00,7.00,22.00,50.00,90.00,63.00,83.00,5.00,17.00,109.00,22.00,97.00,13.00},
    {109.00,5.00,95.00,7.00,0.00,101.00,65.00,19.00,17.00,43.00,100.00,90.00,39.00,60.00,63.00,49.00,75.00,10.00,58.00,83.00,33.00,109.00,63.00,96.00},
    {82.00,69.00,3.00,82.00,91.00,101.00,96.00,91.00,107.00,81.00,99.00,108.00,73.00,54.00,18.00,91.00,97.00,8.00,71.00,27.00,69.00,25.00,77.00,34.00},
    {36.00,25.00,8.00,69.00,24.00,71.00,56.00,106.00,30.00,60.00,79.00,12.00,51.00,65.00,103.00,49.00,36.00,93.00,47.00,0.00,37.00,65.00,91.00,25.00},
    {74.00,53.00,53.00,33.00,78.00,20.00,68.00,4.00,45.00,76.00,74.00,70.00,38.00,20.00,67.00,68.00,80.00,36.00,81.00,22.00,101.00,75.00,71.00,28.00},
    {58.00,9.00,28.00,96.00,75.00,10.00,12.00,39.00,63.00,65.00,73.00,31.00,85.00,31.00,36.00,20.00,108.00,0.00,91.00,36.00,20.00,48.00,105.00,101.00},
    {84.00,76.00,13.00,75.00,42.00,85.00,103.00,100.00,94.00,22.00,87.00,60.00,32.00,99.00,100.00,96.00,54.00,63.00,17.00,30.00,95.00,54.00,51.00,93.00},
    {54.00,32.00,19.00,75.00,80.00,15.00,66.00,54.00,92.00,79.00,19.00,24.00,54.00,13.00,15.00,39.00,35.00,102.00,99.00,68.00,92.00,89.00,54.00,36.00},
    {43.00,72.00,66.00,28.00,16.00,7.00,11.00,71.00,39.00,31.00,36.00,10.00,47.00,102.00,64.00,29.00,72.00,83.00,53.00,17.00,97.00,68.00,56.00,22.00},
    {61.00,46.00,91.00,43.00,26.00,35.00,80.00,70.00,108.00,37.00,98.00,14.00,45.00,0.00,86.00,85.00,32.00,12.00,95.00,79.00,5.00,49.00,108.00,77.00},
    {23.00,52.00,95.00,10.00,10.00,42.00,33.00,72.00,89.00,14.00,5.00,5.00,50.00,85.00,76.00,48.00,13.00,64.00,63.00,58.00,65.00,39.00,33.00,97.00},
    {52.00,18.00,67.00,57.00,68.00,65.00,25.00,91.00,7.00,10.00,101.00,18.00,52.00,24.00,90.00,31.00,39.00,96.00,37.00,89.00,72.00,3.00,28.00,85.00},
    {68.00,91.00,33.00,24.00,21.00,67.00,12.00,74.00,86.00,79.00,22.00,44.00,34.00,47.00,25.00,42.00,58.00,17.00,61.00,1.00,41.00,42.00,33.00,81.00},
    {28.00,71.00,60.00,101.00,75.00,89.00,76.00,34.00,71.00,0.00,58.00,92.00,68.00,70.00,57.00,44.00,39.00,79.00,88.00,74.00,16.00,3.00,6.00,75.00},
    {20.00,68.00,77.00,62.00,0.00,0.00,33.00,28.00,72.00,94.00,19.00,37.00,73.00,96.00,71.00,34.00,97.00,20.00,17.00,55.00,91.00,74.00,99.00,21.00},
    {43.00,77.00,95.00,60.00,81.00,102.00,25.00,101.00,60.00,102.00,54.00,60.00,103.00,87.00,89.00,65.00,72.00,109.00,102.00,35.00,96.00,64.00,70.00,83.00},
    {85.00,87.00,28.00,66.00,51.00,18.00,87.00,95.00,96.00,73.00,45.00,67.00,65.00,71.00,59.00,16.00,63.00,3.00,77.00,56.00, 91.00, 56.00,12.00, 53.00},
    {56.00, 5.00, 89.00, 42.00, 70.00, 49.00, 15.00, 45.00,27.00, 44.00, 1.00, 78.00, 63.00, 89.00, 64.00, 49.00,52.00, 109.00, 6.00, 8.00, 70.00, 65.00, 24.00, 24.00}
};

// Reduce matrix A to upper triangular form
void eliminate()
{
    float *pivot = &A[0][0];
    float *pivot_end = &A[24][24];
    float *nlp_end = &A[24][0];
    int k = 0;

    // Loop over all pivot elements (diagonal elements)
    while (pivot != pivot_end)
    {
        // For all elements in pivot row and right of pivot element
        float *row_start = pivot + 1;
        float *row_end = pivot + 24 - k;
        float *nlp = row_end;

        float akk = 1.0f / *pivot;
        *pivot = 1.0;
        int len = row_end - row_start;
        int n = (len + 8 - 1) / 8;
        switch (len % 8)
        {
            case 0: do { *row_start++ *= akk;
            case 7: *row_start++ *= akk;
            case 6: *row_start++ *= akk;
            case 5: *row_start++ *= akk;
            case 4: *row_start++ *= akk;
            case 3: *row_start++ *= akk;
            case 2: *row_start++ *= akk;
            case 1: *row_start++ *= akk;
                        } while (--n > 0);
        }

        // For all elements below pivot row and right of pivot column
        while (nlp != nlp_end)
        {
            nlp += k;
            float *col_start = pivot + 1;
            float aik = *nlp;
            *nlp = 0.0;
            while (col_start != row_end)
            {
                nlp += 1;
                *nlp -= aik * *col_start++;
            }
            nlp += 1;
        }

        pivot += 25;
        k++;
    }
}

void printMatrix()
{
    for (int i = 0; i < 24; ++i)
    {
        for (int j = 0; j < 24; ++j)
            printf("%12g", A[i][j]);
        printf("n");
    }
    printf("n");
}

int main(void)
{
    // printMatrix();
    eliminate();
    printMatrix();
}

EDIT: I have found something interesting.

When printing out the matrix after each iteration I notice that when k=23 (second to last row) the segfault happens.

while(pivot != pivot_end) {
    printMatrix()
    // ... rest of algorithm
}
// where printMatrix() is
void printMatrix()
{
    for (int i = 0; i < 24; ++i)
    {
        for (int j = 0; j < 24; ++j)
            printf("%.2f  ", A[i][j]);
        printf("n");
    }
    printf("n");
}

This is the matrix after iteration where k=22, two iterations left to do of the algorithm. All good so far.

1.00  0.47  0.93  0.95  1.09  0.23  0.39  0.91  0.33  0.65  0.57  0.75  0.43  0.61  1.13  1.09  0.75  0.85  0.16  0.72  0.01  0.28  0.16  0.96  
0.00  1.00  -0.05  -0.14  2.51  0.56  1.37  2.12  1.70  0.58  1.68  1.23  1.29  2.68  0.35  1.82  1.23  1.93  1.14  2.38  0.85  0.92  2.36  2.35  
0.00  0.00  1.00  0.30  -4.30  0.25  -0.78  -3.27  -3.00  -1.09  -2.20  -2.20  -1.92  -3.02  0.99  -1.75  -1.79  -2.54  -1.35  -2.90  -0.54  -0.39  -3.25  -4.30  
0.00  0.00  0.00  1.00  1.60  0.38  -0.37  0.96  4.04  0.29  0.85  3.67  3.20  -0.81  -4.45  0.32  0.53  1.36  3.35  0.68  1.38  1.17  1.93  4.94  
0.00  0.00  0.00  0.00  1.00  0.61  -0.47  0.15  2.49  -0.57  0.36  2.10  1.61  0.20  -2.47  0.96  0.32  1.35  2.21  0.99  0.61  0.89  1.48  2.39  
0.00  0.00  0.00  0.00  0.00  1.00  -4.72  -1.80  11.27  -3.75  0.07  11.37  7.99  -3.54  -15.51  1.25  0.28  3.28  12.26  0.62  3.88  2.64  5.75  11.70  
0.00  0.00  0.00  0.00  0.00  0.00  1.00  -0.11  -2.09  0.71  -0.28  -2.41  -1.50  0.76  3.25  -0.13  -0.41  -0.61  -2.49  0.24  -1.39  0.02  -1.81  -2.58  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  1.02  0.18  1.06  1.66  1.02  -0.39  -2.03  -0.48  0.86  -0.04  1.50  -0.45  1.90  -0.27  2.15  2.31  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  0.18  0.29  1.06  1.01  -0.23  -1.27  -0.33  -0.02  0.02  1.03  0.31  0.22  0.69  0.32  1.64  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  -0.90  -4.05  2.56  7.08  10.80  2.55  -4.99  8.98  0.51  6.48  -9.81  10.06  -4.08  0.99  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  0.98  -0.77  -2.30  -2.33  -0.43  2.02  -1.16  1.32  -3.19  5.15  -2.17  3.59  -1.06  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  -0.86  -2.19  -1.86  -0.12  0.42  -0.48  0.04  -2.48  3.01  -1.79  0.80  -1.18  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  1.32  -0.86  -0.91  0.54  -2.74  -0.55  2.68  -3.30  -0.33  -1.24  1.90  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  4.63  1.70  -3.16  4.01  -2.17  -1.25  -2.39  2.92  -3.36  0.77  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  0.67  -0.66  1.64  0.50  -0.15  0.08  0.80  0.29  -0.14  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  1.17  9.90  7.48  -1.44  11.56  4.70  8.96  -1.07  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  0.18  1.94  -0.26  2.48  -0.74  2.63  0.48  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  0.46  -0.20  1.08  0.57  0.50  -0.09  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  1.67  -0.63  0.76  1.75  4.98  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  0.64  1.76  1.64  7.26  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  2.58  1.87  8.87  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  1.00  0.57  3.10  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  -23.26  -242.43  
0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  71.92  0.80

Then when we increment k++ (k now 23) and begin the next iteration and print out the matrix before continuing to the algorithm, it prints out only this and then segfaults. This is half of the first row.

1.00  0.47  0.93  0.95  1.09  0.23  0.39  0.91  0.33  0.65  0.57  0.75  0.43  0.61  1.13  1.09  0.7

I don’t understand why it would segfault while printing out the array… unless something with the array has been corrupted during the previous iteration when k=22.

Leave a Comment