Getting started in C¶
Every data object in libscientific is allocated on the heap and supports dynamic memory allocation.
Hence, every data object, such as a matrix, vector, tensor, or model, must be manually allocated and deallocated by the programmer using the library's predefined constructs “NewSOMETHING(&…);” and “DelSOMETHING(&…);”.
Every data object is therefore a pointer and has to be passed by reference with “&”. To avoid memory leaks, please remember to deallocate every allocated variable at the end of your program :-)
Compile a program that uses libscientific¶
A program that uses libscientific requires only one include directive, as follows:
#include <scientific.h>
Then compile the code with a C or C++ compiler, linking with -lscientific and specifying the right paths using the -L<library/path/of/libscientific> and -I<include/path/of/libscientific> options:
gcc -o example1 -I/usr/local/include -L/usr/local/lib example1.c -lscientific
Vector operations¶
Create/Allocate a vector¶
There are four different types of vectors:
Double vector: dvector
Integer vector: ivector
Unsigned integer vector: uivector
String vector: strvector
Here we show an example of how to allocate/deallocate these four vector types.
#include <stdio.h>
#include <scientific.h>

int main(void){
  int i;
  /* Define the variable vector, which in this case is a double vector.
   * To use an integer, unsigned integer, or string vector instead,
   * this construct becomes
   * ivector *v; or uivector *v; or strvector *v;
   */

  dvector *v;
  NewDVector(&v, 5); // Allocate the memory space.

  // Fill the vector with values
  for(i = 0; i < 5; i++){
    v->data[i] = (double)i;
  }

  // Print the vector values to video
  PrintDVector(v);

  // Free the memory space
  DelDVector(&v);
}
Append a value to a given vector¶
Here we show an example of how to append a value to a vector.
#include <stdio.h>
#include <scientific.h>

int main(void){
  int i;

  /*
   * We define here the double vector.
   * To use another vector type, replace dvector with one of
   * the other three possibilities:
   * - uivector
   * - ivector
   * - strvector
   */
  dvector *v;

  NewDVector(&v, 5); // We initialize the vector

  // We add 5 numbers
  for(i = 0; i < 5; i++){
    v->data[i] = (double)i;
  }

  // We append a new number
  DVectorAppend(v, 123.4);

  // Print the result to video
  PrintDVector(v);

  // Free up the memory
  DelDVector(&v);
}
Work with string vectors¶
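The string vector follows the same allocate/append/deallocate pattern as the numeric vectors shown above. The sketch below is a minimal example, assuming the strvector API mirrors the dvector one (initStrVector, StrVectorAppend, PrintStrVector, DelStrVector); check scientific.h for the exact function names and signatures.

#include <stdio.h>
#include <scientific.h>

int main(void)
{
  /* Assumed API: initStrVector/StrVectorAppend/PrintStrVector/DelStrVector,
   * analogous to the dvector functions used in the previous examples. */
  strvector *s;

  initStrVector(&s); // Initialize an empty string vector

  // Append strings one by one
  StrVectorAppend(s, "alpha");
  StrVectorAppend(s, "beta");
  StrVectorAppend(s, "gamma");

  PrintStrVector(s); // Print the string vector to video

  DelStrVector(&s); // Free the memory space
}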
Matrix operations¶
A matrix is a user-defined data type that contains:
- the number of rows
- the number of columns
- the 2D data array, which defines the matrix
The data array is explicitly of type double so that it can handle a wide range of numbers.
/*
 * matrix data structure
 *
 * - **data** two dimensional array of double
 * - **row** number of rows
 * - **col** number of columns
 */
Create/Allocate a matrix with a specific size¶
Create a simple matrix of 10 rows and 15 columns and fill it with numbers.
Then print it to the terminal using “PrintMatrix();”
#include <stdio.h>
#include <scientific.h>

int main(void)
{
  int i, j;
  matrix *m; // Definition of the matrix pointer variable
  NewMatrix(&m, 10, 15); // Create the matrix with 10 rows and 15 columns. Each value in the matrix is initialized to 0.
  for(i = 0; i < 10; i++){
    for(j = 0; j < 15; j++){
      m->data[i][j] = (double)(i+j); // Fill the matrix with values
    }
  }
  PrintMatrix(m); // Print the matrix content to video
  DelMatrix(&m); // Free the memory space
}
Initialize an empty matrix and append a row/column to it¶
An empty matrix is an object with rows and columns equal to 0. Rows and columns can then be appended to it dynamically, or it can be resized later on.
In this example we initialize an empty matrix and append several rows to it.
#include <stdio.h>
#include <scientific.h>

int main(void)
{
  int i;
  matrix *mx; // Definition of the matrix variable as a pointer
  dvector *row; // Definition of the row variable as a pointer

  NewDVector(&row, 15);
  for(i = 0; i < row->size; i++){
    row->data[i] = (double)i; // Fill the row vector once
  }

  initMatrix(&mx); // Initialize the empty matrix with rows and columns equal to 0

  for(i = 0; i < 5; i++){
    MatrixAppendRow(mx, row); // Append the row to the matrix mx 5 times
  }

  PrintMatrix(mx); // Print the matrix to video

  DelDVector(&row); // Free the memory space of the row vector
  DelMatrix(&mx); // Free the memory space of the matrix
}
Of course, the same code can be reused to append columns by using “MatrixAppendCol” instead of “MatrixAppendRow”, as shown in the sketch below.
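For reference, here is a minimal sketch of the column case. It assumes that “MatrixAppendCol” takes the same arguments as “MatrixAppendRow” (the matrix and a dvector); the appended vector is treated as a column, so its size becomes the number of rows of the matrix.

#include <stdio.h>
#include <scientific.h>

int main(void)
{
  int i;
  matrix *mx;   // Empty matrix to be grown column by column
  dvector *col; // Column to append; its size becomes the number of matrix rows

  NewDVector(&col, 15);
  for(i = 0; i < 15; i++){
    col->data[i] = (double)i;
  }

  initMatrix(&mx); // Start from an empty matrix (0 rows, 0 columns)

  for(i = 0; i < 5; i++){
    MatrixAppendCol(mx, col); // Assumed to mirror MatrixAppendRow: append col as a new column
  }

  PrintMatrix(mx); // The result should be a 15 x 5 matrix

  DelDVector(&col); // Free the memory space of the column vector
  DelMatrix(&mx);   // Free the memory space of the matrix
}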
Matrix x Column vector dot product¶
In this example we illustrate the product between an M x N matrix and a column double vector of size N.
#include <stdio.h>
#include <scientific.h>

int main(void)
{
  int i, j;
  matrix *mx; // Definition of the matrix variable as a pointer
  dvector *cvect; // Definition of the column vector as a pointer
  dvector *result; // Definition of the vector that will store the product between the matrix and the column vector

  NewDVector(&cvect, 15);
  for(i = 0; i < cvect->size; i++){
    cvect->data[i] = (double)i; // Fill the column vector once
  }

  NewMatrix(&mx, 23, 15); // Initialize a matrix with 23 rows and 15 columns

  for(i = 0; i < mx->row; i++){
    for(j = 0; j < mx->col; j++){
      mx->data[i][j] = (double)(i+j);
    }
  }
  NewDVector(&result, 23);

  MatrixDVectorDotProduct(mx, cvect, result);
  /*
   * or MT_MatrixDVectorDotProduct if you want to run the multithreaded operation.
   * This function is useful for large matrices.
   */

  PrintDVector(result); // Print the result to video
  // Free the memory spaces
  DelDVector(&result);
  DelDVector(&cvect);
  DelMatrix(&mx);
}
Transpose a matrix¶
A matrix transpose is an operation that flips a matrix over its diagonal. Here is an example that shows how to produce a transpose of a given matrix.
#include <stdio.h>
#include <scientific.h>

int main(void)
{
  int i, j;
  matrix *m, *m_T; // Definition of the matrix pointer variables
  NewMatrix(&m, 10, 15); // Create the matrix with 10 rows and 15 columns. Each value in the matrix is initialized to 0.
  NewMatrix(&m_T, m->col, m->row); // Create the transposed matrix with rows and columns swapped

  for(i = 0; i < 10; i++){
    for(j = 0; j < 15; j++){
      m->data[i][j] = (double)(i+j); // Fill the matrix with values
    }
  }
  MatrixTranspose(m, m_T);
  PrintMatrix(m); // Print the original matrix content to video
  puts("Transposed matrix");
  PrintMatrix(m_T); // Print the transposed matrix to video
  // Free the memory spaces
  DelMatrix(&m);
  DelMatrix(&m_T);
}
Invert a matrix¶
In this example we show how to invert a matrix with libscientific.
#include <stdio.h>
#include <scientific.h>
#include <math.h>

int main(void)
{
  matrix *m; // Definition of the matrix variable as a pointer
  matrix *m_inv; // Definition of the inverted matrix variable as a pointer

  NewMatrix(&m, 10, 10); // Allocate the matrix to invert
  MatrixInitRandomFloat(m, -3., 3.); // Fill the matrix with random values in the range -3 < x < 3
  PrintMatrix(m); // Print the matrix to video

  initMatrix(&m_inv); // Initialize the output (inverted) matrix
  MatrixInversion(m, m_inv); // Invert the matrix

  double det = fabs(MatrixDeterminant(m)); // Calculate the absolute value of the determinant

  printf("Determinant %.4f\n", det); // Print the matrix determinant to video
  PrintMatrix(m_inv); // Print the inverted matrix to video

  // Free the memory spaces
  DelMatrix(&m_inv);
  DelMatrix(&m);
}
Calculate eigenvectors and eigenvalues of a matrix¶
This example shows how to calculate the eigenvectors and eigenvalues of an N x N real nonsymmetric matrix. The eigenvectors/eigenvalues are computed using the dgeev.f code extracted from the LAPACK library.
#include <stdio.h>
#include <scientific.h>
#include <math.h>

int main(void)
{
  matrix *m; // Definition of the matrix variable as a pointer
  dvector *eval; // Definition of the variable that will store the eigenvalues
  matrix *evect; // Definition of the variable that will store the eigenvectors

  NewMatrix(&m, 10, 10); // Allocate the input matrix
  MatrixInitRandomFloat(m, -3., 3.); // Fill the matrix with random values in the range -3 < x < 3
  PrintMatrix(m); // Print the matrix to video

  // Initialize the output variables
  initDVector(&eval);
  initMatrix(&evect);

  EVectEval(m, eval, evect); // Calculate the eigenvectors and associated eigenvalues

  PrintDVector(eval); // Print the eigenvalues to video
  PrintMatrix(evect); // Print the eigenvectors to video. Each column corresponds to an eigenvalue

  // Free the memory spaces
  DelDVector(&eval);
  DelMatrix(&evect);
  DelMatrix(&m);
}
Singular Value Decomposition of a square matrix¶
In this example we show how to factorize a square matrix using the singular value decomposition (SVD) method.
#include <stdio.h>
#include <scientific.h>
#include <math.h>

int main(void)
{
  matrix *m; // Definition of the matrix variable as a pointer
  matrix *U; // Definition of the unitary matrix
  matrix *S; // Definition of the rectangular diagonal matrix with non-negative real numbers on the diagonal
  matrix *Vt; // Definition of the transpose of the unitary matrix V

  NewMatrix(&m, 10, 10); // Allocate the matrix to factorize
  MatrixInitRandomFloat(m, -3., 3.); // Fill the matrix with random values in the range -3 < x < 3
  PrintMatrix(m); // Print the matrix to video

  // Initialize the output variables
  initMatrix(&U);
  initMatrix(&S);
  initMatrix(&Vt);
  SVD(m, U, S, Vt); // Internal method
  // SVDlapack(m, U, S, Vt); // LAPACK method using dgesdd
  // Print the results of the factorization to video
  PrintMatrix(U);
  PrintMatrix(S);
  PrintMatrix(Vt);

  // Free the memory spaces
  DelMatrix(&U);
  DelMatrix(&S);
  DelMatrix(&Vt);
  DelMatrix(&m);
}
Tensor operations¶
A tensor is a user-defined data type that contains:
- order: the number of matrices (layers)
- m: the array of matrices (2D data arrays), which defines the tensor itself
The data array is explicitly of type double so that it can handle a wide range of numbers.
/*
 * Tensor data structure
 *
 * - **m** list of matrices
 * - **order** number of matrix layers
 */
Create/Allocate a tensor with a specific size¶
Create a simple tensor of 3 blocks, 10 rows and 15 columns and fill it with numbers.
Then print it to the terminal using “PrintTensor();”
#include <stdio.h>
#include <scientific.h>

int main(void)
{
  int i, j, k;
  tensor *t; // Definition of the tensor pointer variable
  NewTensor(&t, 3); // Create the tensor with 3 blocks
  for(k = 0; k < 3; k++){
    NewTensorMatrix(t, k, 10, 15);
    for(i = 0; i < 10; i++){
      for(j = 0; j < 15; j++){
        t->m[k]->data[i][j] = (double)(i+j); // Fill the tensor with values
      }
    }
  }
  PrintTensor(t); // Print the tensor content to video
  DelTensor(&t); // Free the memory space
}
Initialize an empty tensor and append different matrices to it¶
An empty tensor is an object with zero matrices (order equal to 0). Matrices with different numbers of rows and columns can be added to it dynamically.
In this example we initialize an empty tensor and append different matrices to it.
#include <stdio.h>
#include <scientific.h>

int main(void)
{
  size_t i, j, k;
  tensor *t; // Definition of the tensor pointer variable
  matrix *m;
  initTensor(&t); // Initialize the empty tensor
  NewMatrix(&m, 10, 7); // Create a matrix with 10 rows and 7 columns
  TensorAppendMatrix(t, m); // Append this matrix to the tensor t
  DelMatrix(&m); // Delete the matrix

  NewMatrix(&m, 10, 10); // Create a second matrix with 10 rows and 10 columns
  TensorAppendMatrix(t, m); // Append this new matrix to the tensor t
  DelMatrix(&m);

  for(k = 0; k < t->order; k++){
    for(i = 0; i < t->m[k]->row; i++){
      for(j = 0; j < t->m[k]->col; j++){
        t->m[k]->data[i][j] = (double)(i+j); // Fill the tensor with values
      }
    }
  }
  PrintTensor(t); // Print the tensor content to video
  DelTensor(&t); // Free the memory space
}
Multivariate analysis algorithms¶
In this section you will find examples of running multivariate analysis algorithms. The algorithms described here are taken from official scientific publications and adapted to run in multithreading to speed up the calculation.
PCA and PLS implement the NIPALS algorithm described in the following publication:
CPCA implements the NIPALS algorithm described in the following publication:
Principal Component Analysis (PCA)¶
Here is an example that shows how to compute a principal component analysis on a matrix.
#include <stdio.h>
#include <stdlib.h>
#include <scientific.h>

int main(void)
{
  matrix *m; // Definition of the input matrix
  PCAMODEL *model; // Definition of the PCA model
  size_t i, j;
  size_t nobj = 20;
  size_t nvars = 8;
  NewMatrix(&m, nobj, nvars);

  // Fill the matrix m with random values
  srand(nobj);
  for(i = 0; i < nobj; i++){
    for(j = 0; j < nvars; j++){
      m->data[i][j] = randDouble(0, 20);
    }
  }

  NewPCAModel(&model); // Allocate the PCA model
  PCA(m, 1, 5, model, NULL); // Calculate the PCA on matrix m using unit variance scaling (1) and the extraction of 5 principal components

  PrintPCA(model); // Print the PCA results to video

  /* Of course you can print the different results contained in the model variable separately:
   * model->scores is the matrix of scores
   * model->loadings is the matrix of loadings
   * model->colavg is the column average obtained from the input matrix
   * model->scaling is the scaling factor obtained from the input matrix
   */

  // Free the memory spaces
  DelPCAModel(&model);
  DelMatrix(&m);
}
Consensus Principal Component Analysis (CPCA)¶
Here is an example that shows how to compute a consensus principal component analysis on a tensor.
#include <stdio.h>
#include <stdlib.h>
#include <scientific.h>

int main(void)
{
  tensor *t; // Definition of the input tensor
  CPCAMODEL *model; // Definition of the CPCA model
  int i, j, k;
  int nblocks = 4;
  int nobj = 20;
  int nvars[4] = {8, 10, 5, 7}; // Every block has a different number of variables
  NewTensor(&t, nblocks);

  // Fill the tensor t with random values
  srand(nobj);
  for(k = 0; k < nblocks; k++){
    NewTensorMatrix(t, k, nobj, nvars[k]);
    for(i = 0; i < nobj; i++){
      for(j = 0; j < nvars[k]; j++){
        t->m[k]->data[i][j] = randDouble(0, 20);
      }
    }
  }

  NewCPCAModel(&model); // Allocate the CPCA model
  CPCA(t, 1, 5, model); // Calculate the CPCA on tensor t using unit variance scaling (1) and the extraction of 5 super principal components

  PrintCPCA(model); // Print the CPCA results to video

  /* Of course you can print the different results contained in the model variable separately:
   * model->super_scores is the matrix of super scores
   * model->super_weights is the matrix of super weights
   * model->block_scores is the tensor of scores for each block
   * model->block_loadings is the tensor of loadings for each block
   */

  // Free the memory spaces
  DelCPCAModel(&model);
  DelTensor(&t);
}
Partial Least Squares (PLS)¶
A matrix of features (independent variables) and a matrix of targets (dependent variables) are required to calculate a PLS model. Here is a simple example that shows how to calculate a PLS model.
#include <stdio.h>
#include <scientific.h>

int main(void)
{
  matrix *x, *y; // Define the feature matrix x and the target matrix y to predict
  dvector *betas; // Define the beta coefficients
  PLSMODEL *m;

  // Allocate the matrices
  NewMatrix(&x, 14, 6);
  NewMatrix(&y, 14, 1);

  // Fill the matrices with values.
  // This is a manual filling.
  // Of course we could read a csv file and fill them automatically.

  x->data[0][0] = 4.0000; x->data[0][1] = 4.0000; x->data[0][2] = 1.0000; x->data[0][3] = 84.1400; x->data[0][4] = 1.0500; x->data[0][5] = 235.1500;
  x->data[1][0] = 5.0000; x->data[1][1] = 5.0000; x->data[1][2] = 1.0000; x->data[1][3] = 79.1000; x->data[1][4] = 0.9780; x->data[1][5] = 231;
  x->data[2][0] = 4.0000; x->data[2][1] = 5.0000; x->data[2][2] = 1.0000; x->data[2][3] = 67.0900; x->data[2][4] = 0.9700; x->data[2][5] = 249.0000;
  x->data[3][0] = 4.0000; x->data[3][1] = 4.0000; x->data[3][2] = 1.0000; x->data[3][3] = 68.0700; x->data[3][4] = 0.9360; x->data[3][5] = 187.3500;
  x->data[4][0] = 3.0000; x->data[4][1] = 4.0000; x->data[4][2] = 2.0000; x->data[4][3] = 68.0800; x->data[4][4] = 1.0300; x->data[4][5] = 363.0000;
  x->data[5][0] = 9.0000; x->data[5][1] = 7.0000; x->data[5][2] = 1.0000; x->data[5][3] = 129.1600; x->data[5][4] = 1.0900; x->data[5][5] = 258.0000;
  x->data[6][0] = 10.0000; x->data[6][1] = 8.0000; x->data[6][2] = 0.0000; x->data[6][3] = 128.1600; x->data[6][4] = 1.1500; x->data[6][5] = 352.0000;
  x->data[7][0] = 6.0000; x->data[7][1] = 6.0000; x->data[7][2] = 0.0000; x->data[7][3] = 78.1118; x->data[7][4] = 0.8765; x->data[7][5] = 278.6400;
  x->data[8][0] = 16.0000; x->data[8][1] = 10.0000; x->data[8][2] = 0.0000; x->data[8][3] = 202.2550; x->data[8][4] = 1.2710; x->data[8][5] = 429.1500;
  x->data[9][0] = 6.0000; x->data[9][1] = 12.0000; x->data[9][2] = 0.0000; x->data[9][3] = 84.1600; x->data[9][4] = 0.7800; x->data[9][5] = 279.0000;
  x->data[10][0] = 4.0000; x->data[10][1] = 8.0000; x->data[10][2] = 1.0000; x->data[10][3] = 72.1100; x->data[10][4] = 0.8900; x->data[10][5] = 164.5000;
  x->data[11][0] = 4.0000; x->data[11][1] = 9.0000; x->data[11][2] = 1.0000; x->data[11][3] = 71.1100; x->data[11][4] = 0.8660; x->data[11][5] = 210.0000;
  x->data[12][0] = 5.0000; x->data[12][1] = 11.0000; x->data[12][2] = 1.0000; x->data[12][3] = 85.1500; x->data[12][4] = 0.8620; x->data[12][5] = 266.0000;
  x->data[13][0] = 5.0000; x->data[13][1] = 10.0000; x->data[13][2] = 1.0000; x->data[13][3] = 86.1300; x->data[13][4] = 0.8800; x->data[13][5] = 228.0000;

  y->data[0][0] = 357.1500;
  y->data[1][0] = 388.0000;
  y->data[2][0] = 403.0000;
  y->data[3][0] = 304.5500;
  y->data[4][0] = 529.0000;
  y->data[5][0] = 510.0000;
  y->data[6][0] = 491.0000;
  y->data[7][0] = 353.3000;
  y->data[8][0] = 666.6500;
  y->data[9][0] = 354.0000;
  y->data[10][0] = 339.0000;
  y->data[11][0] = 360.0000;
  y->data[12][0] = 379.0000;
  y->data[13][0] = 361.0000;

  // Allocate the PLS model
  NewPLSModel(&m);

  /* Calculate the partial least squares algorithm taking as input:
   * x: the feature matrix x
   * y: the target matrix y
   * nlv: the number of latent variables nlv
   * xautoscaling: the autoscaling type for the x matrix
   * yautoscaling: the autoscaling type for the y matrix
   * model: the PLSMODEL previously allocated
   * ssignal: a scientific signal to stop the calculation if requested by the user
   *
   * More information in the pls.h header file:
   * void PLS(matrix *mx, matrix *my, size_t nlv, size_t xautoscaling, size_t yautoscaling, PLSMODEL *model, ssignal *s);
   */
  PLS(x, y, 3, 1, 0, m, NULL);

  PrintPLSModel(m); // Print the PLS model to video

  /* Validate the model using the internal validation method */
  MODELINPUT minpt = initModelInput(); // Define the model input for the validation method
  minpt.mx = x;
  minpt.my = y;
  minpt.nlv = 3;
  minpt.xautoscaling = 1;
  minpt.yautoscaling = 0;

  // Use the bootstrap random groups cross validation.
  BootstrapRandomGroupsCV(&minpt, 3, 100, _PLS_, m->predicted_y, m->pred_residuals, 4, NULL, 0);
  // We could also compute leave-one-out cross validation instead:
  // LeaveOneOut(&minpt, _PLS_, m->predicted_y, m->pred_residuals, 4, NULL, 0);

  // Calculate the model validation statistics
  PLSRegressionStatistics(y, m->predicted_y, m->q2y, m->sdep, m->bias);

  // Print the results of the validation and the predicted values to video
  puts("Q2 Cross Validation");
  PrintMatrix(m->q2y);
  puts("SDEP Cross Validation");
  PrintMatrix(m->sdep);
  puts("BIAS Cross Validation");
  PrintMatrix(m->bias);

  // Calculate the beta coefficients to see the importance of each feature
  puts("Beta coefficients");
  initDVector(&betas);
  PLSBetasCoeff(m, GetLVCCutoff(m->q2y), betas); // GetLVCCutoff selects the best Q2 value among all the possibilities
  PrintDVector(betas);

  puts("PREDICTED VALUES");
  PrintMatrix(m->predicted_y);

  puts("PREDICTED RESIDUALS");
  PrintMatrix(m->pred_residuals);

  puts("REAL Y");
  PrintMatrix(y);

  // Free the memory spaces
  DelDVector(&betas);
  DelPLSModel(&m);
  DelMatrix(&x);
  DelMatrix(&y);
}