LCOV - code coverage report
Current view: top level - lib_dec - ivas_dirac_output_synthesis_cov_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main @ 61b27367e0d19b9fc53332787af331bed7af66b1 Lines: 835 879 95.0 %
Date: 2025-10-21 02:53:18 Functions: 9 9 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : #include "basop_util.h"
      34             : #include "enh32.h"
      35             : #include <stdint.h>
      36             : #include <string.h>
      37             : #include <stdio.h>
      38             : #include <stdlib.h>
      39             : #include <assert.h>
      40             : #include <math.h>
      41             : #include "options.h"
      42             : #include "cnst.h"
      43             : #include "rom_enc.h"
      44             : #include "rom_com.h"
      45             : #include "prot_fx.h"
      46             : #include "ivas_stat_dec.h"
      47             : #include "ivas_cnst.h"
      48             : #include "ivas_rom_com.h"
      49             : #include "ivas_rom_dec.h"
      50             : #include "wmc_auto.h"
      51             : #include "rom_dec.h"
      52             : #include "ivas_prot_fx.h"
      53             : 
      54             : /*-----------------------------------------------------------------------*
      55             :  * Local constants
      56             :  *-----------------------------------------------------------------------*/
      57             : 
      58             : #define SQRT_EPSILON_FX 68 /* Q31 square root of EPSILON */
      59             : 
      60             : /*-------------------------------------------------------------------*
      61             :  * ivas_dirac_dec_output_synthesis_cov_open()
      62             :  *
      63             :  * Sets up the state and parameters for the Covariance Synthesis
      64             :  *-------------------------------------------------------------------*/
      65             : 
      66         341 : ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx(
      67             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params,   /* i/o: handle for the covariance synthesis parameters                                                        */
      68             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: hanlde for the covariance synthesis state                                                             */
      69             :     const Word16 max_band_decorr,                                     /* i  : uppermost frequency band where decorrelation is applied                                               */
      70             :     const Word16 interp_length,                                       /* i  : length for interpolating the mixing matrices in time slots                                            */
      71             :     const Word16 num_param_bands,                                     /* i  : number of parameter bands                                                                             */
      72             :     const Word16 num_param_bands_residual,                            /* i  : number of parameter bands with a residual mixing matrix (i.e. decorrelation                           */
      73             :     const Word16 nchan_in,                                            /* i  : number of input (transport) channels                                                                  */
      74             :     const Word16 nchan_out,                                           /* i  : number of output channels                                                                             */
      75             :     const Word32 *proto_matrix                                        /* i  : the prototype (upmix) matrix (only used if mode == 1)  Q(15-proto_matrix_e)                                               */
      76             : )
      77             : {
      78             :     Word16 idx;
      79             : 
      80         341 :     h_dirac_output_synthesis_params->max_band_decorr = max_band_decorr;
      81         341 :     move16();
      82             : 
      83             :     /*-----------------------------------------------------------------*
      84             :      * memory allocation
      85             :      *-----------------------------------------------------------------*/
      86             : 
      87             :     /* buffer length and interpolator */
      88         341 :     h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
      89         341 :     IF( ( h_dirac_output_synthesis_params->proto_matrix_fx = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
      90             :     {
      91           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
      92             :     }
      93         341 :     h_dirac_output_synthesis_params->proto_matrix_len = imult1616( nchan_out, nchan_in );
      94         341 :     move16();
      95             :     /* cov buffers */
      96        4445 :     FOR( idx = 0; idx < num_param_bands; idx++ )
      97             :     {
      98        4104 :         IF( ( h_dirac_output_synthesis_state->cx_old_fx[idx] = (Word32 *) malloc( nchan_in * nchan_in * sizeof( Word32 ) ) ) == NULL )
      99             :         {
     100           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     101             :         }
     102        4104 :         h_dirac_output_synthesis_state->cx_old_len = imult1616( nchan_in, nchan_in );
     103        4104 :         IF( ( h_dirac_output_synthesis_state->cy_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_out * sizeof( Word32 ) ) ) == NULL )
     104             :         {
     105           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     106             :         }
     107        4104 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
     108             :         {
     109           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     110             :         }
     111        4104 :         set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
     112        4104 :         set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     113        4104 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
     114             : 
     115        4104 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
     116             :         {
     117           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     118             :         }
     119        4104 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
     120        4104 :         h_dirac_output_synthesis_state->mixing_matrix_len = i_mult( nchan_out, nchan_in );
     121        4104 :         move16();
     122             :     }
     123       16697 :     FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     124             :     {
     125       16356 :         h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
     126       16356 :         h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
     127       16356 :         h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
     128       16356 :         h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
     129             :     }
     130             : 
     131        3563 :     FOR( idx = 0; idx < num_param_bands_residual; idx++ )
     132             :     {
     133        3222 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
     134             :         {
     135           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     136             :         }
     137        3222 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     138             : 
     139        3222 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
     140             :         {
     141           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     142             :         }
     143        3222 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
     144        3222 :         h_dirac_output_synthesis_state->mixing_matrix_res_len = i_mult( nchan_out, nchan_out );
     145        3222 :         move16();
     146             :     }
     147       17579 :     FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     148             :     {
     149       17238 :         h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
     150       17238 :         h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
     151             :     }
     152             : 
     153         341 :     IF( ( h_dirac_output_synthesis_state->cx_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     154             :     {
     155           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     156             :     }
     157         341 :     IF( ( h_dirac_output_synthesis_state->cy_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     158             :     {
     159           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     160             :     }
     161             : 
     162         341 :     set16_fx( h_dirac_output_synthesis_state->cx_old_e, 0, CLDFB_NO_CHANNELS_MAX );
     163         341 :     set16_fx( h_dirac_output_synthesis_state->cy_old_e, 0, CLDFB_NO_CHANNELS_MAX );
     164             : 
     165         341 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     166             :     {
     167           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     168             :     }
     169         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
     170             : 
     171         341 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     172             :     {
     173           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     174             :     }
     175         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     176             : 
     177         341 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     178             :     {
     179           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     180             :     }
     181         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
     182             : 
     183         341 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     184             :     {
     185           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     186             :     }
     187         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     188             : 
     189             :     /*-----------------------------------------------------------------*
     190             :      * prepare processing parameters
     191             :      *-----------------------------------------------------------------*/
     192             : 
     193             :     /* compute interpolator */
     194         341 :     IF( ( h_dirac_output_synthesis_params->interpolator_fx = (Word16 *) malloc( interp_length * sizeof( Word16 ) ) ) == NULL )
     195             :     {
     196           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     197             :     }
     198       11253 :     FOR( idx = 1; idx <= interp_length; ++idx )
     199             :     {
     200       10912 :         h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = div_s( idx, interp_length );
     201       10912 :         move16();
     202             :     }
     203         341 :     Copy32( proto_matrix, h_dirac_output_synthesis_params->proto_matrix_fx, imult1616( nchan_in, nchan_out ) );
     204         341 :     h_dirac_output_synthesis_params->proto_matrix_e = 5;
     205         341 :     move16();
     206         341 :     return IVAS_ERR_OK;
     207             : }
     208             : 
     209             : 
     210             : /*-------------------------------------------------------------------*
     211             :  * ivas_dirac_dec_output_synthesis_get_interpolator_fx()
     212             :  *
     213             :  *
     214             :  *-------------------------------------------------------------------*/
     215             : 
     216      332209 : void ivas_dirac_dec_output_synthesis_get_interpolator_fx(
     217             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters  */
     218             :     const UWord16 interp_length                                     /* i  : interpolator length                             */
     219             : )
     220             : {
     221             :     Word16 idx;
     222      332209 :     Word16 tmp, exp_diff = 0;
     223      332209 :     move16();
     224             : 
     225     1652722 :     FOR( idx = 1; idx <= interp_length; ++idx )
     226             :     {
     227     1320513 :         tmp = BASOP_Util_Divide3232_Scale( L_deposit_l( idx ), L_deposit_l( interp_length ), &exp_diff ); // (Q15 - exp_diff)
     228     1320513 :         h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = shl_sat( tmp, exp_diff );             // Q15
     229     1320513 :         move16();
     230             :     }
     231             : 
     232      332209 :     return;
     233             : }
     234             : 
     235             : 
     236             : /*-------------------------------------------------------------------*
     237             :  * ivas_dirac_dec_output_synthesis_cov_init()
     238             :  *
     239             :  * initialize the states for the covariance synthesis
     240             :  *-------------------------------------------------------------------*/
     241         341 : void ivas_dirac_dec_output_synthesis_cov_init_fx(
     242             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: pointer to the state of the covariance synthesis                            */
     243             :     const Word16 nchan_in,                                            /* i  : number of input (tranport) channels                                         */
     244             :     const Word16 nchan_out,                                           /* i  : number of output channels                                                   */
     245             :     const Word16 n_param_bands,                                       /* i  : number of total parameter bands                                             */
     246             :     const Word16 n_param_bands_res                                    /* i  : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
     247             : )
     248             : {
     249             : 
     250             :     Word16 idx;
     251             : 
     252             :     /* initialize buffers */
     253        4445 :     FOR( idx = 0; idx < n_param_bands; idx++ )
     254             :     {
     255        4104 :         set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
     256        4104 :         set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     257        4104 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
     258        4104 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
     259             :     }
     260             : 
     261        3563 :     FOR( idx = 0; idx < n_param_bands_res; idx++ )
     262             :     {
     263        3222 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     264        3222 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
     265             :     }
     266             : 
     267             : 
     268         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     269         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
     270         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     271         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
     272             : 
     273         341 :     return;
     274             : }
     275             : 
     276             : 
     277             : /*-------------------------------------------------------------------*
     278             :  * ivas_dirac_dec_output_synthesis_cov_close()
     279             :  *
     280             :  * deallocate dynamic memory in the covariance synthesis state
     281             :  *-------------------------------------------------------------------*/
     282             : 
     283         341 : void ivas_dirac_dec_output_synthesis_cov_close_fx(
     284             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params,  /* i  : handle for the covariance synthesis parameters  */
     285             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state /* i/o: handle for the covariance synthesis state       */
     286             : )
     287             : {
     288             :     Word16 idx;
     289             : 
     290             :     /*-----------------------------------------------------------------*
     291             :      * memory deallocation
     292             :      *-----------------------------------------------------------------*/
     293             : 
     294             :     /* free interpolator */
     295         341 :     IF( h_dirac_output_synthesis_params->interpolator_fx != NULL )
     296             :     {
     297         341 :         free( h_dirac_output_synthesis_params->interpolator_fx );
     298         341 :         h_dirac_output_synthesis_params->interpolator_fx = NULL;
     299             :     }
     300             : 
     301             :     /* free alpha */
     302         341 :     IF( h_dirac_output_synthesis_params->alpha_synthesis_fx != NULL )
     303             :     {
     304           0 :         free( h_dirac_output_synthesis_params->alpha_synthesis_fx );
     305           0 :         h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
     306             :     }
     307             : 
     308             :     /* free proto_matrix */
     309         341 :     IF( h_dirac_output_synthesis_params->proto_matrix_fx != NULL )
     310             :     {
     311         341 :         free( h_dirac_output_synthesis_params->proto_matrix_fx );
     312         341 :         h_dirac_output_synthesis_params->proto_matrix_fx = NULL;
     313             :     }
     314             : 
     315             : 
     316         341 :     IF( h_dirac_output_synthesis_state->cx_old_e != NULL )
     317             :     {
     318         341 :         free( h_dirac_output_synthesis_state->cx_old_e );
     319         341 :         h_dirac_output_synthesis_state->cx_old_e = NULL;
     320             :     }
     321         341 :     IF( h_dirac_output_synthesis_state->cy_old_e != NULL )
     322             :     {
     323         341 :         free( h_dirac_output_synthesis_state->cy_old_e );
     324         341 :         h_dirac_output_synthesis_state->cy_old_e = NULL;
     325             :     }
     326             : 
     327             :     /* free cov buffers */
     328       20801 :     FOR( idx = 0; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     329             :     {
     330       20460 :         IF( h_dirac_output_synthesis_state->cx_old_fx[idx] != NULL )
     331             :         {
     332        4104 :             free( h_dirac_output_synthesis_state->cx_old_fx[idx] );
     333        4104 :             h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
     334             :         }
     335             : 
     336       20460 :         IF( h_dirac_output_synthesis_state->cy_old_fx[idx] != NULL )
     337             :         {
     338        4104 :             free( h_dirac_output_synthesis_state->cy_old_fx[idx] );
     339        4104 :             h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
     340             :         }
     341             : 
     342       20460 :         IF( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] != NULL )
     343             :         {
     344        4104 :             free( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] );
     345        4104 :             h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
     346             :         }
     347             : 
     348       20460 :         IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] != NULL )
     349             :         {
     350        3222 :             free( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] );
     351        3222 :             h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
     352             :         }
     353             : 
     354       20460 :         IF( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] != NULL )
     355             :         {
     356        4104 :             free( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] );
     357        4104 :             h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
     358             :         }
     359             : 
     360       20460 :         IF( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] != NULL )
     361             :         {
     362        3222 :             free( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] );
     363        3222 :             h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
     364             :         }
     365             :     }
     366             : 
     367         341 :     IF( h_dirac_output_synthesis_state->mixing_matrix_old_exp != NULL )
     368             :     {
     369         341 :         free( h_dirac_output_synthesis_state->mixing_matrix_old_exp );
     370         341 :         h_dirac_output_synthesis_state->mixing_matrix_old_exp = NULL;
     371             :     }
     372             : 
     373         341 :     IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp != NULL )
     374             :     {
     375         341 :         free( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp );
     376         341 :         h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = NULL;
     377             :     }
     378             : 
     379         341 :     IF( h_dirac_output_synthesis_state->mixing_matrix_exp != NULL )
     380             :     {
     381         341 :         free( h_dirac_output_synthesis_state->mixing_matrix_exp );
     382         341 :         h_dirac_output_synthesis_state->mixing_matrix_exp = NULL;
     383             :     }
     384             : 
     385         341 :     IF( h_dirac_output_synthesis_state->mixing_matrix_res_exp != NULL )
     386             :     {
     387         341 :         free( h_dirac_output_synthesis_state->mixing_matrix_res_exp );
     388         341 :         h_dirac_output_synthesis_state->mixing_matrix_res_exp = NULL;
     389             :     }
     390         341 :     return;
     391             : }
     392             : 
     393             : /*-------------------------------------------------------------------*
     394             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
     395             :  *
     396             :  * collect the multi channel input covariance for one filter bank time slot
     397             :  *-------------------------------------------------------------------*/
     398             : 
     399             : /*-------------------------------------------------------------------*
     400             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
     401             :  *
     402             :  * collect the multi channel input covariance for one filter bank time slot
     403             :  *-------------------------------------------------------------------*/
     404     2085319 : void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
     405             :     Word32 *RealBuffer_fx,                                                          /* i  : input channel filter bank samples (real part)          Q(31- RealBuffer_e)*/
     406             :     Word16 RealBuffer_e,                                                            /* i  : exponent input channel filter bank samples (real part)*/
     407             :     Word32 *ImagBuffer_fx,                                                          /* i  : input channel filter bank samples (imaginary part      Q(ImagBuffer_e)*/
     408             :     Word16 ImagBuffer_e,                                                            /* i  : exponent input channel filter bank samples (real part)*/
     409             :     Word32 cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS],      /* o  : accumulated input covariance (real part)              Q(31- cx_e)*/
     410             :     Word16 *cx_e,                                                                   /* i  : exponent for accumulated input covariance (real part) */
     411             :     Word32 cx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o  : accumulated input covariance (imaginary part)         Q(31- cx_imag_e)*/
     412             :     Word16 *cx_imag_e,                                                              /* i : exponent accumulated input covariance (imag part)      */
     413             :     PARAM_MC_DEC_HANDLE hParamMC,                                                   /* i  : handle to Parametric MC state                         */
     414             :     const Word16 param_band,                                                        /* i  : parameter band                                        */
     415             :     const Word16 nchan_in                                                           /* i  : number of input channels                              */
     416             : )
     417             : {
     418             :     Word16 band_idx, ch_idx;
     419             :     Word16 brange[2];
     420             :     Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
     421             :     Word16 real_in_e;
     422             :     Word32 imag_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
     423             :     Word16 imag_in_e;
     424             :     Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     425             :     Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     426             :     Word16 output_e;
     427             :     Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
     428             :     Word16 band, num_bands;
     429             :     Word16 cx_fx_norm, cx_imag_fx_norm;
     430             :     /* estimate input covariance */
     431             :     /* Already stack here instead of in the process_subframe */
     432             : 
     433             :     /* collect input frame */
     434     2085319 :     brange[0] = hParamMC->band_grouping[param_band];
     435     2085319 :     move16();
     436     2085319 :     brange[1] = hParamMC->band_grouping[param_band + 1];
     437     2085319 :     move16();
     438     2085319 :     num_bands = sub( brange[1], brange[0] );
     439             : 
     440    10919279 :     FOR( band_idx = 0; band_idx < num_bands; band_idx++ )
     441             :     {
     442     8833960 :         band = add( brange[0], band_idx );
     443    26570640 :         FOR( ch_idx = 0; ch_idx < nchan_in; ch_idx++ )
     444             :         {
     445    17736680 :             real_in_buffer_fx[band_idx + num_bands * ch_idx] = RealBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
     446    17736680 :             move32();
     447    17736680 :             imag_in_buffer_fx[band_idx + num_bands * ch_idx] = ImagBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
     448    17736680 :             move32();
     449             :         }
     450             :     }
     451             : 
     452     2085319 :     real_in_e = RealBuffer_e;
     453     2085319 :     move16();
     454     2085319 :     imag_in_e = ImagBuffer_e;
     455     2085319 :     move16();
     456             : 
     457     2085319 :     Word16 buf_len = imult1616( num_bands, nchan_in );
     458             : 
     459     2085319 :     shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
     460     2085319 :     shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
     461             : 
     462     2085319 :     real_in_e = sub( real_in_e, shift_real );
     463     2085319 :     imag_in_e = sub( imag_in_e, shift_imag );
     464             : 
     465             : 
     466     2085319 :     output_e = s_max( real_in_e, imag_in_e );
     467             : 
     468     2085319 :     scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
     469     2085319 :     scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
     470             : 
     471     2085319 :     cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
     472     2085319 :     v_add_fx_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
     473             : 
     474     2085319 :     v_add_fx_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
     475             : 
     476     2085319 :     cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
     477     2085319 :     cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
     478             : 
     479     2085319 :     scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
     480     2085319 :     scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
     481             : 
     482     2085319 :     *cx_e = sub( tmp1_e, cx_fx_norm );
     483     2085319 :     move16();
     484     2085319 :     *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
     485     2085319 :     move16();
     486             : 
     487     2085319 :     return;
     488             : }
     489             : 
     490             : /*-------------------------------------------------------------------*
     491             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot()
     492             :  *
     493             :  * synthesize one filter bank slot of multi channel output filter bank
     494             :  * samples with the covariance synthesis
     495             :  *-------------------------------------------------------------------*/
     496             : 
     497      163731 : void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
     498             :     Word32 *Cldfb_RealBuffer_in_fx,                                                   /*Q6*/
     499             :     Word32 *Cldfb_ImagBuffer_in_fx,                                                   /*Q6*/
     500             :     Word32 Cldfb_RealBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o  : output channel filter bank samples (real part)                   Q6*/
     501             :     Word32 Cldfb_ImagBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o  : output channel filter bank samples (imaginary part)              Q6*/
     502             :     Word32 *mixing_matrix_fx[],                                                       /* i  : parameter band wise mixing matrices (direct part)                Q(31-mixing_matrix_e)*/
     503             :     Word16 *mixing_matrix_e,                                                          /* i  : parameter band wise mixing matrices (direct part)                */
     504             :     Word32 *mixing_matrix_res_fx[],                                                   /* i  : parameter band wise mixing matrices (residual part)              Q(31-mixing_matrix_res_e)*/
     505             :     Word16 *mixing_matrix_res_e,                                                      /* i  : parameter band wise mixing matrices (residual part)              */
     506             :     const UWord16 slot_idx_sfr,                                                       /* i  : time slot index for the current slot within the current subframe */
     507             :     const UWord16 slot_idx_tot,                                                       /* i  : time slot index for the current slot within the frame            */
     508             :     const Word16 nX,                                                                  /* i  : number of input channels                                         */
     509             :     const Word16 nY,                                                                  /* i  : number of output channels                                        */
     510             :     PARAM_MC_DEC_HANDLE hParamMC                                                      /* i  : handle to the Parametric MC decoder state                        */
     511             : )
     512             : {
     513             :     Word16 param_band_idx, band, ch_idx;
     514             :     Word16 have_residual;
     515             :     Word16 brange[2];
     516             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE h_synthesis_state;
     517             :     Word32 mixing_matrix_smooth_fx[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
     518             :     Word16 mixing_matrix_smooth_e;
     519             :     Word32 mixing_matrix_res_smooth_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     520      163731 :     Word16 mixing_matrix_res_smooth_e = 0;
     521      163731 :     move16();
     522             :     Word32 mixing_matrix_buffer_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     523             :     Word16 mixing_matrix_buffer_e;
     524             :     Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
     525             :     Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
     526             : 
     527             :     Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
     528             :     Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];
     529             : 
     530      163731 :     h_synthesis_state = hParamMC->h_output_synthesis_cov_state;
     531             : 
     532      163731 :     set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
     533      163731 :     set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
     534             : 
     535      163731 :     set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
     536      163731 :     set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );
     537             : 
     538     2289070 :     FOR( param_band_idx = 0; param_band_idx < hParamMC->num_param_bands_synth; param_band_idx++ )
     539             :     {
     540             :         /* final mixing */
     541     2125339 :         have_residual = 0;
     542     2125339 :         move16();
     543     2125339 :         brange[0] = hParamMC->band_grouping[param_band_idx];
     544     2125339 :         move16();
     545     2125339 :         brange[1] = hParamMC->band_grouping[( param_band_idx + 1 )];
     546     2125339 :         move16();
     547             : 
     548     2125339 :         if ( LT_16( brange[0], hParamMC->h_output_synthesis_params.max_band_decorr ) )
     549             :         {
     550     1710264 :             have_residual = 1;
     551     1710264 :             move16();
     552             :         }
     553             : 
     554     2125339 :         v_multc_fx( mixing_matrix_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_smooth_fx, imult1616( nY, nX ) );
     555     2125339 :         mixing_matrix_smooth_e = mixing_matrix_e[param_band_idx]; // interpolator is W16
     556     2125339 :         move16();
     557             : 
     558     2125339 :         v_multc_fx( h_synthesis_state.mixing_matrix_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nX ) );
     559     2125339 :         mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_old_exp[param_band_idx]; // interpolator is W16
     560     2125339 :         move16();
     561             : 
     562     2125339 :         v_add_fx_me( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_smooth_fx, &mixing_matrix_smooth_e, imult1616( nY, nX ), 0 );
     563             : 
     564     2125339 :         IF( have_residual )
     565             :         {
     566             :             /* residual mixing matrix interpolation*/
     567             : 
     568     1710264 :             v_multc_fx( mixing_matrix_res_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_res_smooth_fx, imult1616( nY, nY ) );
     569     1710264 :             mixing_matrix_res_smooth_e = mixing_matrix_res_e[param_band_idx]; // interpolator is W16
     570     1710264 :             move16();
     571             : 
     572     1710264 :             set_zero_fx( mixing_matrix_buffer_fx, imult1616( nY, nY ) );
     573     1710264 :             v_multc_fx( h_synthesis_state.mixing_matrix_res_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nY ) );
     574     1710264 :             mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_res_old_exp[param_band_idx]; // interpolator is W16
     575     1710264 :             move16();
     576             : 
     577     1710264 :             v_add_fx_me( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_res_smooth_fx, &mixing_matrix_res_smooth_e, imult1616( nY, nY ), 0 );
     578             :         }
     579             : 
     580    11122259 :         FOR( band = brange[0]; band < brange[1]; band++ )
     581             :         {
     582     8996920 :             assert( band >= 0 );
     583             : 
     584     8996920 :             IF( have_residual )
     585             :             {
     586             :                 /* collect diffuse prototypes */
     587     3274620 :                 assert( LT_16( band, hParamMC->h_output_synthesis_params.max_band_decorr ) );
     588    23274460 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     589             :                 {
     590    19999840 :                     diff_f_real_fx[ch_idx] = Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band];
     591    19999840 :                     move32();
     592    19999840 :                     diff_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band];
     593    19999840 :                     move32();
     594             :                 }
     595             : 
     596             :                 /* apply residual mixing */
     597             :                 {
     598             :                     Word16 shifter;
     599     3274620 :                     shifter = sub( mixing_matrix_res_smooth_e, 32 );
     600    23274460 :                     FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     601             :                     {
     602             :                         Word16 i;
     603             :                         Word16 idx;
     604             :                         Word64 temp_real, temp_imag;
     605             : 
     606    19999840 :                         idx = ch_idx;
     607    19999840 :                         temp_real = 0;
     608    19999840 :                         temp_imag = 0;
     609    19999840 :                         move64();
     610    19999840 :                         move64();
     611   143276640 :                         FOR( i = 0; i < nY; i++ )
     612             :                         {
     613   123276800 :                             temp_real = W_mac_32_32( temp_real, mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] );
     614   123276800 :                             temp_imag = W_mac_32_32( temp_imag, mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] );
     615   123276800 :                             idx += nY;
     616             :                         }
     617    19999840 :                         Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter );
     618    19999840 :                         Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter );
     619             :                     }
     620             :                 }
     621             :             }
     622             :             ELSE
     623             :             {
     624    40619420 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     625             :                 {
     626    34897120 :                     Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
     627    34897120 :                     move32();
     628    34897120 :                     Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
     629    34897120 :                     move32();
     630             :                 }
     631             :             }
     632             : 
     633             :             /* collect input signals, still in cldfb buffers */
     634    27067560 :             FOR( ch_idx = 0; ch_idx < nX; ch_idx++ )
     635             :             {
     636             : 
     637    18070640 :                 input_f_real_fx[ch_idx] = Cldfb_RealBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
     638    18070640 :                 move32();
     639    18070640 :                 input_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
     640    18070640 :                 move32();
     641             :             }
     642             : 
     643             :             /* apply mixing matrix */
     644             :             {
     645             :                 Word16 shifter;
     646     8996920 :                 shifter = 31 - mixing_matrix_smooth_e;
     647             : 
     648    63893880 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     649             :                 {
     650             :                     Word16 i;
     651             :                     Word16 idx;
     652             :                     Word64 temp_real, temp_imag;
     653             : 
     654    54896960 :                     idx = ch_idx;
     655    54896960 :                     temp_real = 0;
     656    54896960 :                     temp_imag = 0;
     657    54896960 :                     move64();
     658    54896960 :                     move64();
     659   165535680 :                     for ( i = 0; i < nX; i++ )
     660             :                     {
     661   110638720 :                         temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) );
     662   110638720 :                         temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) );
     663   110638720 :                         idx += nY;
     664             :                     }
     665    54896960 :                     Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) );
     666    54896960 :                     move32();
     667    54896960 :                     Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
     668    54896960 :                     move32();
     669             :                 }
     670             :             }
     671             :         }
     672             :     }
     673             : 
     674      163731 :     return;
     675             : }
     676             : 
     677             : /*-------------------------------------------------------------------*
     678             :  * computeMixingMatrices()
     679             :  *
     680             :  * compute a mixing matrix using the convariance synthesis approach
     681             :  *-------------------------------------------------------------------*/
     682             : 
     683      132832 : Word16 computeMixingMatrices_fx(
     684             :     const Word16 num_inputs,  /* i  : number of input channels                                                                                      */
     685             :     const Word16 num_outputs, /* i  : number of output channels                                                                                     */
     686             :     const Word32 *Cx,         /* i  : input channel covariance matrix Q(31-Cx_e)                                                                               */
     687             :     Word16 Cx_e,
     688             :     const Word32 *Cy, /* i  : target covariance matrix Q(31-Cy_e)                                                                               */
     689             :     Word16 Cy_e,
     690             :     const Word32 *Q, /* i  : prototype matrix (usually a upmix matrix) Q_fx_e                                                                     */
     691             :     Word16 Q_fx_e,
     692             :     const Word16 energy_compensation_flag, /* i  : flag indicating that the energy compensation should be performed (i.e. no residual mixing matrix will follow) */
     693             :     const Word32 reg_Sx_fx,                /* i  : regularization factor for the input channel singular values                                                   */
     694             :     Word16 reg_Sx_e,
     695             :     const Word32 reg_ghat_fx, /* i  : regularization factor for the normalization matrix  Q(31-reg_ghat_e)                                                         */
     696             :     Word16 reg_ghat_e,
     697             :     Word32 *mixing_matrix_fx, /* o  : resulting mixing matrix       Q(31-mixing_matrix_out_e)                                                                                 */
     698             :     Word16 *mixing_matrix_out_e,
     699             :     Word32 *Cr_fx, /* o  : residual covariance matrix Q(31-Cr_e)                                                                                    */
     700             :     Word16 *Cr_e )
     701             : {
     702             :     Word16 i, j;
     703             :     Word16 out;
     704             :     Word16 nL, nC;
     705      132832 :     Word16 lengthCx = num_inputs;
     706      132832 :     Word16 lengthCy = num_outputs;
     707             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
     708             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     709             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     710             :     Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     711             :     Word32 Cy_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     712             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
     713             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
     714             :     Word16 Cx_fx_e;
     715             :     Word16 Cy_fx_e;
     716             : 
     717             :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
     718             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
     719             : 
     720             :     Word32 limit_fx;
     721             :     Word16 limit_e;
     722             : 
     723             :     Word32 L_tmp;
     724             :     Word16 tmp_e, tmp, exp;
     725             : 
     726             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     727             :     Word32 Kx_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     728             : 
     729             :     Word16 Kx_fx_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     730             :     Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     731             : 
     732             :     Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     733             :     Word16 Kx_reg_inv_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     734             : 
     735             :     Word32 Q_fx[PARAM_MC_MAX_TRANSPORT_CHANS * MAX_CICP_CHANNELS];
     736             :     Word16 Q_e;
     737             : 
     738             :     Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     739             :     Word16 Q_Cx_e;
     740             : 
     741             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
     742             :     Word16 Cy_hat_diag_e;
     743             :     Word16 Cy_hat_diag_buff_e[MAX_OUTPUT_CHANNELS];
     744             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
     745             :     Word16 G_hat_buff_e[MAX_OUTPUT_CHANNELS];
     746             : 
     747             :     Word16 mat_mult_buffer2_e, mat_mult_buffer3_e;
     748             : 
     749             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     750             : 
     751             :     Word16 mixing_matrix_e;
     752             : 
     753             :     Word16 Cr_fx_e;
     754             : 
     755             : 
     756             :     Word32 adj_fx[MAX_OUTPUT_CHANNELS];
     757             :     Word16 adj_e[MAX_OUTPUT_CHANNELS];
     758             :     Word32 *adj_fx_p;
     759             :     Word16 adj_fx_e;
     760             : 
     761             :     Word32 *Cr_p_fx, *Cy_tilde_p_fx, *Cy_p_fx;
     762      132832 :     push_wmops( "dirac_cov_mix_mat" );
     763             : 
     764      132832 :     out = EXIT_SUCCESS;
     765      132832 :     move16();
     766             : 
     767      132832 :     set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
     768     2258144 :     FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
     769             :     {
     770     2125312 :         set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     771     2125312 :         set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     772     2125312 :         set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     773             :     }
     774             : 
     775             : 
     776      132832 :     Copy32( Q, Q_fx, imult1616( lengthCy, lengthCx ) );
     777      132832 :     Copy32( Cx, Cx_fx, imult1616( lengthCx, lengthCx ) );
     778      132832 :     Copy32( Cy, Cy_fx, imult1616( lengthCy, lengthCy ) );
     779             : 
     780      132832 :     Q_e = Q_fx_e;
     781      132832 :     move16();
     782      132832 :     Cx_fx_e = Cx_e;
     783      132832 :     move16();
     784      132832 :     Cy_fx_e = Cy_e;
     785      132832 :     move16();
     786             : 
     787             :     /*-----------------------------------------------------------------*
     788             :      * Decomposition of Cy
     789             :      *-----------------------------------------------------------------*/
     790             : 
     791             :     /* Processing the SVD */
     792             : 
     793      132832 :     mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
     794             : 
     795      132832 :     svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
     796      132832 :     Word16 max_e = -32;
     797             :     /* Computing Ky */
     798      812741 :     FOR( i = 0; i < lengthCy; ++i )
     799             :     {
     800     4211008 :         FOR( j = 0; j < lengthCy; ++j )
     801             :         {
     802     3531099 :             tmp_e = svd_s_buffer_e[j];
     803     3531099 :             move16();
     804     3531099 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
     805     3531099 :             Ky_fx[i + ( j * lengthCy )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
     806     3531099 :             move32();
     807     3531099 :             Ky_fx_e[i + ( j * lengthCy )] = tmp_e;
     808     3531099 :             move16();
     809     3531099 :             max_e = s_max( max_e, tmp_e );
     810             :         }
     811             :     }
     812     3663931 :     FOR( i = 0; i < lengthCy * lengthCy; ++i )
     813             :     {
     814     3531099 :         Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
     815     3531099 :         move32();
     816     3531099 :         Ky_fx_e[i] = max_e;
     817     3531099 :         move16();
     818             :     }
     819             : 
     820             :     /*-----------------------------------------------------------------*
     821             :      * Decomposition of Cx
     822             :      *-----------------------------------------------------------------*/
     823             : 
     824             :     /* Processing the SVD */
     825             : 
     826      132832 :     mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 );
     827             : 
     828      132832 :     svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCx );
     829      132832 :     max_e = -32;
     830      399856 :     FOR( i = 0; i < lengthCx; ++i )
     831             :     {
     832      805152 :         FOR( j = 0; j < lengthCx; ++j )
     833             :         {
     834      538128 :             tmp_e = svd_s_buffer_e[j];
     835      538128 :             move16();
     836      538128 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
     837      538128 :             Kx_fx[( i + ( j * lengthCx ) )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
     838      538128 :             move32();
     839      538128 :             Kx_fx_e[( i + ( j * lengthCx ) )] = tmp_e;
     840      538128 :             move16();
     841      538128 :             max_e = s_max( max_e, tmp_e );
     842             :         }
     843             :     }
     844      670960 :     FOR( i = 0; i < lengthCx * lengthCx; ++i )
     845             :     {
     846      538128 :         Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
     847      538128 :         move32();
     848      538128 :         Kx_fx_e[i] = max_e;
     849      538128 :         move16();
     850             :     }
     851             : 
     852      399856 :     FOR( i = 0; i < lengthCx; ++i )
     853             :     {
     854      267024 :         tmp_e = svd_s_buffer_e[i];
     855      267024 :         move16();
     856      267024 :         svd_s_buffer_fx[i] = Sqrt32( svd_s_buffer_fx[i], &tmp_e ); // Q(31-tmp_e)
     857      267024 :         move32();
     858      267024 :         svd_s_buffer_e[i] = tmp_e;
     859      267024 :         move16();
     860             :     }
     861             : 
     862             :     /*-----------------------------------------------------------------*
     863             :      * Regularization of Sx
     864             :      *-----------------------------------------------------------------*/
     865             : 
     866      132832 :     limit_fx = svd_s_buffer_fx[0];
     867      132832 :     move32();
     868      132832 :     limit_e = svd_s_buffer_e[0];
     869      132832 :     move16();
     870      267024 :     FOR( i = 1; i < lengthCx; i++ )
     871             :     {
     872      134192 :         IF( GT_32( svd_s_buffer_fx[i], L_shl_sat( limit_fx, sub( limit_e, svd_s_buffer_e[i] ) ) ) )
     873             :         {
     874           0 :             limit_fx = svd_s_buffer_fx[i];
     875           0 :             move32();
     876           0 :             limit_e = svd_s_buffer_e[i];
     877           0 :             move16();
     878             :         }
     879             :     }
     880             : 
     881      132832 :     limit_e = add( limit_e, reg_Sx_e );
     882      132832 :     limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx );
     883      132832 :     IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, SQRT_EPSILON_FX /* Q31 */, 0 ) < 0 )
     884             :     {
     885           0 :         limit_fx = SQRT_EPSILON_FX;
     886           0 :         move32();
     887           0 :         limit_e = 0;
     888           0 :         move16();
     889             :     }
     890             : 
     891      399856 :     FOR( i = 0; i < lengthCx; ++i )
     892             :     {
     893      267024 :         IF( LT_32( L_shl_sat( svd_s_buffer_fx[i], sub( svd_s_buffer_e[i], limit_e ) ), limit_fx ) )
     894             :         {
     895       58518 :             svd_s_buffer_fx[i] = limit_fx;
     896       58518 :             move32();
     897       58518 :             svd_s_buffer_e[i] = limit_e;
     898       58518 :             move16();
     899             :         }
     900             :     }
     901             : 
     902      132832 :     limit_fx = 0;
     903      132832 :     move32();
     904      132832 :     limit_e = 0;
     905      132832 :     move16();
     906             : 
     907             :     /*-----------------------------------------------------------------*
     908             :      * regularized Kx-1
     909             :      *-----------------------------------------------------------------*/
     910             : 
     911      399856 :     FOR( i = 0; i < lengthCx; ++i )
     912             :     {
     913             :         Word16 scale, reg_fac_fx;
     914      267024 :         reg_fac_fx = BASOP_Util_Divide3232_Scale( 1, svd_s_buffer_fx[i], &scale );
     915      267024 :         scale = add( scale, sub( Q31, svd_s_buffer_e[i] ) );
     916      805152 :         FOR( j = 0; j < lengthCx; ++j )
     917             :         {
     918      538128 :             Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1( svd_u_buffer_fx[j][i], reg_fac_fx ); // Q(31-scale)
     919      538128 :             move32();
     920      538128 :             Kx_reg_inv_e[i + j * lengthCx] = scale;
     921      538128 :             move16();
     922             :         }
     923             :     }
     924             : 
     925             :     /*-----------------------------------------------------------------*
     926             :      * normalization matrix G hat
     927             :      *-----------------------------------------------------------------*/
     928             : 
     929             :     /* Computing Q*Cx*Q' */
     930      132832 :     matrix_product_mant_exp_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_fx, Cx_fx_e, lengthCx, lengthCx, 0, Q_Cx_fx, &Q_Cx_e );
     931             : 
     932      132832 :     matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
     933             : 
     934      132832 :     Word16 com_e = sub( limit_e, Cy_hat_diag_e );
     935      812741 :     FOR( i = 0; i < lengthCy; ++i )
     936             :     {
     937      679909 :         IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
     938             :         {
     939      679888 :             limit_fx = Cy_hat_diag_fx[i];
     940      679888 :             move32();
     941      679888 :             limit_e = Cy_hat_diag_e;
     942      679888 :             move16();
     943             :         }
     944             :     }
     945      132832 :     limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
     946      132832 :     limit_e = add( limit_e, reg_ghat_e );
     947             : 
     948      132832 :     com_e = sub( Cy_hat_diag_e, limit_e );
     949      812741 :     FOR( i = 0; i < lengthCy; ++i )
     950             :     {
     951      679909 :         Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
     952      679909 :         move16();
     953             : 
     954      679909 :         IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) )
     955             :         {
     956         125 :             Cy_hat_diag_fx[i] = limit_fx;
     957         125 :             move32();
     958         125 :             Cy_hat_diag_buff_e[i] = limit_e;
     959         125 :             move16();
     960             :         }
     961             : 
     962      679909 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[( i + ( i * lengthCy ) )], Cy_hat_diag_fx[i], &exp );
     963      679909 :         exp = add( exp, sub( Cy_fx_e, Cy_hat_diag_buff_e[i] ) );
     964      679909 :         L_tmp = Sqrt32( L_deposit_h( tmp ), &exp );
     965      679909 :         G_hat_fx[i] = L_tmp;
     966      679909 :         move32();
     967      679909 :         G_hat_buff_e[i] = exp;
     968      679909 :         move16();
     969             :     }
     970             : 
     971             :     /*-----------------------------------------------------------------*
     972             :      * Formulate optimal P
     973             :      *-----------------------------------------------------------------*/
     974             : 
     975             :     /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
     976             :     Word16 mat_mult_buffer1_fx_e;
     977             : 
     978             :     Word16 mat_mult_buffer2_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     979             : 
     980      132832 :     matrix_product_mant_exp_fx( Kx_fx, Kx_fx_e[0], lengthCx, lengthCx, 1, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
     981             : 
     982      132832 :     matrix_diag_product_fx_2( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_buff_e, lengthCy, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
     983             : 
     984      132832 :     matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_fx_e[0], lengthCx, lengthCy, 0, Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
     985             : 
     986      132832 :     IF( LT_16( lengthCx, lengthCy ) )
     987             :     {
     988      132832 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 );
     989      132832 :         nL = lengthCy;
     990      132832 :         move16();
     991      132832 :         nC = lengthCx;
     992      132832 :         move16();
     993      132832 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
     994             :     }
     995             :     ELSE
     996             :     {
     997           0 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
     998           0 :         nL = lengthCx;
     999           0 :         move16();
    1000           0 :         nC = lengthCy;
    1001           0 :         move16();
    1002           0 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
    1003             :     }
    1004             : 
    1005             :     /* Actually Processing P */
    1006             : 
    1007             :     /* can be skipped: lambda is always column-truncated identity matrix, so this operation just
    1008             :        truncates V to num_input_channel columns */
    1009             : 
    1010      132832 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
    1011      132832 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1012             : 
    1013      132832 :     mat_mult_buffer1_fx_e = 0;
    1014      132832 :     move16();
    1015      132832 :     mat_mult_buffer2_e = 0;
    1016      132832 :     move16();
    1017             : 
    1018      132832 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCy, lengthCx, 0,
    1019             :                                 mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
    1020             :                                 mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1021             : 
    1022             :     /************************ Formulate M **********************/
    1023             : 
    1024      132832 :     matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
    1025             : 
    1026             :     Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
    1027             : 
    1028             :     Word16 mat_mult_buffer1_fx_e1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1029      132832 :     set16_fx( mat_mult_buffer1_fx_e1, mat_mult_buffer1_fx_e, MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
    1030             : 
    1031      132832 :     matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e );
    1032             : 
    1033             :     /*-----------------------------------------------------------------*
    1034             :      * Formulate Cr
    1035             :      *-----------------------------------------------------------------*/
    1036             : 
    1037             :     /* Compute Cy_tilde = M*Cx*M' */
    1038             :     Word16 Cx_e_arr[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
    1039      132832 :     set16_fx( Cx_e_arr, Cx_fx_e, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
    1040      132832 :     matrix_product_mant_exp( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1 );
    1041             : 
    1042      132832 :     matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
    1043             : 
    1044      132832 :     exp = mixing_matrix_fx_e[0];
    1045      132832 :     move16();
    1046     1372788 :     FOR( i = 1; i < lengthCy * lengthCx; i++ )
    1047             :     {
    1048     1239956 :         if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
    1049             :         {
    1050       97394 :             exp = mixing_matrix_fx_e[i];
    1051       97394 :             move16();
    1052             :         }
    1053             :     }
    1054             : 
    1055     1505620 :     FOR( i = 0; i < lengthCy * lengthCx; i++ )
    1056             :     {
    1057     1372788 :         mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
    1058     1372788 :         move32();
    1059             :     }
    1060             : 
    1061      132832 :     mixing_matrix_e = exp;
    1062      132832 :     move16();
    1063             : 
    1064      132832 :     Cr_p_fx = Cr_fx;
    1065      132832 :     Cy_p_fx = Cy_fx;
    1066      132832 :     Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1067             :     Word16 Cr_e_arr[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
    1068      812741 :     FOR( i = 0; i < lengthCy; ++i )
    1069             :     {
    1070     4211008 :         FOR( j = 0; j < lengthCy; ++j )
    1071             :         {
    1072             : 
    1073     3531099 :             *( Cr_p_fx ) = BASOP_Util_Add_Mant32Exp( *( Cy_p_fx ), Cy_fx_e, L_negate( *( Cy_tilde_p_fx ) ), mat_mult_buffer2_fx_e[i * lengthCy + j], &Cr_e_arr[i * lengthCy + j] );
    1074     3531099 :             move32();
    1075     3531099 :             Cr_p_fx++;
    1076     3531099 :             Cy_p_fx++;
    1077     3531099 :             Cy_tilde_p_fx++;
    1078             :         }
    1079             : 
    1080             :         /* Avoid Meaningless negative main diagonal elements */
    1081      679909 :         IF( Cr_fx[i + ( i * lengthCy )] < 0 )
    1082             :         {
    1083       76299 :             Cr_fx[i + ( i * lengthCy )] = 0;
    1084       76299 :             move32();
    1085       76299 :             Cr_e_arr[i + ( i * lengthCy )] = 0;
    1086       76299 :             move16();
    1087             :         }
    1088             :     }
    1089             : 
    1090      132832 :     exp = Cr_e_arr[0];
    1091      132832 :     move16();
    1092     3531099 :     FOR( i = 1; i < lengthCy * lengthCy; i++ )
    1093             :     {
    1094     3398267 :         if ( LT_16( exp, Cr_e_arr[i] ) )
    1095             :         {
    1096      126888 :             exp = Cr_e_arr[i];
    1097      126888 :             move16();
    1098             :         }
    1099             :     }
    1100             : 
    1101     3663931 :     FOR( i = 0; i < lengthCy * lengthCy; i++ )
    1102             :     {
    1103     3531099 :         Cr_fx[i] = L_shr( Cr_fx[i], sub( exp, Cr_e_arr[i] ) ); // Q(31-exp)
    1104     3531099 :         move32();
    1105             :     }
    1106             : 
    1107      132832 :     Cr_fx_e = exp;
    1108      132832 :     move16();
    1109             : 
    1110      132832 :     exp = mat_mult_buffer2_fx_e[0];
    1111      132832 :     move16();
    1112     3531099 :     FOR( i = 1; i < lengthCy * lengthCy; i++ )
    1113             :     {
    1114     3398267 :         if ( LT_16( exp, mat_mult_buffer2_fx_e[i] ) )
    1115             :         {
    1116      184646 :             exp = mat_mult_buffer2_fx_e[i];
    1117      184646 :             move16();
    1118             :         }
    1119             :     }
    1120             : 
    1121     3663931 :     FOR( i = 0; i < lengthCy * lengthCy; i++ )
    1122             :     {
    1123     3531099 :         mat_mult_buffer2_fx[i] = L_shr( mat_mult_buffer2_fx[i], sub( exp, mat_mult_buffer2_fx_e[i] ) ); // Q(31-exp)
    1124     3531099 :         move32();
    1125             :     }
    1126             : 
    1127      132832 :     mat_mult_buffer2_e = exp;
    1128      132832 :     move16();
    1129             : 
    1130             :     /*-----------------------------------------------------------------*
    1131             :      * Energy Compensation
    1132             :      *-----------------------------------------------------------------*/
    1133             : 
    1134      132832 :     IF( EQ_16( energy_compensation_flag, 1 ) )
    1135             :     {
    1136       25942 :         adj_fx_p = svd_s_buffer_fx;
    1137       25942 :         Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1138             : 
    1139      158372 :         FOR( i = 0; i < lengthCy; ++i )
    1140             :         {
    1141             :             /* Avoid correction for very small energies,
    1142             :                main diagonal elements of Cy_tilde_p may be negative */
    1143      132430 :             IF( Cy_tilde_p_fx[i + ( i * lengthCy )] < 0 )
    1144             :             {
    1145           0 :                 adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1146           0 :                 move32();
    1147           0 :                 adj_e[i] = 1;
    1148           0 :                 move16();
    1149             :             }
    1150             :             ELSE
    1151             :             {
    1152      132430 :                 tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( i * lengthCy )], L_add( Cy_tilde_p_fx[i + ( i * lengthCy )], EPSILON_FX ), &exp );
    1153      132430 :                 exp = add( exp, sub( Cy_fx_e, mat_mult_buffer2_e ) );
    1154      132430 :                 L_tmp = L_deposit_h( tmp );
    1155      132430 :                 L_tmp = Sqrt32( L_tmp, &exp );
    1156      132430 :                 adj_fx_p[i] = L_tmp;
    1157      132430 :                 move32();
    1158      132430 :                 adj_e[i] = exp;
    1159      132430 :                 move16();
    1160             :             }
    1161             : 
    1162      132430 :             Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, adj_e[i] ) );
    1163      132430 :             IF( GT_32( adj_fx_p[i], temp ) )
    1164             :             {
    1165          18 :                 adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1166          18 :                 move32();
    1167          18 :                 adj_e[i] = 3;
    1168          18 :                 move16();
    1169             :             }
    1170             :         }
    1171             : 
    1172       25942 :         exp = adj_e[0];
    1173       25942 :         move16();
    1174      132430 :         FOR( i = 1; i < lengthCy; i++ )
    1175             :         {
    1176      106488 :             if ( LT_16( exp, adj_e[i] ) )
    1177             :             {
    1178        1423 :                 exp = adj_e[i];
    1179        1423 :                 move16();
    1180             :             }
    1181             :         }
    1182             : 
    1183      158372 :         FOR( i = 0; i < lengthCy; i++ )
    1184             :         {
    1185      132430 :             adj_fx[i] = L_shr( adj_fx_p[i], sub( exp, adj_e[i] ) ); // Q(31-exp)
    1186      132430 :             move32();
    1187             :         }
    1188       25942 :         adj_fx_e = exp;
    1189       25942 :         move16();
    1190             : 
    1191       25942 :         diag_matrix_product_fx( adj_fx, adj_fx_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1192             : 
    1193       25942 :         Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCx, lengthCy ) ); // Q(31-mat_mult_buffer3_e)
    1194       25942 :         mixing_matrix_e = mat_mult_buffer3_e;
    1195       25942 :         move16();
    1196             :     }
    1197             : 
    1198      132832 :     *mixing_matrix_out_e = mixing_matrix_e;
    1199      132832 :     move16();
    1200      132832 :     *Cr_e = Cr_fx_e;
    1201      132832 :     move16();
    1202      132832 :     pop_wmops();
    1203             : 
    1204      132832 :     return out;
    1205             : }
    1206             : 
    1207             : 
    1208             : /*-------------------------------------------------------------------*
    1209             :  * computeMixingMatricesResidual()
    1210             :  *
    1211             :  * compute a residual mixing matrix using the covariance synthesis approach
    1212             :  *-------------------------------------------------------------------*/
    1213             : 
    1214      106890 : Word16 computeMixingMatricesResidual_fx(
    1215             :     const Word32 num_outputs, /* i  : number of output channels                                           */
    1216             :     const Word32 *Cx_fx,      /* i  : vector containing the diagonal diffuse prototype covariance Q(31-Cx_e)         */
    1217             :     const Word16 Cx_e,
    1218             :     const Word32 *Cy_fx, /* i  : matrix containing the missing cov (Cr from computeMixingMatrices()) Q(31-Cy_fx_e) */
    1219             :     const Word16 Cy_fx_e,
    1220             :     const Word32 reg_Sx_fx, /* i  : regularization factor for the input channel singular values Q(31-reg_Sx_e)         */
    1221             :     const Word16 reg_Sx_e,
    1222             :     const Word32 reg_ghat_fx, /* i  : regularization factor for the normalization matrix Q(31-reg_ghat_e)                  */
    1223             :     const Word16 reg_ghat_e,
    1224             :     Word32 *mixing_matrix_fx, /* o  : resulting residual mixing matrix Q(31-mixing_matrix_ret_e)                                    */
    1225             :     Word16 *mixing_matrix_ret_e )
    1226             : {
    1227             :     Word16 i, j;
    1228             :     Word16 out, lengthCx, lengthCy;
    1229      106890 :     out = EXIT_SUCCESS;
    1230      106890 :     move16();
    1231      106890 :     lengthCx = extract_l( num_outputs );
    1232      106890 :     lengthCy = extract_l( num_outputs );
    1233             : 
    1234             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1235             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1236             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
    1237             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
    1238             : 
    1239             :     Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1240             : 
    1241      106890 :     push_wmops( "dirac_cov_mix_mat_r" );
    1242             : 
    1243             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1244             :     Word32 adj_fx[MAX_OUTPUT_CHANNELS];
    1245             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1246      106890 :     Word16 mixing_matrix_e = 0, mat_mult_buffer1_e, adj_e, mat_mult_buffer3_e, mat_mult_buffer2_e;
    1247      106890 :     move16();
    1248             : 
    1249      106890 :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 };
    1250             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
    1251             :     Word32 L_tmp;
    1252             :     Word16 tmp_e;
    1253             :     Word16 tmp, scale;
    1254             :     Word32 div_tmp;
    1255             :     Word16 exp;
    1256             : 
    1257             :     Word32 Kx_fx[MAX_OUTPUT_CHANNELS];
    1258             :     Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1259             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1260             :     Word16 Kx_fx_e[MAX_OUTPUT_CHANNELS];
    1261             : 
    1262             :     Word32 limit_fx;
    1263             :     Word16 limit_e;
    1264             : 
    1265             :     Word32 Kx_reg_inv_fx[MAX_OUTPUT_CHANNELS];
    1266             :     Word16 Kx_reg_inv_e[MAX_OUTPUT_CHANNELS];
    1267             : 
    1268             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
    1269             :     Word16 Cy_hat_diag_e;
    1270             : 
    1271             :     Word16 Cy_hat_diag_fx_e[MAX_OUTPUT_CHANNELS];
    1272             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
    1273             :     Word16 G_hat_e[MAX_OUTPUT_CHANNELS];
    1274             : 
    1275             :     Word32 *adj_fx_p;
    1276             :     Word16 adj_buff_e[MAX_OUTPUT_CHANNELS];
    1277             : 
    1278             :     Word32 Cy_tilde_fx[MAX_OUTPUT_CHANNELS];
    1279             :     Word16 Cy_tilde_e;
    1280             : 
    1281             :     /*-----------------------------------------------------------------*
    1282             :      * Decomposition of Cy
    1283             :      *-----------------------------------------------------------------*/
    1284             : 
    1285             :     /* Processing the SVD */
    1286             : 
    1287             :     /* linear array to svd buffer */
    1288      106890 :     mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
    1289             : 
    1290      106890 :     svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
    1291             : 
    1292             :     /* Computing Ky */
    1293      106890 :     Word16 max_e = -32;
    1294      654369 :     FOR( i = 0; i < lengthCy; ++i )
    1295             :     {
    1296     3393068 :         FOR( j = 0; j < lengthCy; ++j )
    1297             :         {
    1298     2845589 :             tmp_e = svd_s_buffer_e[j];
    1299     2845589 :             move16();
    1300     2845589 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
    1301     2845589 :             Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
    1302     2845589 :             move32();
    1303     2845589 :             Ky_fx_e[i + j * lengthCy] = tmp_e;
    1304     2845589 :             move16();
    1305     2845589 :             max_e = s_max( max_e, tmp_e );
    1306             :         }
    1307             :     }
    1308             : 
    1309     2952479 :     FOR( i = 0; i < lengthCy * lengthCy; ++i )
    1310             :     {
    1311     2845589 :         Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
    1312     2845589 :         move32();
    1313     2845589 :         Ky_fx_e[i] = max_e;
    1314     2845589 :         move16();
    1315             :     }
    1316             : 
    1317             :     /*-----------------------------------------------------------------*
    1318             :      * Decomposition of Cx
    1319             :      *-----------------------------------------------------------------*/
    1320             : 
    1321             :     /* Processing the SVD of Cx*/
    1322             :     /* Cx is a diagonal matrix, so SVD would lead to the sorted diagonal as S and u
    1323             :      *  would be just indicating the sorting index, so go straight to Kx as the
    1324             :      * square root of the diagonal of Cx */
    1325             : 
    1326             :     /* Computing Kx */
    1327      106890 :     max_e = -32;
    1328      654369 :     FOR( i = 0; i < lengthCx; ++i )
    1329             :     {
    1330      547479 :         exp = Cx_e;
    1331      547479 :         move16();
    1332      547479 :         Kx_fx[i] = Sqrt32( Cx_fx[i], &exp );
    1333      547479 :         move32();
    1334      547479 :         Kx_fx_e[i] = exp;
    1335      547479 :         move16();
    1336      547479 :         max_e = s_max( max_e, exp );
    1337             :     }
    1338             : 
    1339      654369 :     FOR( i = 0; i < lengthCx; ++i )
    1340             :     {
    1341      547479 :         Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
    1342      547479 :         move32();
    1343      547479 :         Kx_fx_e[i] = max_e;
    1344      547479 :         move16();
    1345             :     }
    1346             : 
    1347             :     /*-----------------------------------------------------------------*
    1348             :      * Regularization of Sx
    1349             :      *-----------------------------------------------------------------*/
    1350             : 
    1351      106890 :     limit_fx = Kx_fx[0];
    1352      106890 :     move32();
    1353             : 
    1354      547479 :     FOR( i = 1; i < lengthCx; i++ )
    1355             :     {
    1356      440589 :         IF( GT_32( Kx_fx[i], limit_fx ) )
    1357             :         {
    1358      165586 :             limit_fx = Kx_fx[i];
    1359      165586 :             move32();
    1360             :         }
    1361             :     }
    1362             : 
    1363      106890 :     L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx ); // limit_e + reg_Sx_e
    1364      106890 :     L_tmp = L_add( L_tmp, EPSILLON_FX );
    1365      106890 :     limit_fx = L_tmp;
    1366      106890 :     move16();
    1367      106890 :     limit_e = add( Kx_fx_e[0], reg_Sx_e );
    1368             : 
    1369      654369 :     FOR( i = 0; i < lengthCx; ++i )
    1370             :     {
    1371      547479 :         IF( GT_32( Kx_fx[i], L_shl_sat( limit_fx, sub( limit_e, Kx_fx_e[i] ) ) ) )
    1372             :         {
    1373      546080 :             div_tmp = Kx_fx[i];
    1374      546080 :             move32();
    1375      546080 :             exp = Kx_fx_e[i];
    1376      546080 :             move16();
    1377             :         }
    1378             :         ELSE
    1379             :         {
    1380        1399 :             div_tmp = limit_fx;
    1381        1399 :             move32();
    1382        1399 :             exp = limit_e;
    1383        1399 :             move16();
    1384             :         }
    1385      547479 :         tmp = BASOP_Util_Divide3232_Scale( 1073741824, div_tmp, &scale ); // 1073741824 -> 1.0f in Q30
    1386      547479 :         scale = add( scale, sub( Q1, exp ) );
    1387             : 
    1388      547479 :         Kx_reg_inv_fx[i] = L_deposit_h( tmp );
    1389      547479 :         move32();
    1390      547479 :         Kx_reg_inv_e[i] = scale;
    1391      547479 :         move16();
    1392             :     }
    1393             : 
    1394      106890 :     limit_fx = 0;
    1395      106890 :     move32();
    1396      106890 :     limit_e = 0;
    1397      106890 :     move16();
    1398             : 
    1399             :     /*-----------------------------------------------------------------*
    1400             :      * regularized Kx-1
    1401             :      *-----------------------------------------------------------------*/
    1402             : 
    1403             :     /*-----------------------------------------------------------------*
    1404             :      * normalization matrix G hat
    1405             :      *-----------------------------------------------------------------*/
    1406             : 
    1407             :     /* Computing Cy_hat_diag */
    1408      106890 :     Copy32( Cx_fx, Cy_hat_diag_fx, extract_l( num_outputs ) ); // Q(31-Cx_e)
    1409      106890 :     Cy_hat_diag_e = Cx_e;
    1410      106890 :     move16();
    1411             : 
    1412      106890 :     Word16 com_e = sub( limit_e, Cy_hat_diag_e );
    1413      654369 :     FOR( i = 0; i < lengthCy; ++i )
    1414             :     {
    1415      547479 :         IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
    1416             :         {
    1417      547463 :             limit_fx = Cy_hat_diag_fx[i];
    1418      547463 :             move32();
    1419      547463 :             limit_e = Cy_hat_diag_e;
    1420      547463 :             move16();
    1421             :         }
    1422             :     }
    1423             : 
    1424      106890 :     limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
    1425      106890 :     limit_e = add( limit_e, reg_ghat_e );
    1426             : 
    1427             :     /* Computing G_hat */
    1428             : 
    1429      106890 :     com_e = sub( Cy_hat_diag_e, limit_e );
    1430      654369 :     FOR( i = 0; i < lengthCy; ++i )
    1431             :     {
    1432      547479 :         Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
    1433      547479 :         move16();
    1434      547479 :         IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
    1435             :         {
    1436         101 :             Cy_hat_diag_fx[i] = limit_fx;
    1437         101 :             move32();
    1438         101 :             Cy_hat_diag_fx_e[i] = limit_e;
    1439         101 :             move16();
    1440             :         }
    1441      547479 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + i * lengthCy], Cy_hat_diag_fx[i], &scale );
    1442      547479 :         scale = add( scale, sub( Cy_fx_e, Cy_hat_diag_fx_e[i] ) );
    1443      547479 :         L_tmp = Sqrt32( L_deposit_h( tmp ), &scale );
    1444             : 
    1445      547479 :         G_hat_fx[i] = L_tmp;
    1446      547479 :         move32();
    1447      547479 :         G_hat_e[i] = scale;
    1448      547479 :         move16();
    1449             :     }
    1450             : 
    1451             :     /*-----------------------------------------------------------------*
    1452             :      * Formulate optimal P
    1453             :      *-----------------------------------------------------------------*/
    1454             : 
    1455      654369 :     FOR( i = 0; i < num_outputs; i++ )
    1456             :     {
    1457      547479 :         Kx_fx[i] = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e))
    1458      547479 :         move32();
    1459      547479 :         Kx_fx_e[i] = add( Kx_fx_e[i], G_hat_e[i] );
    1460      547479 :         move16();
    1461             :     }
    1462             : 
    1463      654369 :     FOR( i = 0; i < num_outputs; i++ )
    1464             :     {
    1465             :         Word32 fac_fx;
    1466      547479 :         fac_fx = Kx_fx[i];
    1467      547479 :         move32();
    1468             : 
    1469     3393068 :         FOR( j = 0; j < num_outputs; j++ )
    1470             :         {
    1471     2845589 :             mat_mult_buffer1_fx[i + j * num_outputs] = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e));
    1472     2845589 :             move32();
    1473     2845589 :             mat_mult_buffer1_buff_e[i + j * num_outputs] = add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] );
    1474     2845589 :             move16();
    1475             :         }
    1476             :     }
    1477             : 
    1478      106890 :     mat_mult_buffer1_e = mat_mult_buffer1_buff_e[0];
    1479      106890 :     move16();
    1480     2845589 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1481             :     {
    1482     2738699 :         if ( LT_16( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) )
    1483             :         {
    1484       29503 :             mat_mult_buffer1_e = mat_mult_buffer1_buff_e[i];
    1485       29503 :             move16();
    1486             :         }
    1487             :     }
    1488             : 
    1489     2952479 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1490             :     {
    1491     2845589 :         mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) ); // Q(31-mat_mult_buffer1_e)
    1492     2845589 :         move32();
    1493             :     }
    1494             : 
    1495      106890 :     mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
    1496             : 
    1497      106890 :     svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCy );
    1498             : 
    1499             :     /* Actually Processing P */
    1500             : 
    1501      106890 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
    1502      106890 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1503      106890 :     mat_mult_buffer1_e = 0;
    1504      106890 :     move16();
    1505      106890 :     mat_mult_buffer2_e = 0;
    1506      106890 :     move16();
    1507             : 
    1508      106890 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0,
    1509             :                                 mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
    1510             :                                 mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1511             : 
    1512             :     /*-----------------------------------------------------------------*
    1513             :      * Formulate M
    1514             :      *-----------------------------------------------------------------*/
    1515             : 
    1516             : 
    1517      106890 :     matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
    1518      106890 :     set16_fx( mat_mult_buffer1_buff_e, mat_mult_buffer1_buff_e[0], MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
    1519             : 
    1520             :     Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
    1521             : 
    1522      654369 :     FOR( i = 0; i < num_outputs; i++ )
    1523             :     {
    1524             :         Word32 fac_fx;
    1525      547479 :         fac_fx = Kx_reg_inv_fx[i];
    1526      547479 :         move32();
    1527             : 
    1528     3393068 :         FOR( j = 0; j < num_outputs; j++ )
    1529             :         {
    1530     2845589 :             mixing_matrix_fx[j + i * num_outputs] = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e);
    1531     2845589 :             move32();
    1532     2845589 :             mixing_matrix_fx_e[j + i * num_outputs] = add( mat_mult_buffer1_buff_e[j + i * num_outputs], Kx_reg_inv_e[i] );
    1533     2845589 :             move16();
    1534             :         }
    1535             :     }
    1536             : 
    1537             :     /*-----------------------------------------------------------------*
    1538             :      * Formulate Cr
    1539             :      *-----------------------------------------------------------------*/
    1540             : 
    1541             :     /* Compute Cy_tilde = M*Cx*M' */
    1542             : 
    1543             :     Word16 Cx_e_arr[MAX_CICP_CHANNELS];
    1544      106890 :     set16_fx( Cx_e_arr, Cx_e, MAX_CICP_CHANNELS );
    1545             : 
    1546      106890 :     matrix_diag_product_fx_1( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
    1547             : 
    1548      106890 :     exp = mixing_matrix_fx_e[0];
    1549      106890 :     move16();
    1550     2845589 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1551             :     {
    1552     2738699 :         if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
    1553             :         {
    1554       11815 :             exp = mixing_matrix_fx_e[i];
    1555       11815 :             move16();
    1556             :         }
    1557             :     }
    1558             : 
    1559     2952479 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1560             :     {
    1561     2845589 :         mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
    1562     2845589 :         move32();
    1563             :     }
    1564      106890 :     mixing_matrix_e = exp;
    1565      106890 :     move16();
    1566             : 
    1567      106890 :     exp = mat_mult_buffer1_buff_e[0];
    1568      106890 :     move16();
    1569     2845589 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1570             :     {
    1571     2738699 :         if ( LT_16( exp, mat_mult_buffer1_buff_e[i] ) )
    1572             :         {
    1573       11815 :             exp = mat_mult_buffer1_buff_e[i];
    1574       11815 :             move16();
    1575             :         }
    1576             :     }
    1577             : 
    1578     2952479 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1579             :     {
    1580     2845589 :         mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( exp, mat_mult_buffer1_buff_e[i] ) ); // Q(31-exp)
    1581     2845589 :         move32();
    1582             :     }
    1583      106890 :     mat_mult_buffer1_e = exp;
    1584      106890 :     move16();
    1585             : 
    1586      106890 :     matrix_product_diag_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 1, Cy_tilde_fx, &Cy_tilde_e );
    1587             : 
    1588             :     /*-----------------------------------------------------------------*
    1589             :      * Energy Compensation
    1590             :      *-----------------------------------------------------------------*/
    1591             : 
    1592      106890 :     adj_fx_p = svd_s_buffer_fx;
    1593             : 
    1594      654369 :     FOR( i = 0; i < lengthCy; ++i )
    1595             :     {
    1596      547479 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( lengthCy * i )], L_add( Cy_tilde_fx[i], EPSILON_FX ), &scale );
    1597      547479 :         scale = add( scale, sub( Cy_fx_e, Cy_tilde_e ) );
    1598      547479 :         adj_fx_p[i] = Sqrt32( L_deposit_h( tmp ), &scale );
    1599      547479 :         move32();
    1600      547479 :         adj_buff_e[i] = scale;
    1601      547479 :         move16();
    1602      547479 :         Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, scale ) );
    1603      547479 :         IF( GT_32( adj_fx_p[i], temp ) ) // 1073741824 -> 1.0f in Q30
    1604             :         {
    1605           2 :             adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1606           2 :             move32();
    1607           2 :             adj_buff_e[i] = 3;
    1608           2 :             move16();
    1609             :         }
    1610             :     }
    1611             : 
    1612      106890 :     adj_e = adj_buff_e[0];
    1613      106890 :     move16();
    1614             : 
    1615      547479 :     FOR( i = 1; i < lengthCy; i++ )
    1616             :     {
    1617      440589 :         if ( LT_16( adj_e, adj_buff_e[i] ) )
    1618             :         {
    1619       43195 :             adj_e = adj_buff_e[i];
    1620       43195 :             move16();
    1621             :         }
    1622             :     }
    1623             : 
    1624             : 
    1625      654369 :     FOR( i = 0; i < lengthCy; i++ )
    1626             :     {
    1627      547479 :         adj_fx[i] = L_shr( adj_fx_p[i], sub( adj_e, adj_buff_e[i] ) ); // Q(31-adj_e)
    1628      547479 :         move32();
    1629             :     }
    1630             : 
    1631      106890 :     diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1632      106890 :     Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
    1633      106890 :     *mixing_matrix_ret_e = mat_mult_buffer3_e;
    1634      106890 :     move16();
    1635             : 
    1636      106890 :     pop_wmops();
    1637             : 
    1638      106890 :     return out;
    1639             : }
    1640             : 
    1641             : 
    1642             : /*-------------------------------------------------------------------*
    1643             :  * computeMixingMatricesISM()
    1644             :  *
    1645             :  *
    1646             :  *-------------------------------------------------------------------*/
    1647      315180 : Word16 computeMixingMatricesISM_fx(
    1648             :     const Word16 num_inputs,
    1649             :     const Word16 num_responses,
    1650             :     const Word16 num_outputs,
    1651             :     const Word32 *responses_fx, /*Q(31-responses_e) */
    1652             :     const Word16 responses_e,
    1653             :     const Word32 *ener_fx, /*Q(31-ener_e) */
    1654             :     const Word16 ener_e,
    1655             :     const Word32 *Cx_diag_fx, /*Q(31-diag_e) */
    1656             :     const Word16 Cx_diag_e,
    1657             :     const Word32 *Cy_diag_fx, /*Q(31-diag_e) */
    1658             :     const Word16 Cy_diag_e,
    1659             :     const Word16 *Q_16fx, // Q15
    1660             :     const Word16 energy_compensation_flag,
    1661             :     const Word32 reg_Sx_fx,   /*Q0*/
    1662             :     const Word32 reg_ghat_fx, /*Q0*/
    1663             :     Word32 *mixing_matrix_fx, /*Q(31-mixing_matrix_e) */
    1664             :     Word16 *mixing_matrix_e )
    1665             : {
    1666             :     Word16 i, out;
    1667             :     Word16 lengthCx, lengthCy;
    1668             :     Word16 nL, nC;
    1669             : 
    1670             :     Word32 *Cy_tilde_p_fx;
    1671             :     Word32 *adj_fx;
    1672             :     Word32 limit_fx;
    1673             : 
    1674             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1675             :     Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1676             :     Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1677             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1678             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
    1679             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1680             :     Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS];
    1681             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1682             :     Word32 Kx_fx[MAX_TRANSPORT_CHANNELS];
    1683             :     Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e;
    1684             : 
    1685             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1686             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1687             :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
    1688             :     Word16 svd_s_buffer_fx_e;
    1689             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
    1690             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1691             : 
    1692             :     Word16 temp_e[MAX_OUTPUT_CHANNELS];
    1693      315180 :     push_wmops( "dirac_cov_mix_mat" );
    1694             : 
    1695      315180 :     out = EXIT_SUCCESS;
    1696      315180 :     move16();
    1697      315180 :     lengthCx = num_inputs;
    1698      315180 :     move16();
    1699      315180 :     lengthCy = num_outputs;
    1700      315180 :     move16();
    1701             : 
    1702     6073020 :     FOR( i = 0; i < lengthCy * lengthCx; i++ )
    1703             :     {
    1704     5757840 :         IF( EQ_16( Q_16fx[i], MAX_16 ) )
    1705             :         {
    1706     2575200 :             Q_fx[i] = MAX_32;
    1707     2575200 :             move32();
    1708             :         }
    1709             :         ELSE
    1710             :         {
    1711     3182640 :             Q_fx[i] = L_deposit_h( Q_16fx[i] );
    1712     3182640 :             move32();
    1713             :         }
    1714             :     }
    1715      315180 :     Q_e = 0;
    1716      315180 :     move16();
    1717      315180 :     set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
    1718     5358060 :     FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
    1719             :     {
    1720     5042880 :         set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1721     5042880 :         set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1722     5042880 :         set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1723             :     }
    1724             : 
    1725             :     /* Decomposition of Cy = Ky*Ky' */
    1726             :     /* Ky = responses*diag(ener) */
    1727      315180 :     matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e );
    1728             : 
    1729             :     /* Decomposition of Cx -> Computing Kx */
    1730      315180 :     set16_fx( temp_e, Cx_diag_e, lengthCx );
    1731      315180 :     v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx );
    1732      315180 :     Kx_e = temp_e[0];
    1733      315180 :     move16();
    1734      630360 :     FOR( i = 1; i < lengthCx; i++ )
    1735             :     {
    1736      315180 :         Kx_e = s_max( Kx_e, temp_e[i] );
    1737             :     }
    1738      945540 :     FOR( i = 0; i < lengthCx; i++ )
    1739             :     {
    1740      630360 :         Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); // Q(31-Kx_e)
    1741      630360 :         move32();
    1742             :     }
    1743             : 
    1744             :     /* Regularization of Sx */
    1745      315180 :     maximum_32_fx( Kx_fx, lengthCx, &limit_fx );
    1746      315180 :     limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); // Cy_hat_diag_e + reg_ghat_e
    1747             : 
    1748      945540 :     FOR( i = 0; i < lengthCx; ++i )
    1749             :     {
    1750      630360 :         IF( GT_32( Kx_fx[i], limit_fx ) )
    1751             :         {
    1752      595269 :             svd_s_buffer_fx[i] = Kx_fx[i];
    1753      595269 :             move32();
    1754             :         }
    1755             :         ELSE
    1756             :         {
    1757       35091 :             svd_s_buffer_fx[i] = limit_fx;
    1758       35091 :             move32();
    1759             :         }
    1760             :     }
    1761      315180 :     svd_s_buffer_fx_e = Kx_e;
    1762      315180 :     move16();
    1763             : 
    1764      315180 :     limit_fx = 0;
    1765      315180 :     move32();
    1766             : 
    1767             :     /* regularized Kx-1 */
    1768             : 
    1769      945540 :     FOR( i = 0; i < lengthCx; ++i )
    1770             :     {
    1771      630360 :         IF( svd_s_buffer_fx[i] )
    1772             :         {
    1773             :             Word32 reg_fac;
    1774      630354 :             reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, svd_s_buffer_fx[i], &temp_e[i] );
    1775      630354 :             Kx_reg_inv_fx[i] = reg_fac;
    1776      630354 :             move32();
    1777      630354 :             temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e );
    1778      630354 :             move16();
    1779             :         }
    1780             :         ELSE
    1781             :         {
    1782             :             Word32 reg_fac;
    1783           6 :             reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, EPSILON_FX_M, &temp_e[i] );
    1784           6 :             Kx_reg_inv_fx[i] = reg_fac;
    1785           6 :             move32();
    1786           6 :             temp_e[i] = sub( temp_e[i], EPSILON_FX_E );
    1787           6 :             move16();
    1788             :         }
    1789             :     }
    1790      315180 :     Kx_reg_inv_e = temp_e[0];
    1791      315180 :     move16();
    1792      630360 :     FOR( i = 1; i < lengthCx; i++ )
    1793             :     {
    1794      315180 :         Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] );
    1795             :     }
    1796      945540 :     FOR( i = 0; i < lengthCx; i++ )
    1797             :     {
    1798      630360 :         Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); // Q(31- Kx_reg_inv_e)
    1799      630360 :         move32();
    1800             :     }
    1801             : 
    1802             :     /************************ normalization matrix G hat **********************/
    1803             : 
    1804             :     /* Computing Q*Cx*Q' */
    1805             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
    1806             :     Word16 Cy_hat_diag_e;
    1807             : 
    1808      315180 :     matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e );
    1809             : 
    1810      315180 :     Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 );
    1811             : 
    1812     6073020 :     FOR( i = 0; i < lengthCy * lengthCx; ++i )
    1813             :     {
    1814     5757840 :         IF( GT_16( Q_Cx_e, Q_e ) )
    1815             :         {
    1816     5757782 :             Q_fx[i] = L_shr( Q_fx[i], guard_bits );
    1817     5757782 :             move32();
    1818             :         }
    1819             :         ELSE
    1820             :         {
    1821          58 :             Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits );
    1822          58 :             move32();
    1823             :         }
    1824             :     }
    1825             : 
    1826      315180 :     IF( GT_16( Q_Cx_e, Q_e ) )
    1827             :     {
    1828      315177 :         Q_e = add( Q_e, guard_bits );
    1829             :     }
    1830             :     ELSE
    1831             :     {
    1832           3 :         Q_Cx_e = add( Q_Cx_e, guard_bits );
    1833             :     }
    1834      315180 :     matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
    1835             : 
    1836             :     /* Computing Cy_hat_diag */
    1837     3194100 :     FOR( i = 0; i < lengthCy; ++i )
    1838             :     {
    1839     2878920 :         if ( GT_32( Cy_hat_diag_fx[i], limit_fx ) )
    1840             :         {
    1841      467716 :             limit_fx = Cy_hat_diag_fx[i];
    1842      467716 :             move32();
    1843             :         }
    1844             :     }
    1845             : 
    1846      315180 :     limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); // Cy_hat_diag_e + reg_ghat_e
    1847             : 
    1848             :     /* Computing G_hat */
    1849     3194100 :     FOR( i = 0; i < lengthCy; ++i )
    1850             :     {
    1851     2878920 :         if ( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
    1852             :         {
    1853       12557 :             Cy_hat_diag_fx[i] = limit_fx;
    1854       12557 :             move32();
    1855             :         }
    1856     2878920 :         IF( Cy_diag_fx[i] )
    1857             :         {
    1858     1875780 :             IF( Cy_hat_diag_fx[i] )
    1859             :             {
    1860     1875780 :                 G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] );
    1861     1875780 :                 move32();
    1862     1875780 :                 temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) );
    1863     1875780 :                 move16();
    1864     1875780 :                 G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
    1865     1875780 :                 move32();
    1866             :             }
    1867             :             ELSE
    1868             :             {
    1869           0 :                 G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
    1870           0 :                 move32();
    1871           0 :                 temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
    1872           0 :                 move16();
    1873           0 :                 G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
    1874           0 :                 move32();
    1875             :             }
    1876             :         }
    1877             :         ELSE
    1878             :         {
    1879     1003140 :             G_hat_fx[i] = 0;
    1880     1003140 :             move32();
    1881     1003140 :             temp_e[i] = 0;
    1882     1003140 :             move16();
    1883             :         }
    1884             :     }
    1885      315180 :     G_hat_e = temp_e[0];
    1886      315180 :     move16();
    1887     2878920 :     FOR( i = 1; i < lengthCy; i++ )
    1888             :     {
    1889     2563740 :         G_hat_e = s_max( G_hat_e, temp_e[i] );
    1890             :     }
    1891     3194100 :     FOR( i = 0; i < lengthCy; i++ )
    1892             :     {
    1893     2878920 :         G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); // Q(31-G_hat_e)
    1894     2878920 :         move32();
    1895             :     }
    1896             : 
    1897             :     /************************ Formulate optimal P **********************/
    1898             : 
    1899             :     /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
    1900      315180 :     diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1901             : 
    1902      315180 :     matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
    1903             : 
    1904      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1905             : 
    1906      315180 :     IF( LT_16( lengthCx, num_responses ) )
    1907             :     {
    1908        3600 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 );
    1909             : 
    1910        3600 :         nL = num_responses;
    1911        3600 :         move16();
    1912        3600 :         nC = lengthCx;
    1913        3600 :         move16();
    1914        3600 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
    1915             :     }
    1916             :     ELSE
    1917             :     {
    1918      311580 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 );
    1919             : 
    1920      311580 :         nL = lengthCx;
    1921      311580 :         move16();
    1922      311580 :         nC = num_responses;
    1923      311580 :         move16();
    1924      311580 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
    1925             :     }
    1926             : 
    1927             :     /* Actually Processing P */
    1928             : 
    1929             :     /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */
    1930      315180 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx );
    1931      315180 :     mat_mult_buffer1_e = 0;
    1932      315180 :     move16();
    1933      315180 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1934      315180 :     mat_mult_buffer2_e = 0;
    1935      315180 :     move16();
    1936             : 
    1937      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1938             : 
    1939             :     /************************ Formulate M **********************/
    1940             : 
    1941      315180 :     matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1942             : 
    1943      315180 :     matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e );
    1944             : 
    1945             :     /*********************** Energy Compensation ****************/
    1946             : 
    1947             :     /* Compute Cy_tilde = M*Cx*M' */
    1948      315180 :     matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1949             : 
    1950      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
    1951             : 
    1952      315180 :     IF( EQ_16( energy_compensation_flag, 1 ) )
    1953             :     {
    1954      315180 :         adj_fx = svd_s_buffer_fx;
    1955      315180 :         Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1956     3194100 :         FOR( i = 0; i < lengthCy; ++i )
    1957             :         {
    1958             :             /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */
    1959     2878920 :             IF( Cy_tilde_p_fx[( i + ( i * lengthCy ) )] < 0 )
    1960             :             {
    1961           0 :                 adj_fx[i] = MAX_32;
    1962           0 :                 move32();
    1963           0 :                 temp_e[i] = 0;
    1964           0 :                 move16();
    1965             :             }
    1966             :             ELSE
    1967             :             {
    1968     2878920 :                 IF( Cy_diag_fx[i] )
    1969             :                 {
    1970     1875780 :                     IF( Cy_tilde_p_fx[i + ( i * lengthCy )] )
    1971             :                     {
    1972     1875751 :                         adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_tilde_p_fx[i + ( i * lengthCy )], &temp_e[i] );
    1973     1875751 :                         move32();
    1974     1875751 :                         temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) );
    1975     1875751 :                         move16();
    1976     1875751 :                         adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
    1977     1875751 :                         move32();
    1978             :                     }
    1979             :                     ELSE
    1980             :                     {
    1981          29 :                         adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
    1982          29 :                         move32();
    1983          29 :                         temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
    1984          29 :                         move16();
    1985          29 :                         adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
    1986          29 :                         move32();
    1987             :                     }
    1988             :                 }
    1989             :                 ELSE
    1990             :                 {
    1991     1003140 :                     adj_fx[i] = 0;
    1992     1003140 :                     move32();
    1993     1003140 :                     temp_e[i] = 0;
    1994     1003140 :                     move16();
    1995             :                 }
    1996             :             }
    1997             : 
    1998     2878920 :             Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, temp_e[i] ) );
    1999     2878920 :             IF( GT_32( adj_fx[i], temp ) )
    2000             :             {
    2001        1613 :                 adj_fx[i] = MAX_32;
    2002        1613 :                 move32();
    2003        1613 :                 temp_e[i] = 2;
    2004        1613 :                 move16();
    2005             :             }
    2006             :         }
    2007      315180 :         adj_e = temp_e[0];
    2008      315180 :         move16();
    2009     2878920 :         FOR( i = 1; i < lengthCy; i++ )
    2010             :         {
    2011     2563740 :             adj_e = s_max( adj_e, temp_e[i] );
    2012             :         }
    2013     3194100 :         FOR( i = 0; i < lengthCy; i++ )
    2014             :         {
    2015     2878920 :             adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); // Q(31-adj_e)
    2016     2878920 :             move32();
    2017             :         }
    2018             : 
    2019      315180 :         diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    2020             : 
    2021      315180 :         Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
    2022      315180 :         *mixing_matrix_e = mat_mult_buffer3_e;
    2023      315180 :         move16();
    2024             :     }
    2025             : 
    2026      315180 :     pop_wmops();
    2027             : 
    2028      315180 :     return out;
    2029             : }

Generated by: LCOV version 1.14