LCOV - code coverage report
Current view: top level - lib_dec - ivas_dirac_output_synthesis_cov_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main enc/dec/rend @ 574a190e3c6896c6c4ed10d7f23649709a0c4347 Lines: 836 876 95.4 %
Date: 2025-06-27 02:59:36 Functions: 9 9 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : 
      34             : #include <stdint.h>
      35             : #include <string.h>
      36             : #include <stdio.h>
      37             : #include <stdlib.h>
      38             : #include <assert.h>
      39             : #include <math.h>
      40             : #include "options.h"
      41             : #include "cnst.h"
      42             : #include "rom_enc.h"
      43             : #include "rom_com.h"
      44             : #include "prot_fx.h"
      45             : #include "ivas_stat_dec.h"
      46             : #include "ivas_cnst.h"
      47             : #include "ivas_rom_com.h"
      48             : #include "ivas_rom_dec.h"
      49             : #include "wmc_auto.h"
      50             : #include "rom_dec.h"
      51             : #include "ivas_prot_fx.h"
      52             : 
      53             : 
      54             : /*-------------------------------------------------------------------*
      55             :  * ivas_dirac_dec_output_synthesis_cov_open()
      56             :  *
      57             :  * Sets up the state and parameters for the Covariance Synthesis
      58             :  *-------------------------------------------------------------------*/
      59             : 
      60         297 : ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx(
      61             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params,   /* i/o: handle for the covariance synthesis parameters                                                        */
      62             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: hanlde for the covariance synthesis state                                                             */
      63             :     const Word16 max_band_decorr,                                     /* i  : uppermost frequency band where decorrelation is applied                                               */
      64             :     const Word16 interp_length,                                       /* i  : length for interpolating the mixing matrices in time slots                                            */
      65             :     const Word16 num_param_bands,                                     /* i  : number of parameter bands                                                                             */
      66             :     const Word16 num_param_bands_residual,                            /* i  : number of parameter bands with a residual mixing matrix (i.e. decorrelation                           */
      67             :     const Word16 nchan_in,                                            /* i  : number of input (transport) channels                                                                  */
      68             :     const Word16 nchan_out,                                           /* i  : number of output channels                                                                             */
      69             :     const Word32 *proto_matrix                                        /* i  : the prototype (upmix) matrix (only used if mode == 1)  Q(15-proto_matrix_e)                                               */
      70             : )
      71             : {
      72             :     Word16 idx;
      73             : 
      74         297 :     h_dirac_output_synthesis_params->max_band_decorr = max_band_decorr;
      75         297 :     move16();
      76             : 
      77             :     /*-----------------------------------------------------------------*
      78             :      * memory allocation
      79             :      *-----------------------------------------------------------------*/
      80             : 
      81             :     /* buffer length and interpolator */
      82         297 :     h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
      83         297 :     IF( ( h_dirac_output_synthesis_params->proto_matrix_fx = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
      84             :     {
      85           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
      86             :     }
      87         297 :     h_dirac_output_synthesis_params->proto_matrix_len = imult1616( nchan_out, nchan_in );
      88         297 :     move16();
      89             :     /* cov buffers */
      90        3803 :     FOR( idx = 0; idx < num_param_bands; idx++ )
      91             :     {
      92        3506 :         IF( ( h_dirac_output_synthesis_state->cx_old_fx[idx] = (Word32 *) malloc( nchan_in * nchan_in * sizeof( Word32 ) ) ) == NULL )
      93             :         {
      94           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
      95             :         }
      96        3506 :         h_dirac_output_synthesis_state->cx_old_len = imult1616( nchan_in, nchan_in );
      97        3506 :         IF( ( h_dirac_output_synthesis_state->cy_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_out * sizeof( Word32 ) ) ) == NULL )
      98             :         {
      99           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     100             :         }
     101        3506 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
     102             :         {
     103           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     104             :         }
     105        3506 :         set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
     106        3506 :         set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     107        3506 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
     108             : 
     109        3506 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
     110             :         {
     111           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     112             :         }
     113        3506 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
     114        3506 :         h_dirac_output_synthesis_state->mixing_matrix_len = i_mult( nchan_out, nchan_in );
     115        3506 :         move16();
     116             :     }
     117       14611 :     FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     118             :     {
     119       14314 :         h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
     120       14314 :         h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
     121       14314 :         h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
     122       14314 :         h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
     123             :     }
     124             : 
     125        3053 :     FOR( idx = 0; idx < num_param_bands_residual; idx++ )
     126             :     {
     127        2756 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
     128             :         {
     129           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     130             :         }
     131        2756 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     132             : 
     133        2756 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
     134             :         {
     135           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     136             :         }
     137        2756 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
     138        2756 :         h_dirac_output_synthesis_state->mixing_matrix_res_len = i_mult( nchan_out, nchan_out );
     139        2756 :         move16();
     140             :     }
     141       15361 :     FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     142             :     {
     143       15064 :         h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
     144       15064 :         h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
     145             :     }
     146             : 
     147         297 :     IF( ( h_dirac_output_synthesis_state->cx_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     148             :     {
     149           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     150             :     }
     151         297 :     IF( ( h_dirac_output_synthesis_state->cy_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     152             :     {
     153           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     154             :     }
     155             : 
     156         297 :     set16_fx( h_dirac_output_synthesis_state->cx_old_e, 0, CLDFB_NO_CHANNELS_MAX );
     157         297 :     set16_fx( h_dirac_output_synthesis_state->cy_old_e, 0, CLDFB_NO_CHANNELS_MAX );
     158             : 
     159         297 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     160             :     {
     161           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     162             :     }
     163         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
     164             : 
     165         297 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     166             :     {
     167           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     168             :     }
     169         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     170             : 
     171         297 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     172             :     {
     173           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     174             :     }
     175         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
     176             : 
     177         297 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     178             :     {
     179           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     180             :     }
     181         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     182             : 
     183             :     /*-----------------------------------------------------------------*
     184             :      * prepare processing parameters
     185             :      *-----------------------------------------------------------------*/
     186             : 
     187             :     /* compute interpolator */
     188         297 :     IF( ( h_dirac_output_synthesis_params->interpolator_fx = (Word16 *) malloc( interp_length * sizeof( Word16 ) ) ) == NULL )
     189             :     {
     190           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     191             :     }
     192        9801 :     FOR( idx = 1; idx <= interp_length; ++idx )
     193             :     {
     194        9504 :         h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = div_s( idx, interp_length );
     195        9504 :         move16();
     196             :     }
     197         297 :     Copy32( proto_matrix, h_dirac_output_synthesis_params->proto_matrix_fx, imult1616( nchan_in, nchan_out ) );
     198         297 :     h_dirac_output_synthesis_params->proto_matrix_e = 5;
     199         297 :     move16();
     200         297 :     return IVAS_ERR_OK;
     201             : }
     202             : 
     203             : 
     204             : /*-------------------------------------------------------------------*
     205             :  * ivas_dirac_dec_output_synthesis_get_interpolator_fx()
     206             :  *
     207             :  *
     208             :  *-------------------------------------------------------------------*/
     209             : 
     210      318405 : void ivas_dirac_dec_output_synthesis_get_interpolator_fx(
     211             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters  */
     212             :     const UWord16 interp_length                                     /* i  : interpolator length                             */
     213             : )
     214             : {
     215             :     Word16 idx;
     216      318405 :     Word16 tmp, exp_diff = 0;
     217      318405 :     move16();
     218             : 
     219     1583754 :     FOR( idx = 1; idx <= interp_length; ++idx )
     220             :     {
     221     1265349 :         tmp = BASOP_Util_Divide3232_Scale( L_deposit_l( idx ), L_deposit_l( interp_length ), &exp_diff ); // (Q15 - exp_diff)
     222     1265349 :         h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = shl_sat( tmp, exp_diff );             // Q15
     223     1265349 :         move16();
     224             :     }
     225             : 
     226      318405 :     return;
     227             : }
     228             : 
     229             : 
     230             : /*-------------------------------------------------------------------*
     231             :  * ivas_dirac_dec_output_synthesis_cov_init()
     232             :  *
     233             :  * initialize the states for the covariance synthesis
     234             :  *-------------------------------------------------------------------*/
     235         297 : void ivas_dirac_dec_output_synthesis_cov_init_fx(
     236             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: pointer to the state of the covariance synthesis                            */
     237             :     const Word16 nchan_in,                                            /* i  : number of input (tranport) channels                                         */
     238             :     const Word16 nchan_out,                                           /* i  : number of output channels                                                   */
     239             :     const Word16 n_param_bands,                                       /* i  : number of total parameter bands                                             */
     240             :     const Word16 n_param_bands_res                                    /* i  : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
     241             : )
     242             : {
     243             : 
     244             :     Word16 idx;
     245             : 
     246             :     /* initialize buffers */
     247        3803 :     FOR( idx = 0; idx < n_param_bands; idx++ )
     248             :     {
     249        3506 :         set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
     250        3506 :         set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     251        3506 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
     252        3506 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
     253             :     }
     254             : 
     255        3053 :     FOR( idx = 0; idx < n_param_bands_res; idx++ )
     256             :     {
     257        2756 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     258        2756 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
     259             :     }
     260             : 
     261             : 
     262         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     263         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
     264         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     265         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
     266             : 
     267         297 :     return;
     268             : }
     269             : 
     270             : 
     271             : /*-------------------------------------------------------------------*
     272             :  * ivas_dirac_dec_output_synthesis_cov_close()
     273             :  *
     274             :  * deallocate dynamic memory in the covariance synthesis state
     275             :  *-------------------------------------------------------------------*/
     276             : 
     277         297 : void ivas_dirac_dec_output_synthesis_cov_close_fx(
     278             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params,  /* i  : handle for the covariance synthesis parameters  */
     279             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state /* i/o: handle for the covariance synthesis state       */
     280             : )
     281             : {
     282             :     Word16 idx;
     283             : 
     284             :     /*-----------------------------------------------------------------*
     285             :      * memory deallocation
     286             :      *-----------------------------------------------------------------*/
     287             : 
     288             :     /* free interpolator */
     289         297 :     IF( h_dirac_output_synthesis_params->interpolator_fx != NULL )
     290             :     {
     291         297 :         free( h_dirac_output_synthesis_params->interpolator_fx );
     292         297 :         h_dirac_output_synthesis_params->interpolator_fx = NULL;
     293             :     }
     294             : 
     295             :     /* free alpha */
     296         297 :     IF( h_dirac_output_synthesis_params->alpha_synthesis_fx != NULL )
     297             :     {
     298           0 :         free( h_dirac_output_synthesis_params->alpha_synthesis_fx );
     299           0 :         h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
     300             :     }
     301             : 
     302             :     /* free proto_matrix */
     303         297 :     IF( h_dirac_output_synthesis_params->proto_matrix_fx != NULL )
     304             :     {
     305         297 :         free( h_dirac_output_synthesis_params->proto_matrix_fx );
     306         297 :         h_dirac_output_synthesis_params->proto_matrix_fx = NULL;
     307             :     }
     308             : 
     309             : 
     310         297 :     IF( h_dirac_output_synthesis_state->cx_old_e != NULL )
     311             :     {
     312         297 :         free( h_dirac_output_synthesis_state->cx_old_e );
     313         297 :         h_dirac_output_synthesis_state->cx_old_e = NULL;
     314             :     }
     315         297 :     IF( h_dirac_output_synthesis_state->cy_old_e != NULL )
     316             :     {
     317         297 :         free( h_dirac_output_synthesis_state->cy_old_e );
     318         297 :         h_dirac_output_synthesis_state->cy_old_e = NULL;
     319             :     }
     320             : 
     321             :     /* free cov buffers */
     322       18117 :     FOR( idx = 0; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     323             :     {
     324       17820 :         IF( h_dirac_output_synthesis_state->cx_old_fx[idx] != NULL )
     325             :         {
     326        3506 :             free( h_dirac_output_synthesis_state->cx_old_fx[idx] );
     327        3506 :             h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
     328             :         }
     329             : 
     330       17820 :         IF( h_dirac_output_synthesis_state->cy_old_fx[idx] != NULL )
     331             :         {
     332        3506 :             free( h_dirac_output_synthesis_state->cy_old_fx[idx] );
     333        3506 :             h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
     334             :         }
     335             : 
     336       17820 :         IF( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] != NULL )
     337             :         {
     338        3506 :             free( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] );
     339        3506 :             h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
     340             :         }
     341             : 
     342       17820 :         IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] != NULL )
     343             :         {
     344        2756 :             free( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] );
     345        2756 :             h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
     346             :         }
     347             : 
     348       17820 :         IF( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] != NULL )
     349             :         {
     350        3506 :             free( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] );
     351        3506 :             h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
     352             :         }
     353             : 
     354       17820 :         IF( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] != NULL )
     355             :         {
     356        2756 :             free( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] );
     357        2756 :             h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
     358             :         }
     359             :     }
     360             : 
     361         297 :     IF( h_dirac_output_synthesis_state->mixing_matrix_old_exp != NULL )
     362             :     {
     363         297 :         free( h_dirac_output_synthesis_state->mixing_matrix_old_exp );
     364         297 :         h_dirac_output_synthesis_state->mixing_matrix_old_exp = NULL;
     365             :     }
     366             : 
     367         297 :     IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp != NULL )
     368             :     {
     369         297 :         free( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp );
     370         297 :         h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = NULL;
     371             :     }
     372             : 
     373         297 :     IF( h_dirac_output_synthesis_state->mixing_matrix_exp != NULL )
     374             :     {
     375         297 :         free( h_dirac_output_synthesis_state->mixing_matrix_exp );
     376         297 :         h_dirac_output_synthesis_state->mixing_matrix_exp = NULL;
     377             :     }
     378             : 
     379         297 :     IF( h_dirac_output_synthesis_state->mixing_matrix_res_exp != NULL )
     380             :     {
     381         297 :         free( h_dirac_output_synthesis_state->mixing_matrix_res_exp );
     382         297 :         h_dirac_output_synthesis_state->mixing_matrix_res_exp = NULL;
     383             :     }
     384         297 :     return;
     385             : }
     386             : 
     387             : /*-------------------------------------------------------------------*
     388             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
     389             :  *
     390             :  * collect the multi channel input covariance for one filter bank time slot
     391             :  *-------------------------------------------------------------------*/
     392             : 
     393             : /*-------------------------------------------------------------------*
     394             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
     395             :  *
     396             :  * collect the multi channel input covariance for one filter bank time slot
     397             :  *-------------------------------------------------------------------*/
     398     1776450 : void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
     399             :     Word32 *RealBuffer_fx,                                                          /* i  : input channel filter bank samples (real part)          Q(31- RealBuffer_e)*/
     400             :     Word16 RealBuffer_e,                                                            /* i  : exponent input channel filter bank samples (real part)*/
     401             :     Word32 *ImagBuffer_fx,                                                          /* i  : input channel filter bank samples (imaginary part      Q(ImagBuffer_e)*/
     402             :     Word16 ImagBuffer_e,                                                            /* i  : exponent input channel filter bank samples (real part)*/
     403             :     Word32 cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS],      /* o  : accumulated input covariance (real part)              Q(31- cx_e)*/
     404             :     Word16 *cx_e,                                                                   /* i  : exponent for accumulated input covariance (real part) */
     405             :     Word32 cx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o  : accumulated input covariance (imaginary part)         Q(31- cx_imag_e)*/
     406             :     Word16 *cx_imag_e,                                                              /* i : exponent accumulated input covariance (imag part)      */
     407             :     PARAM_MC_DEC_HANDLE hParamMC,                                                   /* i  : handle to Parametric MC state                         */
     408             :     const Word16 param_band,                                                        /* i  : parameter band                                        */
     409             :     const Word16 nchan_in                                                           /* i  : number of input channels                              */
     410             : )
     411             : {
     412             :     Word16 band_idx, ch_idx;
     413             :     Word16 brange[2];
     414             :     Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
     415             :     Word16 real_in_e;
     416             :     Word32 imag_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
     417             :     Word16 imag_in_e;
     418             :     Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     419             :     Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     420             :     Word16 output_e;
     421             :     Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
     422             :     Word16 band, num_bands;
     423             :     Word16 cx_fx_norm, cx_imag_fx_norm;
     424             :     /* estimate input covariance */
     425             :     /* Already stack here instead of in the process_subframe */
     426             : 
     427             :     /* collect input frame */
     428     1776450 :     brange[0] = hParamMC->band_grouping[param_band];
     429     1776450 :     move16();
     430     1776450 :     brange[1] = hParamMC->band_grouping[param_band + 1];
     431     1776450 :     move16();
     432     1776450 :     num_bands = sub( brange[1], brange[0] );
     433             : 
     434     9379450 :     FOR( band_idx = 0; band_idx < num_bands; band_idx++ )
     435             :     {
     436     7603000 :         band = add( brange[0], band_idx );
     437    22843320 :         FOR( ch_idx = 0; ch_idx < nchan_in; ch_idx++ )
     438             :         {
     439    15240320 :             real_in_buffer_fx[band_idx + num_bands * ch_idx] = RealBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
     440    15240320 :             move32();
     441    15240320 :             imag_in_buffer_fx[band_idx + num_bands * ch_idx] = ImagBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
     442    15240320 :             move32();
     443             :         }
     444             :     }
     445             : 
     446     1776450 :     real_in_e = RealBuffer_e;
     447     1776450 :     move16();
     448     1776450 :     imag_in_e = ImagBuffer_e;
     449     1776450 :     move16();
     450             : 
     451     1776450 :     Word16 buf_len = imult1616( num_bands, nchan_in );
     452             : 
     453     1776450 :     shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
     454     1776450 :     shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
     455             : 
     456     1776450 :     real_in_e = sub( real_in_e, shift_real );
     457     1776450 :     imag_in_e = sub( imag_in_e, shift_imag );
     458             : 
     459             : 
     460     1776450 :     output_e = s_max( real_in_e, imag_in_e );
     461             : 
     462     1776450 :     scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
     463     1776450 :     scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
     464             : 
     465     1776450 :     cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
     466     1776450 :     v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
     467             : 
     468     1776450 :     v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
     469             : 
     470     1776450 :     cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
     471     1776450 :     cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
     472             : 
     473     1776450 :     scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
     474     1776450 :     scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
     475             : 
     476     1776450 :     *cx_e = sub( tmp1_e, cx_fx_norm );
     477     1776450 :     move16();
     478     1776450 :     *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
     479     1776450 :     move16();
     480             : 
     481     1776450 :     return;
     482             : }
     483             : 
     484             : /*-------------------------------------------------------------------*
     485             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot()
     486             :  *
     487             :  * synthesize one filter bank slot of multi channel output filter bank
     488             :  * samples with the covariance synthesis
     489             :  *-------------------------------------------------------------------*/
     490             : 
     491      140160 : void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
     492             :     Word32 *Cldfb_RealBuffer_in_fx,                                                   /*Q6*/
     493             :     Word32 *Cldfb_ImagBuffer_in_fx,                                                   /*Q6*/
     494             :     Word32 Cldfb_RealBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o  : output channel filter bank samples (real part)                   Q6*/
     495             :     Word32 Cldfb_ImagBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o  : output channel filter bank samples (imaginary part)              Q6*/
     496             :     Word32 *mixing_matrix_fx[],                                                       /* i  : parameter band wise mixing matrices (direct part)                Q(31-mixing_matrix_e)*/
     497             :     Word16 *mixing_matrix_e,                                                          /* i  : parameter band wise mixing matrices (direct part)                */
     498             :     Word32 *mixing_matrix_res_fx[],                                                   /* i  : parameter band wise mixing matrices (residual part)              Q(31-mixing_matrix_res_e)*/
     499             :     Word16 *mixing_matrix_res_e,                                                      /* i  : parameter band wise mixing matrices (residual part)              */
     500             :     const UWord16 slot_idx_sfr,                                                       /* i  : time slot index for the current slot within the current subframe */
     501             :     const UWord16 slot_idx_tot,                                                       /* i  : time slot index for the current slot within the frame            */
     502             :     const Word16 nX,                                                                  /* i  : number of input channels                                         */
     503             :     const Word16 nY,                                                                  /* i  : number of output channels                                        */
     504             :     PARAM_MC_DEC_HANDLE hParamMC                                                      /* i  : handle to the Parametric MC decoder state                        */
     505             : )
     506             : {
     507             :     Word16 param_band_idx, band, ch_idx;
     508             :     Word16 have_residual;
     509             :     Word16 brange[2];
     510             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE h_synthesis_state;
     511             :     Word32 mixing_matrix_smooth_fx[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
     512             :     Word16 mixing_matrix_smooth_e;
     513             :     Word32 mixing_matrix_res_smooth_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     514      140160 :     Word16 mixing_matrix_res_smooth_e = 0;
     515      140160 :     move16();
     516             :     Word32 mixing_matrix_buffer_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     517             :     Word16 mixing_matrix_buffer_e;
     518             :     Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
     519             :     Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
     520             : 
     521             :     Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
     522             :     Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];
     523             : 
     524      140160 :     h_synthesis_state = hParamMC->h_output_synthesis_cov_state;
     525             : 
     526      140160 :     set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
     527      140160 :     set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
     528             : 
     529      140160 :     set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
     530      140160 :     set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );
     531             : 
     532     1950080 :     FOR( param_band_idx = 0; param_band_idx < hParamMC->num_param_bands_synth; param_band_idx++ )
     533             :     {
     534             :         /* final mixing */
     535     1809920 :         have_residual = 0;
     536     1809920 :         move16();
     537     1809920 :         brange[0] = hParamMC->band_grouping[param_band_idx];
     538     1809920 :         move16();
     539     1809920 :         brange[1] = hParamMC->band_grouping[( param_band_idx + 1 )];
     540     1809920 :         move16();
     541             : 
     542     1809920 :         if ( LT_16( brange[0], hParamMC->h_output_synthesis_params.max_band_decorr ) )
     543             :         {
     544     1454400 :             have_residual = 1;
     545     1454400 :             move16();
     546             :         }
     547             : 
     548     1809920 :         v_multc_fixed( mixing_matrix_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_smooth_fx, imult1616( nY, nX ) );
     549     1809920 :         mixing_matrix_smooth_e = mixing_matrix_e[param_band_idx]; // interpolator is W16
     550     1809920 :         move16();
     551             : 
     552     1809920 :         v_multc_fixed( h_synthesis_state.mixing_matrix_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nX ) );
     553     1809920 :         mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_old_exp[param_band_idx]; // interpolator is W16
     554     1809920 :         move16();
     555             : 
     556     1809920 :         v_add_fixed_me( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_smooth_fx, &mixing_matrix_smooth_e, imult1616( nY, nX ), 0 );
     557             : 
     558     1809920 :         IF( have_residual )
     559             :         {
     560             :             /* residual mixing matrix interpolation*/
     561             : 
     562     1454400 :             v_multc_fixed( mixing_matrix_res_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_res_smooth_fx, imult1616( nY, nY ) );
     563     1454400 :             mixing_matrix_res_smooth_e = mixing_matrix_res_e[param_band_idx]; // interpolator is W16
     564     1454400 :             move16();
     565             : 
     566     1454400 :             set_zero_fx( mixing_matrix_buffer_fx, imult1616( nY, nY ) );
     567     1454400 :             v_multc_fixed( h_synthesis_state.mixing_matrix_res_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nY ) );
     568     1454400 :             mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_res_old_exp[param_band_idx]; // interpolator is W16
     569     1454400 :             move16();
     570             : 
     571     1454400 :             v_add_fixed_me( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_res_smooth_fx, &mixing_matrix_res_smooth_e, imult1616( nY, nY ), 0 );
     572             :         }
     573             : 
     574             : 
     575     9553920 :         FOR( band = brange[0]; band < brange[1]; band++ )
     576             :         {
     577     7744000 :             assert( band >= 0 );
     578             : 
     579     7744000 :             IF( have_residual )
     580             :             {
     581             :                 /* collect diffuse prototypes */
     582     2803200 :                 assert( LT_16( band, hParamMC->h_output_synthesis_params.max_band_decorr ) );
     583    19833600 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     584             :                 {
     585    17030400 :                     diff_f_real_fx[ch_idx] = Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band];
     586    17030400 :                     move32();
     587    17030400 :                     diff_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band];
     588    17030400 :                     move32();
     589             :                 }
     590             : 
     591             :                 /* apply residual mixing */
     592             :                 {
     593             :                     Word16 shifter;
     594             : #ifdef OPT_SBA_DEC_V2_BE
     595     2803200 :                     shifter = sub( mixing_matrix_res_smooth_e, 31 );
     596             : #else  /* OPT_SBA_DEC_V2_BE */
     597             :                     shifter = 31 - mixing_matrix_res_smooth_e;
     598             : #endif /* OPT_SBA_DEC_V2_BE */
     599    19833600 :                     FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     600             :                     {
     601             :                         int i;
     602             :                         Word16 idx;
     603             :                         Word64 temp_real, temp_imag;
     604             : 
     605             : 
     606    17030400 :                         idx = ch_idx;
     607    17030400 :                         temp_real = 0;
     608    17030400 :                         temp_imag = 0;
     609    17030400 :                         move64();
     610    17030400 :                         move64();
     611   121209600 :                         for ( i = 0; i < nY; i++ )
     612             :                         {
     613   104179200 :                             temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] ) );
     614   104179200 :                             temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) );
     615   104179200 :                             idx += nY;
     616             :                         }
     617             : #ifdef OPT_SBA_DEC_V2_BE
     618    17030400 :                         Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter );
     619    17030400 :                         Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter );
     620             : #else  /* OPT_SBA_DEC_V2_BE */
     621             :                         Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) );
     622             :                         Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) );
     623             : #endif /* OPT_SBA_DEC_V2_BE */
     624             :                     }
     625             :                 }
     626             :             }
     627             :             ELSE
     628             :             {
     629    34969600 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     630             :                 {
     631    30028800 :                     Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
     632    30028800 :                     move32();
     633    30028800 :                     Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
     634    30028800 :                     move32();
     635             :                 }
     636             :             }
     637             : 
     638             :             /* collect input signals, still in cldfb buffers */
     639    23270400 :             FOR( ch_idx = 0; ch_idx < nX; ch_idx++ )
     640             :             {
     641             : 
     642    15526400 :                 input_f_real_fx[ch_idx] = Cldfb_RealBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
     643    15526400 :                 move32();
     644    15526400 :                 input_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
     645    15526400 :                 move32();
     646             :             }
     647             : 
     648             :             /* apply mixing matrix */
     649             :             {
     650             :                 Word16 shifter;
     651     7744000 :                 shifter = 31 - mixing_matrix_smooth_e;
     652             : 
     653    54803200 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     654             :                 {
     655             :                     int i;
     656             :                     Word16 idx;
     657             :                     Word64 temp_real, temp_imag;
     658             : 
     659             : 
     660    47059200 :                     idx = ch_idx;
     661    47059200 :                     temp_real = 0;
     662    47059200 :                     temp_imag = 0;
     663    47059200 :                     move64();
     664    47059200 :                     move64();
     665   141638400 :                     for ( i = 0; i < nX; i++ )
     666             :                     {
     667    94579200 :                         temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) );
     668    94579200 :                         temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) );
     669    94579200 :                         idx += nY;
     670             :                     }
     671    47059200 :                     Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) );
     672    47059200 :                     move32();
     673    47059200 :                     Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
     674    47059200 :                     move32();
     675             :                 }
     676             :             }
     677             :         }
     678             :     }
     679             : 
     680      140160 :     return;
     681             : }
     682             : 
     683             : /*-------------------------------------------------------------------*
     684             :  * computeMixingMatrices()
     685             :  *
     686             :  * compute a mixing matrix using the convariance synthesis approach
     687             :  *-------------------------------------------------------------------*/
     688             : 
     689      113120 : Word16 computeMixingMatrices_fx(
     690             :     const Word16 num_inputs,  /* i  : number of input channels                                                                                      */
     691             :     const Word16 num_outputs, /* i  : number of output channels                                                                                     */
     692             :     const Word32 *Cx,         /* i  : input channel covariance matrix Q(31-Cx_e)                                                                               */
     693             :     Word16 Cx_e,
     694             :     const Word32 *Cy, /* i  : target covariance matrix Q(31-Cy_e)                                                                               */
     695             :     Word16 Cy_e,
     696             :     const Word32 *Q, /* i  : prototype matrix (usually a upmix matrix) Q_fx_e                                                                     */
     697             :     Word16 Q_fx_e,
     698             :     const Word16 energy_compensation_flag, /* i  : flag indicating that the energy compensation should be performed (i.e. no residual mixing matrix will follow) */
     699             :     const Word32 reg_Sx_fx,                /* i  : regularization factor for the input channel singular values                                                   */
     700             :     Word16 reg_Sx_e,
     701             :     const Word32 reg_ghat_fx, /* i  : regularization factor for the normalization matrix  Q(31-reg_ghat_e)                                                         */
     702             :     Word16 reg_ghat_e,
     703             :     Word32 *mixing_matrix_fx, /* o  : resulting mixing matrix       Q(31-mixing_matrix_out_e)                                                                                 */
     704             :     Word16 *mixing_matrix_out_e,
     705             :     Word32 *Cr_fx, /* o  : residual covariance matrix Q(31-Cr_e)                                                                                    */
     706             :     Word16 *Cr_e )
     707             : {
     708             :     Word16 i, j;
     709             :     Word16 out;
     710             :     Word16 nL, nC;
     711      113120 :     Word16 lengthCx = num_inputs;
     712      113120 :     Word16 lengthCy = num_outputs;
     713             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
     714             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     715             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     716             :     Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     717             :     Word32 Cy_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     718             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
     719             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
     720             :     Word16 Cx_fx_e;
     721             :     Word16 Cy_fx_e;
     722             : 
     723             :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
     724             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
     725             : 
     726             : 
     727             :     Word32 limit_fx;
     728             :     Word16 limit_e;
     729             : 
     730             :     Word32 L_tmp;
     731             :     Word16 tmp_e, tmp, exp;
     732             : 
     733             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     734             :     Word32 Kx_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     735             : 
     736             :     Word16 Kx_fx_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     737             :     Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     738             : 
     739             :     Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     740             :     Word16 Kx_reg_inv_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     741             : 
     742             :     Word32 Q_fx[PARAM_MC_MAX_TRANSPORT_CHANS * MAX_CICP_CHANNELS];
     743             :     Word16 Q_e;
     744             : 
     745             :     Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     746             :     Word16 Q_Cx_e;
     747             : 
     748             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
     749             :     Word16 Cy_hat_diag_e;
     750             :     Word16 Cy_hat_diag_buff_e[MAX_OUTPUT_CHANNELS];
     751             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
     752             :     Word16 G_hat_buff_e[MAX_OUTPUT_CHANNELS];
     753             : 
     754             :     Word16 mat_mult_buffer2_e, mat_mult_buffer3_e;
     755             : 
     756             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     757             : 
     758             :     Word16 mixing_matrix_e;
     759             : 
     760             :     Word16 Cr_fx_e;
     761             : 
     762             : 
     763             :     Word32 adj_fx[MAX_OUTPUT_CHANNELS];
     764             :     Word16 adj_e[MAX_OUTPUT_CHANNELS];
     765             :     Word32 *adj_fx_p;
     766             :     Word16 adj_fx_e;
     767             : 
     768             :     Word32 *Cr_p_fx, *Cy_tilde_p_fx, *Cy_p_fx;
     769      113120 :     push_wmops( "dirac_cov_mix_mat" );
     770             : 
     771      113120 :     out = EXIT_SUCCESS;
     772      113120 :     move16();
     773             : 
     774      113120 :     set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
     775     1923040 :     FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
     776             :     {
     777     1809920 :         set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     778     1809920 :         set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     779     1809920 :         set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     780             :     }
     781             : 
     782             : 
     783      113120 :     Copy32( Q, Q_fx, imult1616( lengthCy, lengthCx ) );
     784      113120 :     Copy32( Cx, Cx_fx, imult1616( lengthCx, lengthCx ) );
     785      113120 :     Copy32( Cy, Cy_fx, imult1616( lengthCy, lengthCy ) );
     786             : 
     787      113120 :     Q_e = Q_fx_e;
     788      113120 :     move16();
     789      113120 :     Cx_fx_e = Cx_e;
     790      113120 :     move16();
     791      113120 :     Cy_fx_e = Cy_e;
     792      113120 :     move16();
     793             : 
     794             : 
     795             :     /*-----------------------------------------------------------------*
     796             :      * Decomposition of Cy
     797             :      *-----------------------------------------------------------------*/
     798             : 
     799             :     /* Processing the SVD */
     800             : 
     801      113120 :     mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
     802             : 
     803      113120 :     svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
     804      113120 :     Word16 max_e = -32;
     805             :     /* Computing Ky */
     806      688444 :     FOR( i = 0; i < lengthCy; ++i )
     807             :     {
     808     3533968 :         FOR( j = 0; j < lengthCy; ++j )
     809             :         {
     810     2958644 :             tmp_e = svd_s_buffer_e[j];
     811     2958644 :             move16();
     812     2958644 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
     813     2958644 :             Ky_fx[i + ( j * lengthCy )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
     814     2958644 :             move32();
     815     2958644 :             Ky_fx_e[i + ( j * lengthCy )] = tmp_e;
     816     2958644 :             move16();
     817     2958644 :             max_e = s_max( max_e, tmp_e );
     818             :         }
     819             :     }
     820     3071764 :     FOR( i = 0; i < lengthCy * lengthCy; ++i )
     821             :     {
     822     2958644 :         Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
     823     2958644 :         move32();
     824     2958644 :         Ky_fx_e[i] = max_e;
     825     2958644 :         move16();
     826             :     }
     827             : 
     828             :     /*-----------------------------------------------------------------*
     829             :      * Decomposition of Cx
     830             :      *-----------------------------------------------------------------*/
     831             : 
     832             :     /* Processing the SVD */
     833             : 
     834      113120 :     mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 );
     835             : 
     836      113120 :     svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCx );
     837      113120 :     max_e = -32;
     838      339920 :     FOR( i = 0; i < lengthCx; ++i )
     839             :     {
     840      682080 :         FOR( j = 0; j < lengthCx; ++j )
     841             :         {
     842      455280 :             tmp_e = svd_s_buffer_e[j];
     843      455280 :             move16();
     844      455280 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
     845      455280 :             Kx_fx[( i + ( j * lengthCx ) )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
     846      455280 :             move32();
     847      455280 :             Kx_fx_e[( i + ( j * lengthCx ) )] = tmp_e;
     848      455280 :             move16();
     849      455280 :             max_e = s_max( max_e, tmp_e );
     850             :         }
     851             :     }
     852      568400 :     FOR( i = 0; i < lengthCx * lengthCx; ++i )
     853             :     {
     854      455280 :         Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
     855      455280 :         move32();
     856      455280 :         Kx_fx_e[i] = max_e;
     857      455280 :         move16();
     858             :     }
     859             : 
     860      339920 :     FOR( i = 0; i < lengthCx; ++i )
     861             :     {
     862      226800 :         tmp_e = svd_s_buffer_e[i];
     863      226800 :         move16();
     864      226800 :         svd_s_buffer_fx[i] = Sqrt32( svd_s_buffer_fx[i], &tmp_e ); // Q(31-tmp_e)
     865      226800 :         move32();
     866      226800 :         svd_s_buffer_e[i] = tmp_e;
     867      226800 :         move16();
     868             :     }
     869             : 
     870             :     /*-----------------------------------------------------------------*
     871             :      * Regularization of Sx
     872             :      *-----------------------------------------------------------------*/
     873             : 
     874      113120 :     limit_fx = svd_s_buffer_fx[0];
     875      113120 :     move32();
     876      113120 :     limit_e = svd_s_buffer_e[0];
     877      113120 :     move16();
     878      226800 :     FOR( i = 1; i < lengthCx; i++ )
     879             :     {
     880      113680 :         IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) > 0 )
     881             :         {
     882           0 :             limit_fx = svd_s_buffer_fx[i];
     883           0 :             move32();
     884           0 :             limit_e = svd_s_buffer_e[i];
     885           0 :             move16();
     886             :         }
     887             :     }
     888             : 
     889      113120 :     L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx );
     890      113120 :     limit_e = add( limit_e, reg_Sx_e );
     891      113120 :     limit_fx = L_add( L_tmp, EPSILON_FX );
     892             : 
     893      339920 :     FOR( i = 0; i < lengthCx; ++i )
     894             :     {
     895      226800 :         IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) < 0 )
     896             :         {
     897       49326 :             svd_s_buffer_fx[i] = limit_fx;
     898       49326 :             move32();
     899       49326 :             svd_s_buffer_e[i] = limit_e;
     900       49326 :             move16();
     901             :         }
     902             :     }
     903             : 
     904             : 
     905      113120 :     limit_fx = 0;
     906      113120 :     move32();
     907      113120 :     limit_e = 0;
     908      113120 :     move16();
     909             : 
     910             : 
     911             :     /*-----------------------------------------------------------------*
     912             :      * regularized Kx-1
     913             :      *-----------------------------------------------------------------*/
     914             : 
     915      339920 :     FOR( i = 0; i < lengthCx; ++i )
     916             :     {
     917             :         Word16 scale, reg_fac_fx;
     918      226800 :         reg_fac_fx = BASOP_Util_Divide3232_Scale( 1, svd_s_buffer_fx[i], &scale );
     919      226800 :         scale = add( scale, sub( Q31, svd_s_buffer_e[i] ) );
     920      682080 :         FOR( j = 0; j < lengthCx; ++j )
     921             :         {
     922      455280 :             Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1( svd_u_buffer_fx[j][i], reg_fac_fx ); // Q(31-scale)
     923      455280 :             move32();
     924      455280 :             Kx_reg_inv_e[i + j * lengthCx] = scale;
     925      455280 :             move16();
     926             :         }
     927             :     }
     928             : 
     929             :     /*-----------------------------------------------------------------*
     930             :      * normalization matrix G hat
     931             :      *-----------------------------------------------------------------*/
     932             : 
     933             :     /* Computing Q*Cx*Q' */
     934             : 
     935      113120 :     matrix_product_mant_exp_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_fx, Cx_fx_e, lengthCx, lengthCx, 0, Q_Cx_fx, &Q_Cx_e );
     936             : 
     937      113120 :     matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
     938             : 
     939             : 
     940      688444 :     FOR( i = 0; i < lengthCy; ++i )
     941             :     {
     942      575324 :         IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
     943             :         {
     944      289247 :             limit_fx = Cy_hat_diag_fx[i];
     945      289247 :             move32();
     946      289247 :             limit_e = Cy_hat_diag_e;
     947      289247 :             move16();
     948             :         }
     949             :     }
     950      113120 :     L_tmp = Mpy_32_32( limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
     951      113120 :     limit_fx = L_add( L_tmp, EPSILON_FX );
     952      113120 :     limit_e = add( limit_e, reg_ghat_e );
     953             : 
     954      688444 :     FOR( i = 0; i < lengthCy; ++i )
     955             :     {
     956      575324 :         Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
     957      575324 :         move16();
     958      575324 :         IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_buff_e[i] ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
     959             :         {
     960         210 :             Cy_hat_diag_fx[i] = limit_fx;
     961         210 :             move32();
     962         210 :             Cy_hat_diag_buff_e[i] = limit_e;
     963         210 :             move16();
     964             :         }
     965             : 
     966      575324 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[( i + ( i * lengthCy ) )], Cy_hat_diag_fx[i], &exp );
     967      575324 :         exp = add( exp, sub( Cy_fx_e, Cy_hat_diag_buff_e[i] ) );
     968      575324 :         L_tmp = Sqrt32( L_deposit_h( tmp ), &exp );
     969      575324 :         G_hat_fx[i] = L_tmp;
     970      575324 :         move32();
     971      575324 :         G_hat_buff_e[i] = exp;
     972      575324 :         move16();
     973             :     }
     974             : 
     975             : 
     976             :     /*-----------------------------------------------------------------*
     977             :      * Formulate optimal P
     978             :      *-----------------------------------------------------------------*/
     979             : 
     980             :     /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
     981             : 
     982             :     Word16 mat_mult_buffer1_fx_e;
     983             : 
     984             :     Word16 mat_mult_buffer2_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     985             : 
     986      113120 :     matrix_product_mant_exp_fx( Kx_fx, Kx_fx_e[0], lengthCx, lengthCx, 1, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
     987             : 
     988      113120 :     matrix_diag_product_fx_2( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_buff_e, lengthCy, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
     989             : 
     990      113120 :     matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_fx_e[0], lengthCx, lengthCy, 0, Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
     991             : 
     992      113120 :     IF( LT_16( lengthCx, lengthCy ) )
     993             :     {
     994      113120 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 );
     995      113120 :         nL = lengthCy;
     996      113120 :         move16();
     997      113120 :         nC = lengthCx;
     998      113120 :         move16();
     999      113120 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
    1000             :     }
    1001             :     ELSE
    1002             :     {
    1003           0 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
    1004           0 :         nL = lengthCx;
    1005           0 :         move16();
    1006           0 :         nC = lengthCy;
    1007           0 :         move16();
    1008           0 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
    1009             :     }
    1010             : 
    1011             :     /* Actually Processing P */
    1012             : 
    1013             :     /* can be skipped: lambda is always column-truncated identity matrix, so this operation just
    1014             :        truncates V to num_input_channel columns */
    1015             : 
    1016      113120 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
    1017      113120 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1018             : 
    1019      113120 :     mat_mult_buffer1_fx_e = 0;
    1020      113120 :     move16();
    1021      113120 :     mat_mult_buffer2_e = 0;
    1022      113120 :     move16();
    1023             : 
    1024      113120 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCy, lengthCx, 0,
    1025             :                                 mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
    1026             :                                 mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1027             : 
    1028             :     /************************ Formulate M **********************/
    1029             : 
    1030      113120 :     matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
    1031             : 
    1032             :     Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
    1033             : 
    1034             :     Word16 mat_mult_buffer1_fx_e1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1035      113120 :     set16_fx( mat_mult_buffer1_fx_e1, mat_mult_buffer1_fx_e, MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
    1036             : 
    1037      113120 :     matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e );
    1038             : 
    1039             :     /*-----------------------------------------------------------------*
    1040             :      * Formulate Cr
    1041             :      *-----------------------------------------------------------------*/
    1042             : 
    1043             :     /* Compute Cy_tilde = M*Cx*M' */
    1044             : 
    1045             : 
    1046             :     Word16 Cx_e_arr[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
    1047      113120 :     set16_fx( Cx_e_arr, Cx_fx_e, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
    1048      113120 :     matrix_product_mant_exp( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1 );
    1049             : 
    1050      113120 :     matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
    1051             : 
    1052      113120 :     exp = mixing_matrix_fx_e[0];
    1053      113120 :     move16();
    1054     1156848 :     FOR( i = 1; i < lengthCy * lengthCx; i++ )
    1055             :     {
    1056     1043728 :         if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
    1057             :         {
    1058       81665 :             exp = mixing_matrix_fx_e[i];
    1059       81665 :             move16();
    1060             :         }
    1061             :     }
    1062             : 
    1063     1269968 :     FOR( i = 0; i < lengthCy * lengthCx; i++ )
    1064             :     {
    1065     1156848 :         mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
    1066     1156848 :         move32();
    1067             :     }
    1068             : 
    1069      113120 :     mixing_matrix_e = exp;
    1070      113120 :     move16();
    1071             : 
    1072      113120 :     Cr_p_fx = Cr_fx;
    1073      113120 :     Cy_p_fx = Cy_fx;
    1074      113120 :     Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1075             :     Word16 Cr_e_arr[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
    1076      688444 :     FOR( i = 0; i < lengthCy; ++i )
    1077             :     {
    1078     3533968 :         FOR( j = 0; j < lengthCy; ++j )
    1079             :         {
    1080             : 
    1081     2958644 :             *( Cr_p_fx ) = BASOP_Util_Add_Mant32Exp( *( Cy_p_fx ), Cy_fx_e, L_negate( *( Cy_tilde_p_fx ) ), mat_mult_buffer2_fx_e[i * lengthCy + j], &Cr_e_arr[i * lengthCy + j] );
    1082     2958644 :             move32();
    1083     2958644 :             Cr_p_fx++;
    1084     2958644 :             Cy_p_fx++;
    1085     2958644 :             Cy_tilde_p_fx++;
    1086             :         }
    1087             : 
    1088             :         /* Avoid Meaningless negative main diagonal elements */
    1089      575324 :         IF( Cr_fx[i + ( i * lengthCy )] < 0 )
    1090             :         {
    1091       62157 :             Cr_fx[i + ( i * lengthCy )] = 0;
    1092       62157 :             move32();
    1093       62157 :             Cr_e_arr[i + ( i * lengthCy )] = 0;
    1094       62157 :             move16();
    1095             :         }
    1096             :     }
    1097             : 
    1098      113120 :     exp = Cr_e_arr[0];
    1099      113120 :     move16();
    1100     2958644 :     FOR( i = 1; i < lengthCy * lengthCy; i++ )
    1101             :     {
    1102     2845524 :         if ( LT_16( exp, Cr_e_arr[i] ) )
    1103             :         {
    1104      103521 :             exp = Cr_e_arr[i];
    1105      103521 :             move16();
    1106             :         }
    1107             :     }
    1108             : 
    1109     3071764 :     FOR( i = 0; i < lengthCy * lengthCy; i++ )
    1110             :     {
    1111     2958644 :         Cr_fx[i] = L_shr( Cr_fx[i], sub( exp, Cr_e_arr[i] ) ); // Q(31-exp)
    1112     2958644 :         move32();
    1113             :     }
    1114             : 
    1115      113120 :     Cr_fx_e = exp;
    1116      113120 :     move16();
    1117             : 
    1118      113120 :     exp = mat_mult_buffer2_fx_e[0];
    1119      113120 :     move16();
    1120     2958644 :     FOR( i = 1; i < lengthCy * lengthCy; i++ )
    1121             :     {
    1122     2845524 :         if ( LT_16( exp, mat_mult_buffer2_fx_e[i] ) )
    1123             :         {
    1124      156914 :             exp = mat_mult_buffer2_fx_e[i];
    1125      156914 :             move16();
    1126             :         }
    1127             :     }
    1128             : 
    1129     3071764 :     FOR( i = 0; i < lengthCy * lengthCy; i++ )
    1130             :     {
    1131     2958644 :         mat_mult_buffer2_fx[i] = L_shr( mat_mult_buffer2_fx[i], sub( exp, mat_mult_buffer2_fx_e[i] ) ); // Q(31-exp)
    1132     2958644 :         move32();
    1133             :     }
    1134             : 
    1135      113120 :     mat_mult_buffer2_e = exp;
    1136      113120 :     move16();
    1137             : 
    1138             :     /*-----------------------------------------------------------------*
    1139             :      * Energy Compensation
    1140             :      *-----------------------------------------------------------------*/
    1141             : 
    1142      113120 :     IF( EQ_16( energy_compensation_flag, 1 ) )
    1143             :     {
    1144       22220 :         adj_fx_p = svd_s_buffer_fx;
    1145       22220 :         Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1146             : 
    1147      135100 :         FOR( i = 0; i < lengthCy; ++i )
    1148             :         {
    1149             :             /* Avoid correction for very small energies,
    1150             :                main diagonal elements of Cy_tilde_p may be negative */
    1151      112880 :             IF( Cy_tilde_p_fx[i + ( i * lengthCy )] < 0 )
    1152             :             {
    1153           0 :                 adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1154           0 :                 move32();
    1155           0 :                 adj_e[i] = 1;
    1156           0 :                 move16();
    1157             :             }
    1158             :             ELSE
    1159             :             {
    1160      112880 :                 tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( i * lengthCy )], L_add( Cy_tilde_p_fx[i + ( i * lengthCy )], EPSILON_FX ), &exp );
    1161      112880 :                 exp = add( exp, sub( Cy_fx_e, mat_mult_buffer2_e ) );
    1162      112880 :                 L_tmp = L_deposit_h( tmp );
    1163      112880 :                 L_tmp = Sqrt32( L_tmp, &exp );
    1164      112880 :                 adj_fx_p[i] = L_tmp;
    1165      112880 :                 move32();
    1166      112880 :                 adj_e[i] = exp;
    1167      112880 :                 move16();
    1168             :             }
    1169             : 
    1170      112880 :             Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, adj_e[i] ) );
    1171      112880 :             IF( GT_32( adj_fx_p[i], temp ) )
    1172             :             {
    1173           4 :                 adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1174           4 :                 move32();
    1175           4 :                 adj_e[i] = 3;
    1176           4 :                 move16();
    1177             :             }
    1178             :         }
    1179             : 
    1180       22220 :         exp = adj_e[0];
    1181       22220 :         move16();
    1182      112880 :         FOR( i = 1; i < lengthCy; i++ )
    1183             :         {
    1184       90660 :             if ( LT_16( exp, adj_e[i] ) )
    1185             :             {
    1186        1040 :                 exp = adj_e[i];
    1187        1040 :                 move16();
    1188             :             }
    1189             :         }
    1190             : 
    1191      135100 :         FOR( i = 0; i < lengthCy; i++ )
    1192             :         {
    1193      112880 :             adj_fx[i] = L_shr( adj_fx_p[i], sub( exp, adj_e[i] ) ); // Q(31-exp)
    1194      112880 :             move32();
    1195             :         }
    1196       22220 :         adj_fx_e = exp;
    1197       22220 :         move16();
    1198             : 
    1199       22220 :         diag_matrix_product_fx( adj_fx, adj_fx_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1200             : 
    1201       22220 :         Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCx, lengthCy ) ); // Q(31-mat_mult_buffer3_e)
    1202       22220 :         mixing_matrix_e = mat_mult_buffer3_e;
    1203       22220 :         move16();
    1204             :     }
    1205             : 
    1206      113120 :     *mixing_matrix_out_e = mixing_matrix_e;
    1207      113120 :     move16();
    1208      113120 :     *Cr_e = Cr_fx_e;
    1209      113120 :     move16();
    1210      113120 :     pop_wmops();
    1211             : 
    1212      113120 :     return out;
    1213             : }
    1214             : 
    1215             : /*-------------------------------------------------------------------*
    1216             :  * computeMixingMatricesResidual()
    1217             :  *
    1218             :  * compute a residual mixing matrix using the covariance synthesis approach
    1219             :  *-------------------------------------------------------------------*/
    1220             : 
    1221       90900 : Word16 computeMixingMatricesResidual_fx(
    1222             :     const Word32 num_outputs, /* i  : number of output channels                                           */
    1223             :     const Word32 *Cx_fx,      /* i  : vector containing the diagonal diffuse prototype covariance Q(31-Cx_e)         */
    1224             :     const Word16 Cx_e,
    1225             :     const Word32 *Cy_fx, /* i  : matrix containing the missing cov (Cr from computeMixingMatrices()) Q(31-Cy_fx_e) */
    1226             :     const Word16 Cy_fx_e,
    1227             :     const Word32 reg_Sx_fx, /* i  : regularization factor for the input channel singular values Q(31-reg_Sx_e)         */
    1228             :     const Word16 reg_Sx_e,
    1229             :     const Word32 reg_ghat_fx, /* i  : regularization factor for the normalization matrix Q(31-reg_ghat_e)                  */
    1230             :     const Word16 reg_ghat_e,
    1231             :     Word32 *mixing_matrix_fx, /* o  : resulting residual mixing matrix Q(31-mixing_matrix_ret_e)                                    */
    1232             :     Word16 *mixing_matrix_ret_e )
    1233             : {
    1234             :     Word16 i, j;
    1235             :     Word16 out, lengthCx, lengthCy;
    1236       90900 :     out = EXIT_SUCCESS;
    1237       90900 :     move16();
    1238       90900 :     lengthCx = extract_l( num_outputs );
    1239       90900 :     lengthCy = extract_l( num_outputs );
    1240             : 
    1241             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1242             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1243             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
    1244             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
    1245             : 
    1246             :     Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1247             : 
    1248       90900 :     push_wmops( "dirac_cov_mix_mat_r" );
    1249             : 
    1250             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1251             :     Word32 adj_fx[MAX_OUTPUT_CHANNELS];
    1252             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1253       90900 :     Word16 mixing_matrix_e = 0, mat_mult_buffer1_e, adj_e, mat_mult_buffer3_e, mat_mult_buffer2_e;
    1254       90900 :     move16();
    1255             : 
    1256       90900 :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 };
    1257             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
    1258             :     Word32 L_tmp;
    1259             :     Word16 tmp_e;
    1260             :     Word16 tmp, scale;
    1261             :     Word32 div_tmp;
    1262             :     Word16 exp;
    1263             : 
    1264             :     Word32 Kx_fx[MAX_OUTPUT_CHANNELS];
    1265             :     Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1266             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1267             :     Word16 Kx_fx_e[MAX_OUTPUT_CHANNELS];
    1268             : 
    1269             :     Word32 limit_fx;
    1270             :     Word16 limit_e;
    1271             : 
    1272             :     Word32 Kx_reg_inv_fx[MAX_OUTPUT_CHANNELS];
    1273             :     Word16 Kx_reg_inv_e[MAX_OUTPUT_CHANNELS];
    1274             : 
    1275             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
    1276             :     Word16 Cy_hat_diag_e;
    1277             : 
    1278             :     Word16 Cy_hat_diag_fx_e[MAX_OUTPUT_CHANNELS];
    1279             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
    1280             :     Word16 G_hat_e[MAX_OUTPUT_CHANNELS];
    1281             : 
    1282             :     Word32 *adj_fx_p;
    1283             :     Word16 adj_buff_e[MAX_OUTPUT_CHANNELS];
    1284             : 
    1285             :     Word32 Cy_tilde_fx[MAX_OUTPUT_CHANNELS];
    1286             :     Word16 Cy_tilde_e;
    1287             : 
    1288             :     /*-----------------------------------------------------------------*
    1289             :      * Decomposition of Cy
    1290             :      *-----------------------------------------------------------------*/
    1291             : 
    1292             :     /* Processing the SVD */
    1293             : 
    1294             :     /* linear array to svd buffer */
    1295       90900 :     mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
    1296             : 
    1297       90900 :     svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
    1298             : 
    1299             :     /* Computing Ky */
    1300       90900 :     Word16 max_e = -32;
    1301      553344 :     FOR( i = 0; i < lengthCy; ++i )
    1302             :     {
    1303     2841348 :         FOR( j = 0; j < lengthCy; ++j )
    1304             :         {
    1305     2378904 :             tmp_e = svd_s_buffer_e[j];
    1306     2378904 :             move16();
    1307     2378904 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
    1308     2378904 :             Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
    1309     2378904 :             move32();
    1310     2378904 :             Ky_fx_e[i + j * lengthCy] = tmp_e;
    1311     2378904 :             move16();
    1312     2378904 :             max_e = s_max( max_e, tmp_e );
    1313             :         }
    1314             :     }
    1315             : 
    1316     2469804 :     FOR( i = 0; i < lengthCy * lengthCy; ++i )
    1317             :     {
    1318     2378904 :         Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
    1319     2378904 :         move32();
    1320     2378904 :         Ky_fx_e[i] = max_e;
    1321     2378904 :         move16();
    1322             :     }
    1323             : 
    1324             :     /*-----------------------------------------------------------------*
    1325             :      * Decomposition of Cx
    1326             :      *-----------------------------------------------------------------*/
    1327             : 
    1328             :     /* Processing the SVD of Cx*/
    1329             :     /* Cx is a diagonal matrix, so SVD would lead to the sorted diagonal as S and u
    1330             :      *  would be just indicating the sorting index, so go straight to Kx as the
    1331             :      * square root of the diagonal of Cx */
    1332             : 
    1333             :     /* Computing Kx */
    1334       90900 :     max_e = -32;
    1335      553344 :     FOR( i = 0; i < lengthCx; ++i )
    1336             :     {
    1337      462444 :         exp = Cx_e;
    1338      462444 :         move16();
    1339      462444 :         Kx_fx[i] = Sqrt32( Cx_fx[i], &exp );
    1340      462444 :         move32();
    1341      462444 :         Kx_fx_e[i] = exp;
    1342      462444 :         move16();
    1343      462444 :         max_e = s_max( max_e, exp );
    1344             :     }
    1345             : 
    1346      553344 :     FOR( i = 0; i < lengthCx; ++i )
    1347             :     {
    1348      462444 :         Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
    1349      462444 :         move32();
    1350      462444 :         Kx_fx_e[i] = max_e;
    1351      462444 :         move16();
    1352             :     }
    1353             : 
    1354             :     /*-----------------------------------------------------------------*
    1355             :      * Regularization of Sx
    1356             :      *-----------------------------------------------------------------*/
    1357             : 
    1358       90900 :     limit_fx = Kx_fx[0];
    1359       90900 :     move32();
    1360             : 
    1361      462444 :     FOR( i = 1; i < lengthCx; i++ )
    1362             :     {
    1363      371544 :         IF( GT_32( Kx_fx[i], limit_fx ) )
    1364             :         {
    1365      141641 :             limit_fx = Kx_fx[i];
    1366      141641 :             move32();
    1367             :         }
    1368             :     }
    1369             : 
    1370       90900 :     L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx ); // limit_e + reg_Sx_e
    1371       90900 :     L_tmp = L_add( L_tmp, EPSILLON_FX );
    1372       90900 :     limit_fx = L_tmp;
    1373       90900 :     move16();
    1374       90900 :     limit_e = add( Kx_fx_e[0], reg_Sx_e );
    1375             : 
    1376      553344 :     FOR( i = 0; i < lengthCx; ++i )
    1377             :     {
    1378      462444 :         IF( BASOP_Util_Cmp_Mant32Exp( Kx_fx[i], Kx_fx_e[i], limit_fx, limit_e ) > 0 )
    1379             :         {
    1380      461368 :             div_tmp = Kx_fx[i];
    1381      461368 :             move32();
    1382      461368 :             exp = Kx_fx_e[i];
    1383      461368 :             move16();
    1384             :         }
    1385             :         ELSE
    1386             :         {
    1387        1076 :             div_tmp = limit_fx;
    1388        1076 :             move32();
    1389        1076 :             exp = limit_e;
    1390        1076 :             move16();
    1391             :         }
    1392      462444 :         tmp = BASOP_Util_Divide3232_Scale( 1073741824, div_tmp, &scale ); // 1073741824 -> 1.0f in Q30
    1393      462444 :         scale = add( scale, sub( Q1, exp ) );
    1394             : 
    1395      462444 :         Kx_reg_inv_fx[i] = L_deposit_h( tmp );
    1396      462444 :         move32();
    1397      462444 :         Kx_reg_inv_e[i] = scale;
    1398      462444 :         move16();
    1399             :     }
    1400             : 
    1401       90900 :     limit_fx = 0;
    1402       90900 :     move32();
    1403       90900 :     limit_e = 0;
    1404       90900 :     move16();
    1405             : 
    1406             :     /*-----------------------------------------------------------------*
    1407             :      * regularized Kx-1
    1408             :      *-----------------------------------------------------------------*/
    1409             : 
    1410             :     /*-----------------------------------------------------------------*
    1411             :      * normalization matrix G hat
    1412             :      *-----------------------------------------------------------------*/
    1413             : 
    1414             :     /* Computing Cy_hat_diag */
    1415       90900 :     Copy32( Cx_fx, Cy_hat_diag_fx, extract_l( num_outputs ) ); // Q(31-Cx_e)
    1416       90900 :     Cy_hat_diag_e = Cx_e;
    1417       90900 :     move16();
    1418             : 
    1419      553344 :     FOR( i = 0; i < lengthCy; ++i )
    1420             :     {
    1421      462444 :         IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
    1422             :         {
    1423      232507 :             limit_fx = Cy_hat_diag_fx[i];
    1424      232507 :             move32();
    1425      232507 :             limit_e = Cy_hat_diag_e;
    1426      232507 :             move16();
    1427             :         }
    1428             :     }
    1429             : 
    1430       90900 :     L_tmp = Mpy_32_32( limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
    1431       90900 :     limit_fx = L_add( L_tmp, EPSILON_FX );
    1432       90900 :     limit_e = add( limit_e, reg_ghat_e );
    1433             : 
    1434             :     /* Computing G_hat */
    1435      553344 :     FOR( i = 0; i < lengthCy; ++i )
    1436             :     {
    1437      462444 :         Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
    1438      462444 :         move16();
    1439      462444 :         IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_e ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
    1440             :         {
    1441         210 :             Cy_hat_diag_fx[i] = limit_fx;
    1442         210 :             move32();
    1443         210 :             Cy_hat_diag_fx_e[i] = limit_e;
    1444         210 :             move16();
    1445             :         }
    1446      462444 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + i * lengthCy], Cy_hat_diag_fx[i], &scale );
    1447      462444 :         scale = add( scale, sub( Cy_fx_e, Cy_hat_diag_fx_e[i] ) );
    1448      462444 :         L_tmp = Sqrt32( L_deposit_h( tmp ), &scale );
    1449             : 
    1450      462444 :         G_hat_fx[i] = L_tmp;
    1451      462444 :         move32();
    1452      462444 :         G_hat_e[i] = scale;
    1453      462444 :         move16();
    1454             :     }
    1455             : 
    1456             :     /*-----------------------------------------------------------------*
    1457             :      * Formulate optimal P
    1458             :      *-----------------------------------------------------------------*/
    1459             : 
    1460      553344 :     FOR( i = 0; i < num_outputs; i++ )
    1461             :     {
    1462      462444 :         L_tmp = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e))
    1463      462444 :         Kx_fx[i] = L_tmp;
    1464      462444 :         move32();
    1465      462444 :         Kx_fx_e[i] = add( Kx_fx_e[i], G_hat_e[i] );
    1466      462444 :         move16();
    1467             :     }
    1468             : 
    1469      553344 :     FOR( i = 0; i < num_outputs; i++ )
    1470             :     {
    1471             :         Word32 fac_fx;
    1472      462444 :         fac_fx = Kx_fx[i];
    1473      462444 :         move32();
    1474             : 
    1475     2841348 :         FOR( j = 0; j < num_outputs; j++ )
    1476             :         {
    1477     2378904 :             L_tmp = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e))
    1478     2378904 :             mat_mult_buffer1_fx[i + j * num_outputs] = L_tmp;
    1479     2378904 :             move32();
    1480     2378904 :             mat_mult_buffer1_buff_e[i + j * num_outputs] = extract_l( L_add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] ) );
    1481     2378904 :             move16();
    1482             :         }
    1483             :     }
    1484             : 
    1485       90900 :     mat_mult_buffer1_e = mat_mult_buffer1_buff_e[0];
    1486       90900 :     move16();
    1487     2378904 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1488             :     {
    1489     2288004 :         if ( LT_16( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) )
    1490             :         {
    1491       24333 :             mat_mult_buffer1_e = mat_mult_buffer1_buff_e[i];
    1492       24333 :             move16();
    1493             :         }
    1494             :     }
    1495             : 
    1496     2469804 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1497             :     {
    1498     2378904 :         mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) ); // Q(31-mat_mult_buffer1_e)
    1499     2378904 :         move32();
    1500             :     }
    1501             : 
    1502       90900 :     mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
    1503             : 
    1504       90900 :     svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCy );
    1505             : 
    1506             :     /* Actually Processing P */
    1507             : 
    1508       90900 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
    1509       90900 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1510       90900 :     mat_mult_buffer1_e = 0;
    1511       90900 :     move16();
    1512       90900 :     mat_mult_buffer2_e = 0;
    1513       90900 :     move16();
    1514             : 
    1515       90900 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0,
    1516             :                                 mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
    1517             :                                 mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1518             : 
    1519             :     /*-----------------------------------------------------------------*
    1520             :      * Formulate M
    1521             :      *-----------------------------------------------------------------*/
    1522             : 
    1523             : 
    1524       90900 :     matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
    1525       90900 :     set16_fx( mat_mult_buffer1_buff_e, mat_mult_buffer1_buff_e[0], MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
    1526             : 
    1527             :     Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
    1528             : 
    1529      553344 :     FOR( i = 0; i < num_outputs; i++ )
    1530             :     {
    1531             :         Word32 fac_fx;
    1532      462444 :         fac_fx = Kx_reg_inv_fx[i];
    1533      462444 :         move32();
    1534             : 
    1535     2841348 :         FOR( j = 0; j < num_outputs; j++ )
    1536             :         {
    1537     2378904 :             L_tmp = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e)
    1538     2378904 :             mixing_matrix_fx[j + i * num_outputs] = L_tmp;
    1539     2378904 :             move32();
    1540     2378904 :             mixing_matrix_fx_e[j + i * num_outputs] = add( mat_mult_buffer1_buff_e[j + i * num_outputs], Kx_reg_inv_e[i] );
    1541     2378904 :             move16();
    1542             :         }
    1543             :     }
    1544             : 
    1545             :     /*-----------------------------------------------------------------*
    1546             :      * Formulate Cr
    1547             :      *-----------------------------------------------------------------*/
    1548             : 
    1549             :     /* Compute Cy_tilde = M*Cx*M' */
    1550             : 
    1551             :     Word16 Cx_e_arr[MAX_CICP_CHANNELS];
    1552       90900 :     set16_fx( Cx_e_arr, Cx_e, MAX_CICP_CHANNELS );
    1553             : 
    1554       90900 :     matrix_diag_product_fx_1( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
    1555             : 
    1556       90900 :     exp = mixing_matrix_fx_e[0];
    1557       90900 :     move16();
    1558     2378904 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1559             :     {
    1560     2288004 :         if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
    1561             :         {
    1562        9829 :             exp = mixing_matrix_fx_e[i];
    1563        9829 :             move16();
    1564             :         }
    1565             :     }
    1566             : 
    1567     2469804 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1568             :     {
    1569     2378904 :         mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
    1570     2378904 :         move32();
    1571             :     }
    1572       90900 :     mixing_matrix_e = exp;
    1573       90900 :     move16();
    1574             : 
    1575       90900 :     exp = mat_mult_buffer1_buff_e[0];
    1576       90900 :     move16();
    1577     2378904 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1578             :     {
    1579     2288004 :         if ( LT_16( exp, mat_mult_buffer1_buff_e[i] ) )
    1580             :         {
    1581        9829 :             exp = mat_mult_buffer1_buff_e[i];
    1582        9829 :             move16();
    1583             :         }
    1584             :     }
    1585             : 
    1586     2469804 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1587             :     {
    1588     2378904 :         mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( exp, mat_mult_buffer1_buff_e[i] ) ); // Q(31-exp)
    1589     2378904 :         move32();
    1590             :     }
    1591       90900 :     mat_mult_buffer1_e = exp;
    1592       90900 :     move16();
    1593             : 
    1594       90900 :     matrix_product_diag_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 1, Cy_tilde_fx, &Cy_tilde_e );
    1595             : 
    1596             :     /*-----------------------------------------------------------------*
    1597             :      * Energy Compensation
    1598             :      *-----------------------------------------------------------------*/
    1599             : 
    1600       90900 :     adj_fx_p = svd_s_buffer_fx;
    1601             : 
    1602      553344 :     FOR( i = 0; i < lengthCy; ++i )
    1603             :     {
    1604      462444 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( lengthCy * i )], L_add( Cy_tilde_fx[i], EPSILON_FX ), &scale );
    1605      462444 :         scale = add( scale, sub( Cy_fx_e, Cy_tilde_e ) );
    1606      462444 :         adj_fx_p[i] = Sqrt32( L_deposit_h( tmp ), &scale );
    1607      462444 :         move32();
    1608      462444 :         adj_buff_e[i] = scale;
    1609      462444 :         move16();
    1610      462444 :         Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, scale ) );
    1611      462444 :         IF( GT_32( adj_fx_p[i], temp ) ) // 1073741824 -> 1.0f in Q30
    1612             :         {
    1613           1 :             adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1614           1 :             move32();
    1615           1 :             adj_buff_e[i] = 3;
    1616           1 :             move16();
    1617             :         }
    1618             :     }
    1619             : 
    1620       90900 :     adj_e = adj_buff_e[0];
    1621       90900 :     move16();
    1622             : 
    1623      462444 :     FOR( i = 1; i < lengthCy; i++ )
    1624             :     {
    1625      371544 :         if ( LT_16( adj_e, adj_buff_e[i] ) )
    1626             :         {
    1627       36417 :             adj_e = adj_buff_e[i];
    1628       36417 :             move16();
    1629             :         }
    1630             :     }
    1631             : 
    1632             : 
    1633      553344 :     FOR( i = 0; i < lengthCy; i++ )
    1634             :     {
    1635      462444 :         adj_fx[i] = L_shr( adj_fx_p[i], sub( adj_e, adj_buff_e[i] ) ); // Q(31-adj_e)
    1636      462444 :         move32();
    1637             :     }
    1638             : 
    1639       90900 :     diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1640       90900 :     Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
    1641       90900 :     *mixing_matrix_ret_e = mat_mult_buffer3_e;
    1642       90900 :     move16();
    1643             : 
    1644       90900 :     pop_wmops();
    1645             : 
    1646       90900 :     return out;
    1647             : }
    1648             : 
    1649             : 
    1650             : /*-------------------------------------------------------------------*
    1651             :  * computeMixingMatricesISM()
    1652             :  *
    1653             :  *
    1654             :  *-------------------------------------------------------------------*/
    1655      315180 : Word16 computeMixingMatricesISM_fx(
    1656             :     const Word16 num_inputs,
    1657             :     const Word16 num_responses,
    1658             :     const Word16 num_outputs,
    1659             :     const Word32 *responses_fx, /*Q(31-responses_e) */
    1660             :     const Word16 responses_e,
    1661             :     const Word32 *ener_fx, /*Q(31-ener_e) */
    1662             :     const Word16 ener_e,
    1663             :     const Word32 *Cx_diag_fx, /*Q(31-diag_e) */
    1664             :     const Word16 Cx_diag_e,
    1665             :     const Word32 *Cy_diag_fx, /*Q(31-diag_e) */
    1666             :     const Word16 Cy_diag_e,
    1667             :     const Word16 *Q_16fx, // Q15
    1668             :     const Word16 energy_compensation_flag,
    1669             :     const Word32 reg_Sx_fx,   /*Q0*/
    1670             :     const Word32 reg_ghat_fx, /*Q0*/
    1671             :     Word32 *mixing_matrix_fx, /*Q(31-mixing_matrix_e) */
    1672             :     Word16 *mixing_matrix_e )
    1673             : {
    1674             :     Word16 i, out;
    1675             :     Word16 lengthCx, lengthCy;
    1676             :     Word16 nL, nC;
    1677             : 
    1678             :     Word32 *Cy_tilde_p_fx;
    1679             :     Word32 *adj_fx;
    1680             :     Word32 limit_fx;
    1681             : 
    1682             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1683             :     Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1684             :     Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1685             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1686             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
    1687             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1688             :     Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS];
    1689             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1690             :     Word32 Kx_fx[MAX_TRANSPORT_CHANNELS];
    1691             :     Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e;
    1692             : 
    1693             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1694             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1695             :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
    1696             :     Word16 svd_s_buffer_fx_e;
    1697             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
    1698             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1699             : 
    1700             :     Word16 temp_e[MAX_OUTPUT_CHANNELS];
    1701      315180 :     push_wmops( "dirac_cov_mix_mat" );
    1702             : 
    1703      315180 :     out = EXIT_SUCCESS;
    1704      315180 :     move16();
    1705      315180 :     lengthCx = num_inputs;
    1706      315180 :     move16();
    1707      315180 :     lengthCy = num_outputs;
    1708      315180 :     move16();
    1709             : 
    1710     6073020 :     FOR( i = 0; i < lengthCy * lengthCx; i++ )
    1711             :     {
    1712     5757840 :         IF( EQ_16( Q_16fx[i], MAX_16 ) )
    1713             :         {
    1714     2575200 :             Q_fx[i] = MAX_32;
    1715     2575200 :             move32();
    1716             :         }
    1717             :         ELSE
    1718             :         {
    1719     3182640 :             Q_fx[i] = L_deposit_h( Q_16fx[i] );
    1720     3182640 :             move32();
    1721             :         }
    1722             :     }
    1723      315180 :     Q_e = 0;
    1724      315180 :     move16();
    1725      315180 :     set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
    1726     5358060 :     FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
    1727             :     {
    1728     5042880 :         set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1729     5042880 :         set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1730     5042880 :         set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1731             :     }
    1732             : 
    1733             :     /* Decomposition of Cy = Ky*Ky' */
    1734             :     /* Ky = responses*diag(ener) */
    1735      315180 :     matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e );
    1736             : 
    1737             :     /* Decomposition of Cx -> Computing Kx */
    1738      315180 :     set16_fx( temp_e, Cx_diag_e, lengthCx );
    1739      315180 :     v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx );
    1740      315180 :     Kx_e = temp_e[0];
    1741      315180 :     move16();
    1742      630360 :     FOR( i = 1; i < lengthCx; i++ )
    1743             :     {
    1744      315180 :         Kx_e = s_max( Kx_e, temp_e[i] );
    1745             :     }
    1746      945540 :     FOR( i = 0; i < lengthCx; i++ )
    1747             :     {
    1748      630360 :         Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); // Q(31-Kx_e)
    1749      630360 :         move32();
    1750             :     }
    1751             : 
    1752             :     /* Regularization of Sx */
    1753      315180 :     maximum_32_fx( Kx_fx, lengthCx, &limit_fx );
    1754      315180 :     limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); // Cy_hat_diag_e + reg_ghat_e
    1755             : 
    1756      945540 :     FOR( i = 0; i < lengthCx; ++i )
    1757             :     {
    1758      630360 :         IF( GT_32( Kx_fx[i], limit_fx ) )
    1759             :         {
    1760      595983 :             svd_s_buffer_fx[i] = Kx_fx[i];
    1761      595983 :             move32();
    1762             :         }
    1763             :         ELSE
    1764             :         {
    1765       34377 :             svd_s_buffer_fx[i] = limit_fx;
    1766       34377 :             move32();
    1767             :         }
    1768             :     }
    1769      315180 :     svd_s_buffer_fx_e = Kx_e;
    1770      315180 :     move16();
    1771             : 
    1772      315180 :     limit_fx = 0;
    1773      315180 :     move32();
    1774             : 
    1775             :     /* regularized Kx-1 */
    1776             : 
    1777      945540 :     FOR( i = 0; i < lengthCx; ++i )
    1778             :     {
    1779      630360 :         IF( svd_s_buffer_fx[i] )
    1780             :         {
    1781             :             Word32 reg_fac;
    1782      630350 :             reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, svd_s_buffer_fx[i], &temp_e[i] );
    1783      630350 :             Kx_reg_inv_fx[i] = reg_fac;
    1784      630350 :             move32();
    1785      630350 :             temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e );
    1786      630350 :             move16();
    1787             :         }
    1788             :         ELSE
    1789             :         {
    1790             :             Word32 reg_fac;
    1791          10 :             reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, EPSILON_FX_M, &temp_e[i] );
    1792          10 :             Kx_reg_inv_fx[i] = reg_fac;
    1793          10 :             move32();
    1794          10 :             temp_e[i] = sub( temp_e[i], EPSILON_FX_E );
    1795          10 :             move16();
    1796             :         }
    1797             :     }
    1798      315180 :     Kx_reg_inv_e = temp_e[0];
    1799      315180 :     move16();
    1800      630360 :     FOR( i = 1; i < lengthCx; i++ )
    1801             :     {
    1802      315180 :         Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] );
    1803             :     }
    1804      945540 :     FOR( i = 0; i < lengthCx; i++ )
    1805             :     {
    1806      630360 :         Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); // Q(31- Kx_reg_inv_e)
    1807      630360 :         move32();
    1808             :     }
    1809             : 
    1810             :     /************************ normalization matrix G hat **********************/
    1811             : 
    1812             :     /* Computing Q*Cx*Q' */
    1813             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
    1814             :     Word16 Cy_hat_diag_e;
    1815             : 
    1816      315180 :     matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e );
    1817             : 
    1818      315180 :     Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 );
    1819             : 
    1820     6073020 :     FOR( i = 0; i < lengthCy * lengthCx; ++i )
    1821             :     {
    1822     5757840 :         IF( GT_16( Q_Cx_e, Q_e ) )
    1823             :         {
    1824     5757760 :             Q_fx[i] = L_shr( Q_fx[i], guard_bits );
    1825     5757760 :             move32();
    1826             :         }
    1827             :         ELSE
    1828             :         {
    1829          80 :             Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits );
    1830          80 :             move32();
    1831             :         }
    1832             :     }
    1833             : 
    1834      315180 :     IF( GT_16( Q_Cx_e, Q_e ) )
    1835             :     {
    1836      315175 :         Q_e = add( Q_e, guard_bits );
    1837             :     }
    1838             :     ELSE
    1839             :     {
    1840           5 :         Q_Cx_e = add( Q_Cx_e, guard_bits );
    1841             :     }
    1842      315180 :     matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
    1843             : 
    1844             :     /* Computing Cy_hat_diag */
    1845     3194100 :     FOR( i = 0; i < lengthCy; ++i )
    1846             :     {
    1847     2878920 :         if ( GT_32( Cy_hat_diag_fx[i], limit_fx ) )
    1848             :         {
    1849      469388 :             limit_fx = Cy_hat_diag_fx[i];
    1850      469388 :             move32();
    1851             :         }
    1852             :     }
    1853             : 
    1854      315180 :     limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); // Cy_hat_diag_e + reg_ghat_e
    1855             : 
    1856             :     /* Computing G_hat */
    1857     3194100 :     FOR( i = 0; i < lengthCy; ++i )
    1858             :     {
    1859     2878920 :         if ( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
    1860             :         {
    1861       11966 :             Cy_hat_diag_fx[i] = limit_fx;
    1862       11966 :             move32();
    1863             :         }
    1864     2878920 :         IF( Cy_diag_fx[i] )
    1865             :         {
    1866     1875159 :             IF( Cy_hat_diag_fx[i] )
    1867             :             {
    1868     1875159 :                 G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] );
    1869     1875159 :                 move32();
    1870     1875159 :                 temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) );
    1871     1875159 :                 move16();
    1872     1875159 :                 G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
    1873     1875159 :                 move32();
    1874             :             }
    1875             :             ELSE
    1876             :             {
    1877           0 :                 G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
    1878           0 :                 move32();
    1879           0 :                 temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
    1880           0 :                 move16();
    1881           0 :                 G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
    1882           0 :                 move32();
    1883             :             }
    1884             :         }
    1885             :         ELSE
    1886             :         {
    1887     1003761 :             G_hat_fx[i] = 0;
    1888     1003761 :             move32();
    1889     1003761 :             temp_e[i] = 0;
    1890     1003761 :             move16();
    1891             :         }
    1892             :     }
    1893      315180 :     G_hat_e = temp_e[0];
    1894      315180 :     move16();
    1895     2878920 :     FOR( i = 1; i < lengthCy; i++ )
    1896             :     {
    1897     2563740 :         G_hat_e = s_max( G_hat_e, temp_e[i] );
    1898             :     }
    1899     3194100 :     FOR( i = 0; i < lengthCy; i++ )
    1900             :     {
    1901     2878920 :         G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); // Q(31-G_hat_e)
    1902     2878920 :         move32();
    1903             :     }
    1904             : 
    1905             :     /************************ Formulate optimal P **********************/
    1906             : 
    1907             :     /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
    1908      315180 :     diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1909             : 
    1910      315180 :     matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
    1911             : 
    1912      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1913             : 
    1914      315180 :     IF( LT_16( lengthCx, num_responses ) )
    1915             :     {
    1916        3600 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 );
    1917             : 
    1918        3600 :         nL = num_responses;
    1919        3600 :         move16();
    1920        3600 :         nC = lengthCx;
    1921        3600 :         move16();
    1922        3600 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
    1923             :     }
    1924             :     ELSE
    1925             :     {
    1926      311580 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 );
    1927             : 
    1928      311580 :         nL = lengthCx;
    1929      311580 :         move16();
    1930      311580 :         nC = num_responses;
    1931      311580 :         move16();
    1932      311580 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
    1933             :     }
    1934             : 
    1935             :     /* Actually Processing P */
    1936             : 
    1937             :     /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */
    1938      315180 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx );
    1939      315180 :     mat_mult_buffer1_e = 0;
    1940      315180 :     move16();
    1941      315180 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1942      315180 :     mat_mult_buffer2_e = 0;
    1943      315180 :     move16();
    1944             : 
    1945      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1946             : 
    1947             :     /************************ Formulate M **********************/
    1948             : 
    1949      315180 :     matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1950             : 
    1951      315180 :     matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e );
    1952             : 
    1953             :     /*********************** Energy Compensation ****************/
    1954             : 
    1955             :     /* Compute Cy_tilde = M*Cx*M' */
    1956      315180 :     matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1957             : 
    1958      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
    1959             : 
    1960      315180 :     IF( EQ_16( energy_compensation_flag, 1 ) )
    1961             :     {
    1962      315180 :         adj_fx = svd_s_buffer_fx;
    1963      315180 :         Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1964     3194100 :         FOR( i = 0; i < lengthCy; ++i )
    1965             :         {
    1966             :             /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */
    1967     2878920 :             IF( Cy_tilde_p_fx[( i + ( i * lengthCy ) )] < 0 )
    1968             :             {
    1969           0 :                 adj_fx[i] = MAX_32;
    1970           0 :                 move32();
    1971           0 :                 temp_e[i] = 0;
    1972           0 :                 move16();
    1973             :             }
    1974             :             ELSE
    1975             :             {
    1976     2878920 :                 IF( Cy_diag_fx[i] )
    1977             :                 {
    1978     1875159 :                     IF( Cy_tilde_p_fx[i + ( i * lengthCy )] )
    1979             :                     {
    1980     1875117 :                         adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_tilde_p_fx[i + ( i * lengthCy )], &temp_e[i] );
    1981     1875117 :                         move32();
    1982     1875117 :                         temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) );
    1983     1875117 :                         move16();
    1984     1875117 :                         adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
    1985     1875117 :                         move32();
    1986             :                     }
    1987             :                     ELSE
    1988             :                     {
    1989          42 :                         adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
    1990          42 :                         move32();
    1991          42 :                         temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
    1992          42 :                         move16();
    1993          42 :                         adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
    1994          42 :                         move32();
    1995             :                     }
    1996             :                 }
    1997             :                 ELSE
    1998             :                 {
    1999     1003761 :                     adj_fx[i] = 0;
    2000     1003761 :                     move32();
    2001     1003761 :                     temp_e[i] = 0;
    2002     1003761 :                     move16();
    2003             :                 }
    2004             :             }
    2005             : 
    2006     2878920 :             Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, temp_e[i] ) );
    2007     2878920 :             IF( GT_32( adj_fx[i], temp ) )
    2008             :             {
    2009        1547 :                 adj_fx[i] = MAX_32;
    2010        1547 :                 move32();
    2011        1547 :                 temp_e[i] = 2;
    2012        1547 :                 move16();
    2013             :             }
    2014             :         }
    2015      315180 :         adj_e = temp_e[0];
    2016      315180 :         move16();
    2017     2878920 :         FOR( i = 1; i < lengthCy; i++ )
    2018             :         {
    2019     2563740 :             adj_e = s_max( adj_e, temp_e[i] );
    2020             :         }
    2021     3194100 :         FOR( i = 0; i < lengthCy; i++ )
    2022             :         {
    2023     2878920 :             adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); // Q(31-adj_e)
    2024     2878920 :             move32();
    2025             :         }
    2026             : 
    2027      315180 :         diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    2028             : 
    2029      315180 :         Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
    2030      315180 :         *mixing_matrix_e = mat_mult_buffer3_e;
    2031      315180 :         move16();
    2032             :     }
    2033             : 
    2034      315180 :     pop_wmops();
    2035             : 
    2036      315180 :     return out;
    2037             : }

Generated by: LCOV version 1.14