LCOV - Coverage on main enc/dec/rend @ a878b04bf8eaca781b28fa146981883bca858fc4 - lib_dec/ivas_dirac_output_synthesis_cov

LCOV - code coverage report

Current view:	top level - lib_dec - ivas_dirac_output_synthesis_cov_fx.c (source / functions)		Hit	Total	Coverage
Test:	Coverage on main enc/dec/rend @ a878b04bf8eaca781b28fa146981883bca858fc4	Lines:	834	874	95.4 %
Date:	2025-07-05 03:03:01	Functions:	9	9	100.0 %

          Line data    Source code

       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : 
      34             : #include <stdint.h>
      35             : #include <string.h>
      36             : #include <stdio.h>
      37             : #include <stdlib.h>
      38             : #include <assert.h>
      39             : #include <math.h>
      40             : #include "options.h"
      41             : #include "cnst.h"
      42             : #include "rom_enc.h"
      43             : #include "rom_com.h"
      44             : #include "prot_fx.h"
      45             : #include "ivas_stat_dec.h"
      46             : #include "ivas_cnst.h"
      47             : #include "ivas_rom_com.h"
      48             : #include "ivas_rom_dec.h"
      49             : #include "wmc_auto.h"
      50             : #include "rom_dec.h"
      51             : #include "ivas_prot_fx.h"
      52             : 
      53             : 
      54             : /*-------------------------------------------------------------------*
      55             :  * ivas_dirac_dec_output_synthesis_cov_open_fx()
      56             :  *
      57             :  * Sets up the state and parameters for the Covariance Synthesis: allocates and zeroes the per-band covariance / mixing-matrix buffers and the exponent arrays, fills the interpolator and copies the prototype matrix. Returns IVAS_ERR_OK, or the IVAS_ERR_FAILED_ALLOC error as soon as one malloc() fails.
      58             :  *-------------------------------------------------------------------*/
      59             : /* NOTE(review): the early error returns below do not release buffers allocated so far, and entries after the failing index have not yet been set to NULL at that point -- confirm how the caller cleans up. */
      60         297 : ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx(
      61             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params,   /* i/o: handle for the covariance synthesis parameters (receives max_band_decorr, prototype matrix and interpolator) */
      62             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: handle for the covariance synthesis state (per-band buffers and exponent arrays are allocated here)     */
      63             :     const Word16 max_band_decorr,                                     /* i  : uppermost frequency band where decorrelation is applied (stored unchanged in the parameter handle)     */
      64             :     const Word16 interp_length,                                       /* i  : length for interpolating the mixing matrices in time slots; also the number of interpolator entries    */
      65             :     const Word16 num_param_bands,                                     /* i  : number of parameter bands                                                                             */
      66             :     const Word16 num_param_bands_residual,                            /* i  : number of parameter bands with a residual mixing matrix (i.e. decorrelation)                          */
      67             :     const Word16 nchan_in,                                            /* i  : number of input (transport) channels                                                                  */
      68             :     const Word16 nchan_out,                                           /* i  : number of output channels                                                                             */
      69             :     const Word32 *proto_matrix                                        /* i  : the prototype (upmix) matrix, nchan_in * nchan_out values are copied (original note: only used if mode == 1 -- no mode is visible in this function)  Q(15-proto_matrix_e) */
      70             : )
      71             : {
      72             :     Word16 idx;
      73             : 
      74         297 :     h_dirac_output_synthesis_params->max_band_decorr = max_band_decorr;
      75         297 :     move16();
      76             : 
      77             :     /*-----------------------------------------------------------------*
      78             :      * memory allocation
      79             :      *-----------------------------------------------------------------*/
      80             : 
      81             :     /* alpha buffer is left unallocated (NULL) here; prototype matrix buffer of nchan_out * nchan_in values */
      82         297 :     h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
      83         297 :     IF( ( h_dirac_output_synthesis_params->proto_matrix_fx = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
      84             :     {
      85           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
      86             :     }
      87         297 :     h_dirac_output_synthesis_params->proto_matrix_len = imult1616( nchan_out, nchan_in );
      88         297 :     move16();
      89             :     /* cov buffers and mixing matrices: one set per parameter band, zero-initialised */
      90        3803 :     FOR( idx = 0; idx < num_param_bands; idx++ )
      91             :     {
      92        3506 :         IF( ( h_dirac_output_synthesis_state->cx_old_fx[idx] = (Word32 *) malloc( nchan_in * nchan_in * sizeof( Word32 ) ) ) == NULL )
      93             :         {
      94           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
      95             :         }
      96        3506 :         h_dirac_output_synthesis_state->cx_old_len = imult1616( nchan_in, nchan_in ); /* NOTE(review): no move16() accompanies this store, unlike the other *_len stores in this function */
      97        3506 :         IF( ( h_dirac_output_synthesis_state->cy_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_out * sizeof( Word32 ) ) ) == NULL )
      98             :         {
      99           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     100             :         }
     101        3506 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
     102             :         {
     103           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     104             :         }
     105        3506 :         set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
     106        3506 :         set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     107        3506 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
     108             : 
     109        3506 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
     110             :         {
     111           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     112             :         }
     113        3506 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
     114        3506 :         h_dirac_output_synthesis_state->mixing_matrix_len = i_mult( nchan_out, nchan_in );
     115        3506 :         move16();
     116             :     }
     117       14611 :     FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ ) /* remaining band slots hold no buffer and are marked NULL */
     118             :     {
     119       14314 :         h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
     120       14314 :         h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
     121       14314 :         h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
     122       14314 :         h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
     123             :     }
     124             :     /* residual mixing matrices (nchan_out * nchan_out values): allocated only for the first num_param_bands_residual bands */
     125        3053 :     FOR( idx = 0; idx < num_param_bands_residual; idx++ )
     126             :     {
     127        2756 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
     128             :         {
     129           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     130             :         }
     131        2756 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     132             : 
     133        2756 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
     134             :         {
     135           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     136             :         }
     137        2756 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
     138        2756 :         h_dirac_output_synthesis_state->mixing_matrix_res_len = i_mult( nchan_out, nchan_out );
     139        2756 :         move16();
     140             :     }
     141       15361 :     FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ ) /* remaining residual slots are marked NULL */
     142             :     {
     143       15064 :         h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
     144       15064 :         h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
     145             :     }
     146             :     /* exponent arrays: CLDFB_NO_CHANNELS_MAX entries each, all initialised to 0 */
     147         297 :     IF( ( h_dirac_output_synthesis_state->cx_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     148             :     {
     149           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     150             :     }
     151         297 :     IF( ( h_dirac_output_synthesis_state->cy_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     152             :     {
     153           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     154             :     }
     155             : 
     156         297 :     set16_fx( h_dirac_output_synthesis_state->cx_old_e, 0, CLDFB_NO_CHANNELS_MAX );
     157         297 :     set16_fx( h_dirac_output_synthesis_state->cy_old_e, 0, CLDFB_NO_CHANNELS_MAX );
     158             : 
     159         297 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     160             :     {
     161           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     162             :     }
     163         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
     164             : 
     165         297 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     166             :     {
     167           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     168             :     }
     169         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     170             : 
     171         297 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     172             :     {
     173           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     174             :     }
     175         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
     176             : 
     177         297 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     178             :     {
     179           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     180             :     }
     181         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     182             : 
     183             :     /*-----------------------------------------------------------------*
     184             :      * prepare processing parameters
     185             :      *-----------------------------------------------------------------*/
     186             : 
     187             :     /* compute interpolator: entry idx - 1 holds div_s( idx, interp_length ) for idx = 1..interp_length; NOTE(review): interp_length is presumably > 0 -- confirm against callers */
     188         297 :     IF( ( h_dirac_output_synthesis_params->interpolator_fx = (Word16 *) malloc( interp_length * sizeof( Word16 ) ) ) == NULL )
     189             :     {
     190           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     191             :     }
     192        9801 :     FOR( idx = 1; idx <= interp_length; ++idx )
     193             :     {
     194        9504 :         h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = div_s( idx, interp_length );
     195        9504 :         move16();
     196             :     }
     197         297 :     Copy32( proto_matrix, h_dirac_output_synthesis_params->proto_matrix_fx, imult1616( nchan_in, nchan_out ) );
     198         297 :     h_dirac_output_synthesis_params->proto_matrix_e = 5; /* fixed exponent stored alongside the copied prototype matrix */
     199         297 :     move16();
     200         297 :     return IVAS_ERR_OK;
     201             : }
     202             : 
     203             : 
     204             : /*-------------------------------------------------------------------*
     205             :  * ivas_dirac_dec_output_synthesis_get_interpolator_fx()
     206             :  *
     207             :  * Rewrites the first interp_length entries of the interpolator in the parameter handle: entry idx - 1 receives the quotient idx / interp_length, rescaled to Q15 with saturation. No memory is allocated here.
     208             :  *-------------------------------------------------------------------*/
     209             : /* NOTE(review): the interpolator buffer must already hold at least interp_length entries -- confirm against the length used when it was allocated. */
     210      318405 : void ivas_dirac_dec_output_synthesis_get_interpolator_fx(
     211             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters (interpolator_fx is overwritten) */
     212             :     const UWord16 interp_length                                     /* i  : interpolator length (number of entries written)                               */
     213             : )
     214             : {
     215             :     Word16 idx;
     216      318405 :     Word16 tmp, exp_diff = 0;
     217      318405 :     move16();
     218             :     /* NOTE(review): idx is a Word16 compared against the UWord16 interp_length -- presumably the length always fits in a Word16; confirm */
     219     1583754 :     FOR( idx = 1; idx <= interp_length; ++idx )
     220             :     {
     221     1265349 :         tmp = BASOP_Util_Divide3232_Scale( L_deposit_l( idx ), L_deposit_l( interp_length ), &exp_diff ); // (Q15 - exp_diff)
     222     1265349 :         h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = shl_sat( tmp, exp_diff );             // Q15, saturating shift by the exponent returned from the division
     223     1265349 :         move16();
     224             :     }
     225             : 
     226      318405 :     return;
     227             : }
     228             : 
     229             : 
     230             : /*-------------------------------------------------------------------*
     231             :  * ivas_dirac_dec_output_synthesis_cov_init_fx()
     232             :  *
     233             :  * initialize the states for the covariance synthesis: zeroes the per-band covariance and mixing-matrix buffers and resets the four mixing-matrix exponent arrays to 0. The buffers are not allocated here.
     234             :  *-------------------------------------------------------------------*/
     235         297 : void ivas_dirac_dec_output_synthesis_cov_init_fx(
     236             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: pointer to the state of the covariance synthesis                             */
     237             :     const Word16 nchan_in,                                            /* i  : number of input (transport) channels                                         */
     238             :     const Word16 nchan_out,                                           /* i  : number of output channels                                                    */
     239             :     const Word16 n_param_bands,                                       /* i  : number of total parameter bands                                              */
     240             :     const Word16 n_param_bands_res                                    /* i  : number of parameter bands with a residual mixing matrix (i.e. decorrelation) */
     241             : )
     242             : {
     243             : 
     244             :     Word16 idx;
     245             : 
     246             :     /* initialize buffers: input/output covariances and old/current mixing matrices of every parameter band */
     247        3803 :     FOR( idx = 0; idx < n_param_bands; idx++ )
     248             :     {
     249        3506 :         set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
     250        3506 :         set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     251        3506 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
     252        3506 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
     253             :     }
     254             :     /* residual mixing matrices exist only for the first n_param_bands_res bands */
     255        3053 :     FOR( idx = 0; idx < n_param_bands_res; idx++ )
     256             :     {
     257        2756 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     258        2756 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
     259             :     }
     260             :     /* reset the mixing-matrix exponents for all CLDFB_NO_CHANNELS_MAX slots */
     261             :     /* NOTE(review): cx_old_e and cy_old_e are not reset in this function -- confirm that this is intended */
     262         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     263         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
     264         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     265         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
     266             : 
     267         297 :     return;
     268             : }
     269             : 
     270             : 
     271             : /*-------------------------------------------------------------------*
     272             :  * ivas_dirac_dec_output_synthesis_cov_close_fx()
     273             :  *
     274             :  * deallocate dynamic memory in the covariance synthesis parameters and state; every pointer is checked against NULL before being freed and is set to NULL afterwards
     275             :  *-------------------------------------------------------------------*/
     276             : 
     277         297 : void ivas_dirac_dec_output_synthesis_cov_close_fx(
     278             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params,  /* i/o: handle for the covariance synthesis parameters (its buffers are freed and set to NULL) */
     279             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state /* i/o: handle for the covariance synthesis state                                              */
     280             : )
     281             : {
     282             :     Word16 idx;
     283             : 
     284             :     /*-----------------------------------------------------------------*
     285             :      * memory deallocation
     286             :      *-----------------------------------------------------------------*/
     287             : 
     288             :     /* free interpolator */
     289         297 :     IF( h_dirac_output_synthesis_params->interpolator_fx != NULL )
     290             :     {
     291         297 :         free( h_dirac_output_synthesis_params->interpolator_fx );
     292         297 :         h_dirac_output_synthesis_params->interpolator_fx = NULL;
     293             :     }
     294             : 
     295             :     /* free alpha */
     296         297 :     IF( h_dirac_output_synthesis_params->alpha_synthesis_fx != NULL )
     297             :     {
     298           0 :         free( h_dirac_output_synthesis_params->alpha_synthesis_fx );
     299           0 :         h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
     300             :     }
     301             : 
     302             :     /* free proto_matrix */
     303         297 :     IF( h_dirac_output_synthesis_params->proto_matrix_fx != NULL )
     304             :     {
     305         297 :         free( h_dirac_output_synthesis_params->proto_matrix_fx );
     306         297 :         h_dirac_output_synthesis_params->proto_matrix_fx = NULL;
     307             :     }
     308             : 
     309             :     /* free the exponent arrays of the input / output covariances */
     310         297 :     IF( h_dirac_output_synthesis_state->cx_old_e != NULL )
     311             :     {
     312         297 :         free( h_dirac_output_synthesis_state->cx_old_e );
     313         297 :         h_dirac_output_synthesis_state->cx_old_e = NULL;
     314             :     }
     315         297 :     IF( h_dirac_output_synthesis_state->cy_old_e != NULL )
     316             :     {
     317         297 :         free( h_dirac_output_synthesis_state->cy_old_e );
     318         297 :         h_dirac_output_synthesis_state->cy_old_e = NULL;
     319             :     }
     320             : 
     321             :     /* free cov buffers and mixing matrices: all CLDFB_NO_CHANNELS_MAX slots are visited, NULL slots are skipped */
     322       18117 :     FOR( idx = 0; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     323             :     {
     324       17820 :         IF( h_dirac_output_synthesis_state->cx_old_fx[idx] != NULL )
     325             :         {
     326        3506 :             free( h_dirac_output_synthesis_state->cx_old_fx[idx] );
     327        3506 :             h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
     328             :         }
     329             : 
     330       17820 :         IF( h_dirac_output_synthesis_state->cy_old_fx[idx] != NULL )
     331             :         {
     332        3506 :             free( h_dirac_output_synthesis_state->cy_old_fx[idx] );
     333        3506 :             h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
     334             :         }
     335             : 
     336       17820 :         IF( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] != NULL )
     337             :         {
     338        3506 :             free( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] );
     339        3506 :             h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
     340             :         }
     341             : 
     342       17820 :         IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] != NULL )
     343             :         {
     344        2756 :             free( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] );
     345        2756 :             h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
     346             :         }
     347             : 
     348       17820 :         IF( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] != NULL )
     349             :         {
     350        3506 :             free( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] );
     351        3506 :             h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
     352             :         }
     353             : 
     354       17820 :         IF( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] != NULL )
     355             :         {
     356        2756 :             free( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] );
     357        2756 :             h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
     358             :         }
     359             :     }
     360             :     /* free the exponent arrays of the mixing matrices */
     361         297 :     IF( h_dirac_output_synthesis_state->mixing_matrix_old_exp != NULL )
     362             :     {
     363         297 :         free( h_dirac_output_synthesis_state->mixing_matrix_old_exp );
     364         297 :         h_dirac_output_synthesis_state->mixing_matrix_old_exp = NULL;
     365             :     }
     366             : 
     367         297 :     IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp != NULL )
     368             :     {
     369         297 :         free( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp );
     370         297 :         h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = NULL;
     371             :     }
     372             : 
     373         297 :     IF( h_dirac_output_synthesis_state->mixing_matrix_exp != NULL )
     374             :     {
     375         297 :         free( h_dirac_output_synthesis_state->mixing_matrix_exp );
     376         297 :         h_dirac_output_synthesis_state->mixing_matrix_exp = NULL;
     377             :     }
     378             : 
     379         297 :     IF( h_dirac_output_synthesis_state->mixing_matrix_res_exp != NULL )
     380             :     {
     381         297 :         free( h_dirac_output_synthesis_state->mixing_matrix_res_exp );
     382         297 :         h_dirac_output_synthesis_state->mixing_matrix_res_exp = NULL;
     383             :     }
     384         297 :     return;
     385             : }
     386             : 
     387             : /*-------------------------------------------------------------------*
     388             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
     389             :  *
     390             :  * collect the multi channel input covariance for one filter bank time slot
     391             :  *-------------------------------------------------------------------*/
     392             : 
     393             : /*-------------------------------------------------------------------*
     394             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
     395             :  *
     396             :  * collect the multi channel input covariance for one filter bank time slot
     397             :  *-------------------------------------------------------------------*/
     398     1776450 : void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
     399             :     Word32 *RealBuffer_fx,                                                          /* i  : input channel filter bank samples (real part)          Q(31- RealBuffer_e)*/
     400             :     Word16 RealBuffer_e,                                                            /* i  : exponent input channel filter bank samples (real part)*/
     401             :     Word32 *ImagBuffer_fx,                                                          /* i  : input channel filter bank samples (imaginary part      Q(ImagBuffer_e)*/
     402             :     Word16 ImagBuffer_e,                                                            /* i  : exponent input channel filter bank samples (real part)*/
     403             :     Word32 cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS],      /* o  : accumulated input covariance (real part)              Q(31- cx_e)*/
     404             :     Word16 *cx_e,                                                                   /* i  : exponent for accumulated input covariance (real part) */
     405             :     Word32 cx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o  : accumulated input covariance (imaginary part)         Q(31- cx_imag_e)*/
     406             :     Word16 *cx_imag_e,                                                              /* i : exponent accumulated input covariance (imag part)      */
     407             :     PARAM_MC_DEC_HANDLE hParamMC,                                                   /* i  : handle to Parametric MC state                         */
     408             :     const Word16 param_band,                                                        /* i  : parameter band                                        */
     409             :     const Word16 nchan_in                                                           /* i  : number of input channels                              */
     410             : )
     411             : {
     412             :     Word16 band_idx, ch_idx;
     413             :     Word16 brange[2];
     414             :     Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
     415             :     Word16 real_in_e;
     416             :     Word32 imag_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
     417             :     Word16 imag_in_e;
     418             :     Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     419             :     Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     420             :     Word16 output_e;
     421             :     Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
     422             :     Word16 band, num_bands;
     423             :     Word16 cx_fx_norm, cx_imag_fx_norm;
     424             :     /* estimate input covariance */
     425             :     /* Already stack here instead of in the process_subframe */
     426             : 
     427             :     /* collect input frame */
     428     1776450 :     brange[0] = hParamMC->band_grouping[param_band];
     429     1776450 :     move16();
     430     1776450 :     brange[1] = hParamMC->band_grouping[param_band + 1];
     431     1776450 :     move16();
     432     1776450 :     num_bands = sub( brange[1], brange[0] );
     433             : 
     434     9379450 :     FOR( band_idx = 0; band_idx < num_bands; band_idx++ )
     435             :     {
     436     7603000 :         band = add( brange[0], band_idx );
     437    22843320 :         FOR( ch_idx = 0; ch_idx < nchan_in; ch_idx++ )
     438             :         {
     439    15240320 :             real_in_buffer_fx[band_idx + num_bands * ch_idx] = RealBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
     440    15240320 :             move32();
     441    15240320 :             imag_in_buffer_fx[band_idx + num_bands * ch_idx] = ImagBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
     442    15240320 :             move32();
     443             :         }
     444             :     }
     445             : 
     446     1776450 :     real_in_e = RealBuffer_e;
     447     1776450 :     move16();
     448     1776450 :     imag_in_e = ImagBuffer_e;
     449     1776450 :     move16();
     450             : 
     451     1776450 :     Word16 buf_len = imult1616( num_bands, nchan_in );
     452             : 
     453     1776450 :     shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
     454     1776450 :     shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
     455             : 
     456     1776450 :     real_in_e = sub( real_in_e, shift_real );
     457     1776450 :     imag_in_e = sub( imag_in_e, shift_imag );
     458             : 
     459             : 
     460     1776450 :     output_e = s_max( real_in_e, imag_in_e );
     461             : 
     462     1776450 :     scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
     463     1776450 :     scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
     464             : 
     465     1776450 :     cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
     466     1776450 :     v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
     467             : 
     468     1776450 :     v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
     469             : 
     470     1776450 :     cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
     471     1776450 :     cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
     472             : 
     473     1776450 :     scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
     474     1776450 :     scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
     475             : 
     476     1776450 :     *cx_e = sub( tmp1_e, cx_fx_norm );
     477     1776450 :     move16();
     478     1776450 :     *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
     479     1776450 :     move16();
     480             : 
     481     1776450 :     return;
     482             : }
     483             : 
     484             : /*-------------------------------------------------------------------*
     485             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx()
     486             :  *
     487             :  * Synthesize one filter bank slot of multi-channel output filter bank samples with the covariance synthesis: for every parameter band the current and the previous mixing matrices are interpolated with interpolator_fx[slot_idx_tot];
     488             :  * every band of the group then receives the residual mix of the samples already held in the output buffers (band groups starting below max_band_decorr, zero otherwise) plus the direct mix of the input channel samples.
     489             :  *-------------------------------------------------------------------*/
     490             : 
     491      140160 : void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
     492             :     Word32 *Cldfb_RealBuffer_in_fx,                                                   /* i  : input channel filter bank samples (real part), read at [ch * hParamMC->num_freq_bands + band]           Q6*/
     493             :     Word32 *Cldfb_ImagBuffer_in_fx,                                                   /* i  : input channel filter bank samples (imaginary part), read at [ch * hParamMC->num_freq_bands + band]      Q6*/
     494             :     Word32 Cldfb_RealBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* i/o: diffuse prototypes on entry (read when residual mixing is active), output channel filter bank samples (real part) on exit        Q6*/
     495             :     Word32 Cldfb_ImagBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* i/o: diffuse prototypes on entry (read when residual mixing is active), output channel filter bank samples (imaginary part) on exit   Q6*/
     496             :     Word32 *mixing_matrix_fx[],                                                       /* i  : parameter band wise mixing matrices (direct part)                Q(31-mixing_matrix_e)*/
     497             :     Word16 *mixing_matrix_e,                                                          /* i  : exponents of the parameter band wise mixing matrices (direct part), one per parameter band   */
     498             :     Word32 *mixing_matrix_res_fx[],                                                   /* i  : parameter band wise mixing matrices (residual part)              Q(31-mixing_matrix_res_e)*/
     499             :     Word16 *mixing_matrix_res_e,                                                      /* i  : exponents of the parameter band wise mixing matrices (residual part), one per parameter band */
     500             :     const UWord16 slot_idx_sfr,                                                       /* i  : time slot index for the current slot within the current subframe */
     501             :     const UWord16 slot_idx_tot,                                                       /* i  : time slot index for the current slot within the frame            */
     502             :     const Word16 nX,                                                                  /* i  : number of input channels                                         */
     503             :     const Word16 nY,                                                                  /* i  : number of output channels                                        */
     504             :     PARAM_MC_DEC_HANDLE hParamMC                                                      /* i  : handle to the Parametric MC decoder state                        */
     505             : )
     506             : {
     507             :     Word16 param_band_idx, band, ch_idx;
     508             :     Word16 have_residual;
     509             :     Word16 brange[2];
     510             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE h_synthesis_state;
     511             :     Word32 mixing_matrix_smooth_fx[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
     512             :     Word16 mixing_matrix_smooth_e;
     513             :     Word32 mixing_matrix_res_smooth_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     514      140160 :     Word16 mixing_matrix_res_smooth_e = 0;
     515      140160 :     move16();
     516             :     Word32 mixing_matrix_buffer_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     517             :     Word16 mixing_matrix_buffer_e;
     518             :     Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
     519             :     Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
     520             : 
     521             :     Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
     522             :     Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];
     523             : 
     524      140160 :     h_synthesis_state = hParamMC->h_output_synthesis_cov_state; /* local by-value copy; only the old mixing matrices and their exponents are read from it below */
     525             : 
     526      140160 :     set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
     527      140160 :     set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
     528             : 
     529      140160 :     set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
     530      140160 :     set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );
     531             : 
     532     1950080 :     FOR( param_band_idx = 0; param_band_idx < hParamMC->num_param_bands_synth; param_band_idx++ )
     533             :     {
     534             :         /* final mixing */
     535     1809920 :         have_residual = 0;
     536     1809920 :         move16();
     537     1809920 :         brange[0] = hParamMC->band_grouping[param_band_idx]; /* first band of this parameter band */
     538     1809920 :         move16();
     539     1809920 :         brange[1] = hParamMC->band_grouping[( param_band_idx + 1 )]; /* one past the last band of this parameter band */
     540     1809920 :         move16();
     541             : 
     542     1809920 :         if ( LT_16( brange[0], hParamMC->h_output_synthesis_params.max_band_decorr ) ) /* residual mixing is only used when the band group starts below max_band_decorr */
     543             :         {
     544     1454400 :             have_residual = 1;
     545     1454400 :             move16();
     546             :         }
     547             : 
     548     1809920 :         v_multc_fixed( mixing_matrix_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_smooth_fx, imult1616( nY, nX ) ); /* current matrix weighted by the interpolator */
     549     1809920 :         mixing_matrix_smooth_e = mixing_matrix_e[param_band_idx]; // interpolator is W16
     550     1809920 :         move16();
     551             : 
     552     1809920 :         v_multc_fixed( h_synthesis_state.mixing_matrix_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nX ) ); /* previous matrix weighted by ( 1 - interpolator ) */
     553     1809920 :         mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_old_exp[param_band_idx]; // interpolator is W16
     554     1809920 :         move16();
     555             : 
     556     1809920 :         v_add_fixed_me( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_smooth_fx, &mixing_matrix_smooth_e, imult1616( nY, nX ), 0 ); /* sum of both weighted matrices; the result exponent is returned in mixing_matrix_smooth_e */
     557             : 
     558     1809920 :         IF( have_residual )
     559             :         {
     560             :             /* residual mixing matrix interpolation*/
     561             : 
     562     1454400 :             v_multc_fixed( mixing_matrix_res_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_res_smooth_fx, imult1616( nY, nY ) );
     563     1454400 :             mixing_matrix_res_smooth_e = mixing_matrix_res_e[param_band_idx]; // interpolator is W16
     564     1454400 :             move16();
     565             : 
     566     1454400 :             set_zero_fx( mixing_matrix_buffer_fx, imult1616( nY, nY ) );
     567     1454400 :             v_multc_fixed( h_synthesis_state.mixing_matrix_res_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nY ) );
     568     1454400 :             mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_res_old_exp[param_band_idx]; // interpolator is W16
     569     1454400 :             move16();
     570             : 
     571     1454400 :             v_add_fixed_me( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_res_smooth_fx, &mixing_matrix_res_smooth_e, imult1616( nY, nY ), 0 );
     572             :         }
     573             : 
     574             : 
     575     9553920 :         FOR( band = brange[0]; band < brange[1]; band++ )
     576             :         {
     577     7744000 :             assert( band >= 0 );
     578             : 
     579     7744000 :             IF( have_residual )
     580             :             {
     581             :                 /* collect diffuse prototypes */
     582     2803200 :                 assert( LT_16( band, hParamMC->h_output_synthesis_params.max_band_decorr ) );
     583    19833600 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     584             :                 {
     585    17030400 :                     diff_f_real_fx[ch_idx] = Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band]; /* copied out first because the same buffer element is overwritten with the mixed result below */
     586    17030400 :                     move32();
     587    17030400 :                     diff_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band];
     588    17030400 :                     move32();
     589             :                 }
     590             : 
     591             :                 /* apply residual mixing */
     592             :                 {
     593             :                     Word16 shifter;
     594             : #ifdef OPT_SBA_DEC_V2_NBE
     595     2803200 :                     shifter = sub( mixing_matrix_res_smooth_e, 32 ); /* left-shift amount for W_shl_sat_l below (presumably 32 instead of 31 because W_mac_32_32 is used for the accumulation - TODO confirm); NOTE(review): if OPT_SBA_DEC_V2_NBE were defined without OPT_SBA_DEC_V2_BE this value would reach the W_shr path instead - confirm both switches are always enabled together */
     596             : #else /* OPT_SBA_DEC_V2_NBE */
     597             : #ifdef OPT_SBA_DEC_V2_BE
     598             :                     shifter = sub( mixing_matrix_res_smooth_e, 31 ); /* left-shift amount for W_shl_sat_l below */
     599             : #else  /* OPT_SBA_DEC_V2_BE */
     600             :                     shifter = 31 - mixing_matrix_res_smooth_e; /* right-shift amount for the W_shr path below */
     601             : #endif /* OPT_SBA_DEC_V2_BE */
     602             : #endif /* OPT_SBA_DEC_V2_NBE */
     603    19833600 :                     FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     604             :                     {
     605             :                         int i;
     606             :                         Word16 idx;
     607             :                         Word64 temp_real, temp_imag;
     608             : 
     609             : 
     610    17030400 :                         idx = ch_idx; /* matrix element [ch_idx + i * nY] weights diffuse prototype i */
     611    17030400 :                         temp_real = 0;
     612    17030400 :                         temp_imag = 0;
     613    17030400 :                         move64();
     614    17030400 :                         move64();
     615   121209600 :                         for ( i = 0; i < nY; i++ )
     616             :                         {
     617             : #ifdef OPT_SBA_DEC_V2_NBE
     618   104179200 :                             temp_real = W_mac_32_32( temp_real, mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] );
     619   104179200 :                             temp_imag = W_mac_32_32( temp_imag, mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] );
     620             : #else  /* OPT_SBA_DEC_V2_NBE */
     621             :                             temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] ) );
     622             :                             temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) );
     623             : #endif /* OPT_SBA_DEC_V2_NBE */
     624   104179200 :                             idx += nY;
     625             :                         }
     626             : #ifdef OPT_SBA_DEC_V2_BE
     627    17030400 :                         Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter );
     628    17030400 :                         Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter );
     629             : #else  /* OPT_SBA_DEC_V2_BE */
     630             :                         Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) );
     631             :                         Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) );
     632             : #endif /* OPT_SBA_DEC_V2_BE */
     633             :                     }
     634             :                 }
     635             :             }
     636             :             ELSE
     637             :             {
     638    34969600 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     639             :                 {
     640    30028800 :                     Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = 0; /* no residual part: start from zero so the direct mix below is the only contribution */
     641    30028800 :                     move32();
     642    30028800 :                     Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
     643    30028800 :                     move32();
     644             :                 }
     645             :             }
     646             : 
     647             :             /* collect input signals, still in cldfb buffers */
     648    23270400 :             FOR( ch_idx = 0; ch_idx < nX; ch_idx++ )
     649             :             {
     650             : 
     651    15526400 :                 input_f_real_fx[ch_idx] = Cldfb_RealBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
     652    15526400 :                 move32();
     653    15526400 :                 input_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
     654    15526400 :                 move32();
     655             :             }
     656             : 
     657             :             /* apply mixing matrix */
     658             :             {
     659             :                 Word16 shifter;
     660     7744000 :                 shifter = 31 - mixing_matrix_smooth_e; /* right-shift amount; per the Q annotations (matrix Q(31-e), input Q6) this is intended to bring the product back to Q6 */
     661             : 
     662    54803200 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     663             :                 {
     664             :                     int i;
     665             :                     Word16 idx;
     666             :                     Word64 temp_real, temp_imag;
     667             : 
     668             : 
     669    47059200 :                     idx = ch_idx; /* matrix element [ch_idx + i * nY] weights input channel i */
     670    47059200 :                     temp_real = 0;
     671    47059200 :                     temp_imag = 0;
     672    47059200 :                     move64();
     673    47059200 :                     move64();
     674   141638400 :                     for ( i = 0; i < nX; i++ )
     675             :                     {
     676    94579200 :                         temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) );
     677    94579200 :                         temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) );
     678    94579200 :                         idx += nY;
     679             :                     }
     680    47059200 :                     Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) ); /* direct mix added on top of the residual mix (or zero) written above */
     681    47059200 :                     move32();
     682    47059200 :                     Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
     683    47059200 :                     move32();
     684             :                 }
     685             :             }
     686             :         }
     687             :     }
     688             : 
     689      140160 :     return;
     690             : }
     691             : 
     692             : /*-------------------------------------------------------------------*
     693             :  * computeMixingMatrices()
     694             :  *
     695             :  * compute a mixing matrix using the covariance synthesis approach
     696             :  *-------------------------------------------------------------------*/
     697             : 
     698      113120 : Word16 computeMixingMatrices_fx(
     699             :     const Word16 num_inputs,  /* i  : number of input channels                                                                                      */
     700             :     const Word16 num_outputs, /* i  : number of output channels                                                                                     */
     701             :     const Word32 *Cx,         /* i  : input channel covariance matrix Q(31-Cx_e)                                                                               */
     702             :     Word16 Cx_e,
     703             :     const Word32 *Cy, /* i  : target covariance matrix Q(31-Cy_e)                                                                               */
     704             :     Word16 Cy_e,
     705             :     const Word32 *Q, /* i  : prototype matrix (usually a upmix matrix) Q_fx_e                                                                     */
     706             :     Word16 Q_fx_e,
     707             :     const Word16 energy_compensation_flag, /* i  : flag indicating that the energy compensation should be performed (i.e. no residual mixing matrix will follow) */
     708             :     const Word32 reg_Sx_fx,                /* i  : regularization factor for the input channel singular values                                                   */
     709             :     Word16 reg_Sx_e,
     710             :     const Word32 reg_ghat_fx, /* i  : regularization factor for the normalization matrix  Q(31-reg_ghat_e)                                                         */
     711             :     Word16 reg_ghat_e,
     712             :     Word32 *mixing_matrix_fx, /* o  : resulting mixing matrix       Q(31-mixing_matrix_out_e)                                                                                 */
     713             :     Word16 *mixing_matrix_out_e,
     714             :     Word32 *Cr_fx, /* o  : residual covariance matrix Q(31-Cr_e)                                                                                    */
     715             :     Word16 *Cr_e )
     716             : {
     717             :     Word16 i, j;
     718             :     Word16 out;
     719             :     Word16 nL, nC;
     720      113120 :     Word16 lengthCx = num_inputs;
     721      113120 :     Word16 lengthCy = num_outputs;
     722             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
     723             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     724             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     725             :     Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     726             :     Word32 Cy_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     727             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
     728             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
     729             :     Word16 Cx_fx_e;
     730             :     Word16 Cy_fx_e;
     731             : 
     732             :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
     733             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
     734             : 
     735             : 
     736             :     Word32 limit_fx;
     737             :     Word16 limit_e;
     738             : 
     739             :     Word32 L_tmp;
     740             :     Word16 tmp_e, tmp, exp;
     741             : 
     742             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     743             :     Word32 Kx_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     744             : 
     745             :     Word16 Kx_fx_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     746             :     Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     747             : 
     748             :     Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     749             :     Word16 Kx_reg_inv_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     750             : 
     751             :     Word32 Q_fx[PARAM_MC_MAX_TRANSPORT_CHANS * MAX_CICP_CHANNELS];
     752             :     Word16 Q_e;
     753             : 
     754             :     Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     755             :     Word16 Q_Cx_e;
     756             : 
     757             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
     758             :     Word16 Cy_hat_diag_e;
     759             :     Word16 Cy_hat_diag_buff_e[MAX_OUTPUT_CHANNELS];
     760             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
     761             :     Word16 G_hat_buff_e[MAX_OUTPUT_CHANNELS];
     762             : 
     763             :     Word16 mat_mult_buffer2_e, mat_mult_buffer3_e;
     764             : 
     765             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     766             : 
     767             :     Word16 mixing_matrix_e;
     768             : 
     769             :     Word16 Cr_fx_e;
     770             : 
     771             : 
     772             :     Word32 adj_fx[MAX_OUTPUT_CHANNELS];
     773             :     Word16 adj_e[MAX_OUTPUT_CHANNELS];
     774             :     Word32 *adj_fx_p;
     775             :     Word16 adj_fx_e;
     776             : 
     777             :     Word32 *Cr_p_fx, *Cy_tilde_p_fx, *Cy_p_fx;
     778      113120 :     push_wmops( "dirac_cov_mix_mat" );
     779             : 
     780      113120 :     out = EXIT_SUCCESS;
     781      113120 :     move16();
     782             : 
     783      113120 :     set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
     784     1923040 :     FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
     785             :     {
     786     1809920 :         set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     787     1809920 :         set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     788     1809920 :         set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     789             :     }
     790             : 
     791             : 
     792      113120 :     Copy32( Q, Q_fx, imult1616( lengthCy, lengthCx ) );
     793      113120 :     Copy32( Cx, Cx_fx, imult1616( lengthCx, lengthCx ) );
     794      113120 :     Copy32( Cy, Cy_fx, imult1616( lengthCy, lengthCy ) );
     795             : 
     796      113120 :     Q_e = Q_fx_e;
     797      113120 :     move16();
     798      113120 :     Cx_fx_e = Cx_e;
     799      113120 :     move16();
     800      113120 :     Cy_fx_e = Cy_e;
     801      113120 :     move16();
     802             : 
     803             : 
     804             :     /*-----------------------------------------------------------------*
     805             :      * Decomposition of Cy
     806             :      *-----------------------------------------------------------------*/
     807             : 
     808             :     /* Processing the SVD */
     809             : 
     810      113120 :     mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
     811             : 
     812      113120 :     svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
     813      113120 :     Word16 max_e = -32;
     814             :     /* Computing Ky */
     815      688444 :     FOR( i = 0; i < lengthCy; ++i )
     816             :     {
     817     3533968 :         FOR( j = 0; j < lengthCy; ++j )
     818             :         {
     819     2958644 :             tmp_e = svd_s_buffer_e[j];
     820     2958644 :             move16();
     821     2958644 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
     822     2958644 :             Ky_fx[i + ( j * lengthCy )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
     823     2958644 :             move32();
     824     2958644 :             Ky_fx_e[i + ( j * lengthCy )] = tmp_e;
     825     2958644 :             move16();
     826     2958644 :             max_e = s_max( max_e, tmp_e );
     827             :         }
     828             :     }
     829     3071764 :     FOR( i = 0; i < lengthCy * lengthCy; ++i )
     830             :     {
     831     2958644 :         Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
     832     2958644 :         move32();
     833     2958644 :         Ky_fx_e[i] = max_e;
     834     2958644 :         move16();
     835             :     }
     836             : 
     837             :     /*-----------------------------------------------------------------*
     838             :      * Decomposition of Cx
     839             :      *-----------------------------------------------------------------*/
     840             : 
     841             :     /* Processing the SVD */
     842             : 
     843      113120 :     mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 );
     844             : 
     845      113120 :     svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCx );
     846      113120 :     max_e = -32;
     847      339920 :     FOR( i = 0; i < lengthCx; ++i )
     848             :     {
     849      682080 :         FOR( j = 0; j < lengthCx; ++j )
     850             :         {
     851      455280 :             tmp_e = svd_s_buffer_e[j];
     852      455280 :             move16();
     853      455280 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
     854      455280 :             Kx_fx[( i + ( j * lengthCx ) )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
     855      455280 :             move32();
     856      455280 :             Kx_fx_e[( i + ( j * lengthCx ) )] = tmp_e;
     857      455280 :             move16();
     858      455280 :             max_e = s_max( max_e, tmp_e );
     859             :         }
     860             :     }
     861      568400 :     FOR( i = 0; i < lengthCx * lengthCx; ++i )
     862             :     {
     863      455280 :         Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
     864      455280 :         move32();
     865      455280 :         Kx_fx_e[i] = max_e;
     866      455280 :         move16();
     867             :     }
     868             : 
     869      339920 :     FOR( i = 0; i < lengthCx; ++i )
     870             :     {
     871      226800 :         tmp_e = svd_s_buffer_e[i];
     872      226800 :         move16();
     873      226800 :         svd_s_buffer_fx[i] = Sqrt32( svd_s_buffer_fx[i], &tmp_e ); // Q(31-tmp_e)
     874      226800 :         move32();
     875      226800 :         svd_s_buffer_e[i] = tmp_e;
     876      226800 :         move16();
     877             :     }
     878             : 
     879             :     /*-----------------------------------------------------------------*
     880             :      * Regularization of Sx
     881             :      *-----------------------------------------------------------------*/
     882             : 
     883      113120 :     limit_fx = svd_s_buffer_fx[0];
     884      113120 :     move32();
     885      113120 :     limit_e = svd_s_buffer_e[0];
     886      113120 :     move16();
     887      226800 :     FOR( i = 1; i < lengthCx; i++ )
     888             :     {
     889             : #ifdef OPT_MCH_DEC_V1_NBE
     890      113680 :         IF( GT_32( svd_s_buffer_fx[i], L_shl_sat( limit_fx, sub( limit_e, svd_s_buffer_e[i] ) ) ) )
     891             : #else  /* OPT_MCH_DEC_V1_NBE */
     892             :         IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) > 0 )
     893             : #endif /* OPT_MCH_DEC_V1_NBE */
     894             :         {
     895           0 :             limit_fx = svd_s_buffer_fx[i];
     896           0 :             move32();
     897           0 :             limit_e = svd_s_buffer_e[i];
     898           0 :             move16();
     899             :         }
     900             :     }
     901             : 
     902      113120 :     limit_e = add( limit_e, reg_Sx_e );
     903             : 
     904             : #ifdef OPT_MCH_DEC_V1_BE
     905      113120 :     limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_Sx_fx );
     906             : #else  /* OPT_MCH_DEC_V1_BE */
     907             :     L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx );
     908             :     limit_fx = L_add( L_tmp, EPSILON_FX );
     909             : #endif /* OPT_MCH_DEC_V1_BE */
     910             : 
     911      339920 :     FOR( i = 0; i < lengthCx; ++i )
     912             :     {
     913             : #ifdef OPT_MCH_DEC_V1_NBE
     914      226800 :         IF( LT_32( L_shl_sat( svd_s_buffer_fx[i], sub( svd_s_buffer_e[i], limit_e ) ), limit_fx ) )
     915             : #else  /* OPT_MCH_DEC_V1_NBE */
     916             :         IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) < 0 )
     917             : #endif /* OPT_MCH_DEC_V1_NBE */
     918             :         {
     919       51307 :             svd_s_buffer_fx[i] = limit_fx;
     920       51307 :             move32();
     921       51307 :             svd_s_buffer_e[i] = limit_e;
     922       51307 :             move16();
     923             :         }
     924             :     }
     925             : 
     926             : 
     927      113120 :     limit_fx = 0;
     928      113120 :     move32();
     929      113120 :     limit_e = 0;
     930      113120 :     move16();
     931             : 
     932             : 
     933             :     /*-----------------------------------------------------------------*
     934             :      * regularized Kx-1
     935             :      *-----------------------------------------------------------------*/
     936             : 
     937      339920 :     FOR( i = 0; i < lengthCx; ++i )
     938             :     {
     939             :         Word16 scale, reg_fac_fx;
     940      226800 :         reg_fac_fx = BASOP_Util_Divide3232_Scale( 1, svd_s_buffer_fx[i], &scale );
     941      226800 :         scale = add( scale, sub( Q31, svd_s_buffer_e[i] ) );
     942      682080 :         FOR( j = 0; j < lengthCx; ++j )
     943             :         {
     944      455280 :             Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1( svd_u_buffer_fx[j][i], reg_fac_fx ); // Q(31-scale)
     945      455280 :             move32();
     946      455280 :             Kx_reg_inv_e[i + j * lengthCx] = scale;
     947      455280 :             move16();
     948             :         }
     949             :     }
     950             : 
     951             :     /*-----------------------------------------------------------------*
     952             :      * normalization matrix G hat
     953             :      *-----------------------------------------------------------------*/
     954             : 
     955             :     /* Computing Q*Cx*Q' */
     956             : 
     957      113120 :     matrix_product_mant_exp_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_fx, Cx_fx_e, lengthCx, lengthCx, 0, Q_Cx_fx, &Q_Cx_e );
     958             : 
     959      113120 :     matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
     960             : 
     961             : 
     962             : #ifdef OPT_MCH_DEC_V1_NBE
     963      113120 :     Word16 com_e = sub( limit_e, Cy_hat_diag_e );
     964             : #endif /* OPT_MCH_DEC_V1_NBE */
     965      688444 :     FOR( i = 0; i < lengthCy; ++i )
     966             :     {
     967             : #ifdef OPT_MCH_DEC_V1_NBE
     968      575324 :         IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
     969             : #else  /* OPT_MCH_DEC_V1_NBE */
     970             :         IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
     971             : #endif /* OPT_MCH_DEC_V1_NBE */
     972             :         {
     973      575324 :             limit_fx = Cy_hat_diag_fx[i];
     974      575324 :             move32();
     975      575324 :             limit_e = Cy_hat_diag_e;
     976      575324 :             move16();
     977             :         }
     978             :     }
     979             : #ifdef OPT_MCH_DEC_V1_BE
     980      113120 :     limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
     981             : #else                                                           /* OPT_MCH_DEC_V1_BE */
     982             :     L_tmp = Mpy_32_32( limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
     983             :     limit_fx = L_add( L_tmp, EPSILON_FX );
     984             : #endif                                                          /* OPT_MCH_DEC_V1_BE */
     985      113120 :     limit_e = add( limit_e, reg_ghat_e );
     986             : 
     987             : #ifdef OPT_MCH_DEC_V1_NBE
     988      113120 :     com_e = sub( Cy_hat_diag_e, limit_e );
     989             : #endif /* OPT_MCH_DEC_V1_NBE */
     990      688444 :     FOR( i = 0; i < lengthCy; ++i )
     991             :     {
     992      575324 :         Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
     993      575324 :         move16();
     994             : 
     995             : #ifdef OPT_MCH_DEC_V1_NBE
     996      575324 :         IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) )
     997             : #else  /* OPT_MCH_DEC_V1_NBE */
     998             :         IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_buff_e[i] ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
     999             : #endif /* OPT_MCH_DEC_V1_NBE */
    1000             :         {
    1001          10 :             Cy_hat_diag_fx[i] = limit_fx;
    1002          10 :             move32();
    1003          10 :             Cy_hat_diag_buff_e[i] = limit_e;
    1004          10 :             move16();
    1005             :         }
    1006             : 
    1007      575324 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[( i + ( i * lengthCy ) )], Cy_hat_diag_fx[i], &exp );
    1008      575324 :         exp = add( exp, sub( Cy_fx_e, Cy_hat_diag_buff_e[i] ) );
    1009      575324 :         L_tmp = Sqrt32( L_deposit_h( tmp ), &exp );
    1010      575324 :         G_hat_fx[i] = L_tmp;
    1011      575324 :         move32();
    1012      575324 :         G_hat_buff_e[i] = exp;
    1013      575324 :         move16();
    1014             :     }
    1015             : 
    1016             : 
    1017             :     /*-----------------------------------------------------------------*
    1018             :      * Formulate optimal P
    1019             :      *-----------------------------------------------------------------*/
    1020             : 
    1021             :     /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
    1022             : 
    1023             :     Word16 mat_mult_buffer1_fx_e;
    1024             : 
    1025             :     Word16 mat_mult_buffer2_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1026             : 
    1027      113120 :     matrix_product_mant_exp_fx( Kx_fx, Kx_fx_e[0], lengthCx, lengthCx, 1, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
    1028             : 
    1029      113120 :     matrix_diag_product_fx_2( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_buff_e, lengthCy, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
    1030             : 
    1031      113120 :     matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_fx_e[0], lengthCx, lengthCy, 0, Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
    1032             : 
    1033      113120 :     IF( LT_16( lengthCx, lengthCy ) )
    1034             :     {
    1035      113120 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 );
    1036      113120 :         nL = lengthCy;
    1037      113120 :         move16();
    1038      113120 :         nC = lengthCx;
    1039      113120 :         move16();
    1040      113120 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
    1041             :     }
    1042             :     ELSE
    1043             :     {
    1044           0 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
    1045           0 :         nL = lengthCx;
    1046           0 :         move16();
    1047           0 :         nC = lengthCy;
    1048           0 :         move16();
    1049           0 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
    1050             :     }
    1051             : 
    1052             :     /* Actually Processing P */
    1053             : 
    1054             :     /* can be skipped: lambda is always column-truncated identity matrix, so this operation just
    1055             :        truncates V to num_input_channel columns */
    1056             : 
    1057      113120 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
    1058      113120 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1059             : 
    1060      113120 :     mat_mult_buffer1_fx_e = 0;
    1061      113120 :     move16();
    1062      113120 :     mat_mult_buffer2_e = 0;
    1063      113120 :     move16();
    1064             : 
    1065      113120 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCy, lengthCx, 0,
    1066             :                                 mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
    1067             :                                 mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1068             : 
    1069             :     /************************ Formulate M **********************/
    1070             : 
    1071      113120 :     matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
    1072             : 
    1073             :     Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
    1074             : 
    1075             :     Word16 mat_mult_buffer1_fx_e1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1076      113120 :     set16_fx( mat_mult_buffer1_fx_e1, mat_mult_buffer1_fx_e, MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
    1077             : 
    1078      113120 :     matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e );
    1079             : 
    1080             :     /*-----------------------------------------------------------------*
    1081             :      * Formulate Cr
    1082             :      *-----------------------------------------------------------------*/
    1083             : 
    1084             :     /* Compute Cy_tilde = M*Cx*M' */
    1085             : 
    1086             : 
    1087             :     Word16 Cx_e_arr[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
    1088      113120 :     set16_fx( Cx_e_arr, Cx_fx_e, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
    1089      113120 :     matrix_product_mant_exp( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1 );
    1090             : 
    1091      113120 :     matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
    1092             : 
    1093      113120 :     exp = mixing_matrix_fx_e[0];
    1094      113120 :     move16();
    1095     1156848 :     FOR( i = 1; i < lengthCy * lengthCx; i++ )
    1096             :     {
    1097     1043728 :         if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
    1098             :         {
    1099       83145 :             exp = mixing_matrix_fx_e[i];
    1100       83145 :             move16();
    1101             :         }
    1102             :     }
    1103             : 
    1104     1269968 :     FOR( i = 0; i < lengthCy * lengthCx; i++ )
    1105             :     {
    1106     1156848 :         mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
    1107     1156848 :         move32();
    1108             :     }
    1109             : 
    1110      113120 :     mixing_matrix_e = exp;
    1111      113120 :     move16();
    1112             : 
    1113      113120 :     Cr_p_fx = Cr_fx;
    1114      113120 :     Cy_p_fx = Cy_fx;
    1115      113120 :     Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1116             :     Word16 Cr_e_arr[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
    1117      688444 :     FOR( i = 0; i < lengthCy; ++i )
    1118             :     {
    1119     3533968 :         FOR( j = 0; j < lengthCy; ++j )
    1120             :         {
    1121             : 
    1122     2958644 :             *( Cr_p_fx ) = BASOP_Util_Add_Mant32Exp( *( Cy_p_fx ), Cy_fx_e, L_negate( *( Cy_tilde_p_fx ) ), mat_mult_buffer2_fx_e[i * lengthCy + j], &Cr_e_arr[i * lengthCy + j] );
    1123     2958644 :             move32();
    1124     2958644 :             Cr_p_fx++;
    1125     2958644 :             Cy_p_fx++;
    1126     2958644 :             Cy_tilde_p_fx++;
    1127             :         }
    1128             : 
    1129             :         /* Avoid Meaningless negative main diagonal elements */
    1130      575324 :         IF( Cr_fx[i + ( i * lengthCy )] < 0 )
    1131             :         {
    1132       62377 :             Cr_fx[i + ( i * lengthCy )] = 0;
    1133       62377 :             move32();
    1134       62377 :             Cr_e_arr[i + ( i * lengthCy )] = 0;
    1135       62377 :             move16();
    1136             :         }
    1137             :     }
    1138             : 
    1139      113120 :     exp = Cr_e_arr[0];
    1140      113120 :     move16();
    1141     2958644 :     FOR( i = 1; i < lengthCy * lengthCy; i++ )
    1142             :     {
    1143     2845524 :         if ( LT_16( exp, Cr_e_arr[i] ) )
    1144             :         {
    1145      107041 :             exp = Cr_e_arr[i];
    1146      107041 :             move16();
    1147             :         }
    1148             :     }
    1149             : 
    1150     3071764 :     FOR( i = 0; i < lengthCy * lengthCy; i++ )
    1151             :     {
    1152     2958644 :         Cr_fx[i] = L_shr( Cr_fx[i], sub( exp, Cr_e_arr[i] ) ); // Q(31-exp)
    1153     2958644 :         move32();
    1154             :     }
    1155             : 
    1156      113120 :     Cr_fx_e = exp;
    1157      113120 :     move16();
    1158             : 
    1159      113120 :     exp = mat_mult_buffer2_fx_e[0];
    1160      113120 :     move16();
    1161     2958644 :     FOR( i = 1; i < lengthCy * lengthCy; i++ )
    1162             :     {
    1163     2845524 :         if ( LT_16( exp, mat_mult_buffer2_fx_e[i] ) )
    1164             :         {
    1165      157715 :             exp = mat_mult_buffer2_fx_e[i];
    1166      157715 :             move16();
    1167             :         }
    1168             :     }
    1169             : 
    1170     3071764 :     FOR( i = 0; i < lengthCy * lengthCy; i++ )
    1171             :     {
    1172     2958644 :         mat_mult_buffer2_fx[i] = L_shr( mat_mult_buffer2_fx[i], sub( exp, mat_mult_buffer2_fx_e[i] ) ); // Q(31-exp)
    1173     2958644 :         move32();
    1174             :     }
    1175             : 
    1176      113120 :     mat_mult_buffer2_e = exp;
    1177      113120 :     move16();
    1178             : 
    1179             :     /*-----------------------------------------------------------------*
    1180             :      * Energy Compensation
    1181             :      *-----------------------------------------------------------------*/
    1182             : 
    1183      113120 :     IF( EQ_16( energy_compensation_flag, 1 ) )
    1184             :     {
    1185       22220 :         adj_fx_p = svd_s_buffer_fx;
    1186       22220 :         Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1187             : 
    1188      135100 :         FOR( i = 0; i < lengthCy; ++i )
    1189             :         {
    1190             :             /* Avoid correction for very small energies,
    1191             :                main diagonal elements of Cy_tilde_p may be negative */
    1192      112880 :             IF( Cy_tilde_p_fx[i + ( i * lengthCy )] < 0 )
    1193             :             {
    1194           0 :                 adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1195           0 :                 move32();
    1196           0 :                 adj_e[i] = 1;
    1197           0 :                 move16();
    1198             :             }
    1199             :             ELSE
    1200             :             {
    1201      112880 :                 tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( i * lengthCy )], L_add( Cy_tilde_p_fx[i + ( i * lengthCy )], EPSILON_FX ), &exp );
    1202      112880 :                 exp = add( exp, sub( Cy_fx_e, mat_mult_buffer2_e ) );
    1203      112880 :                 L_tmp = L_deposit_h( tmp );
    1204      112880 :                 L_tmp = Sqrt32( L_tmp, &exp );
    1205      112880 :                 adj_fx_p[i] = L_tmp;
    1206      112880 :                 move32();
    1207      112880 :                 adj_e[i] = exp;
    1208      112880 :                 move16();
    1209             :             }
    1210             : 
    1211      112880 :             Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, adj_e[i] ) );
    1212      112880 :             IF( GT_32( adj_fx_p[i], temp ) )
    1213             :             {
    1214           5 :                 adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1215           5 :                 move32();
    1216           5 :                 adj_e[i] = 3;
    1217           5 :                 move16();
    1218             :             }
    1219             :         }
    1220             : 
    1221       22220 :         exp = adj_e[0];
    1222       22220 :         move16();
    1223      112880 :         FOR( i = 1; i < lengthCy; i++ )
    1224             :         {
    1225       90660 :             if ( LT_16( exp, adj_e[i] ) )
    1226             :             {
    1227        1074 :                 exp = adj_e[i];
    1228        1074 :                 move16();
    1229             :             }
    1230             :         }
    1231             : 
    1232      135100 :         FOR( i = 0; i < lengthCy; i++ )
    1233             :         {
    1234      112880 :             adj_fx[i] = L_shr( adj_fx_p[i], sub( exp, adj_e[i] ) ); // Q(31-exp)
    1235      112880 :             move32();
    1236             :         }
    1237       22220 :         adj_fx_e = exp;
    1238       22220 :         move16();
    1239             : 
    1240       22220 :         diag_matrix_product_fx( adj_fx, adj_fx_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1241             : 
    1242       22220 :         Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCx, lengthCy ) ); // Q(31-mat_mult_buffer3_e)
    1243       22220 :         mixing_matrix_e = mat_mult_buffer3_e;
    1244       22220 :         move16();
    1245             :     }
    1246             : 
    1247      113120 :     *mixing_matrix_out_e = mixing_matrix_e;
    1248      113120 :     move16();
    1249      113120 :     *Cr_e = Cr_fx_e;
    1250      113120 :     move16();
    1251      113120 :     pop_wmops();
    1252             : 
    1253      113120 :     return out;
    1254             : }
    1255             : 
    1256             : /*-------------------------------------------------------------------*
    1257             :  * computeMixingMatricesResidual_fx()
    1258             :  *
    1259             :  * Compute a residual mixing matrix using the covariance synthesis approach
    1260             :  *-------------------------------------------------------------------*/
    1261             : 
    1262       90900 : Word16 computeMixingMatricesResidual_fx(
    1263             :     const Word32 num_outputs, /* i  : number of output channels                                           */
    1264             :     const Word32 *Cx_fx,      /* i  : vector containing the diagonal diffuse prototype covariance Q(31-Cx_e)         */
    1265             :     const Word16 Cx_e,
    1266             :     const Word32 *Cy_fx, /* i  : matrix containing the missing cov (Cr from computeMixingMatrices()) Q(31-Cy_fx_e) */
    1267             :     const Word16 Cy_fx_e,
    1268             :     const Word32 reg_Sx_fx, /* i  : regularization factor for the input channel singular values Q(31-reg_Sx_e)         */
    1269             :     const Word16 reg_Sx_e,
    1270             :     const Word32 reg_ghat_fx, /* i  : regularization factor for the normalization matrix Q(31-reg_ghat_e)                  */
    1271             :     const Word16 reg_ghat_e,
    1272             :     Word32 *mixing_matrix_fx, /* o  : resulting residual mixing matrix Q(31-mixing_matrix_ret_e)                                    */
    1273             :     Word16 *mixing_matrix_ret_e )
    1274             : {
    1275             :     Word16 i, j;
    1276             :     Word16 out, lengthCx, lengthCy;
    1277       90900 :     out = EXIT_SUCCESS;
    1278       90900 :     move16();
    1279       90900 :     lengthCx = extract_l( num_outputs );
    1280       90900 :     lengthCy = extract_l( num_outputs );
    1281             : 
    1282             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1283             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1284             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
    1285             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
    1286             : 
    1287             :     Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1288             : 
    1289       90900 :     push_wmops( "dirac_cov_mix_mat_r" );
    1290             : 
    1291             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1292             :     Word32 adj_fx[MAX_OUTPUT_CHANNELS];
    1293             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1294       90900 :     Word16 mixing_matrix_e = 0, mat_mult_buffer1_e, adj_e, mat_mult_buffer3_e, mat_mult_buffer2_e;
    1295       90900 :     move16();
    1296             : 
    1297       90900 :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 };
    1298             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
    1299             :     Word32 L_tmp;
    1300             :     Word16 tmp_e;
    1301             :     Word16 tmp, scale;
    1302             :     Word32 div_tmp;
    1303             :     Word16 exp;
    1304             : 
    1305             :     Word32 Kx_fx[MAX_OUTPUT_CHANNELS];
    1306             :     Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1307             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1308             :     Word16 Kx_fx_e[MAX_OUTPUT_CHANNELS];
    1309             : 
    1310             :     Word32 limit_fx;
    1311             :     Word16 limit_e;
    1312             : 
    1313             :     Word32 Kx_reg_inv_fx[MAX_OUTPUT_CHANNELS];
    1314             :     Word16 Kx_reg_inv_e[MAX_OUTPUT_CHANNELS];
    1315             : 
    1316             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
    1317             :     Word16 Cy_hat_diag_e;
    1318             : 
    1319             :     Word16 Cy_hat_diag_fx_e[MAX_OUTPUT_CHANNELS];
    1320             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
    1321             :     Word16 G_hat_e[MAX_OUTPUT_CHANNELS];
    1322             : 
    1323             :     Word32 *adj_fx_p;
    1324             :     Word16 adj_buff_e[MAX_OUTPUT_CHANNELS];
    1325             : 
    1326             :     Word32 Cy_tilde_fx[MAX_OUTPUT_CHANNELS];
    1327             :     Word16 Cy_tilde_e;
    1328             : 
    1329             :     /*-----------------------------------------------------------------*
    1330             :      * Decomposition of Cy
    1331             :      *-----------------------------------------------------------------*/
    1332             : 
    1333             :     /* Processing the SVD */
    1334             : 
    1335             :     /* linear array to svd buffer */
    1336       90900 :     mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
    1337             : 
    1338       90900 :     svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
    1339             : 
    1340             :     /* Computing Ky */
    1341       90900 :     Word16 max_e = -32;
    1342      553344 :     FOR( i = 0; i < lengthCy; ++i )
    1343             :     {
    1344     2841348 :         FOR( j = 0; j < lengthCy; ++j )
    1345             :         {
    1346     2378904 :             tmp_e = svd_s_buffer_e[j];
    1347     2378904 :             move16();
    1348     2378904 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
    1349     2378904 :             Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
    1350     2378904 :             move32();
    1351     2378904 :             Ky_fx_e[i + j * lengthCy] = tmp_e;
    1352     2378904 :             move16();
    1353     2378904 :             max_e = s_max( max_e, tmp_e );
    1354             :         }
    1355             :     }
    1356             : 
    1357     2469804 :     FOR( i = 0; i < lengthCy * lengthCy; ++i )
    1358             :     {
    1359     2378904 :         Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
    1360     2378904 :         move32();
    1361     2378904 :         Ky_fx_e[i] = max_e;
    1362     2378904 :         move16();
    1363             :     }
    1364             : 
    1365             :     /*-----------------------------------------------------------------*
    1366             :      * Decomposition of Cx
    1367             :      *-----------------------------------------------------------------*/
    1368             : 
    1369             :     /* Processing the SVD of Cx*/
    1370             :     /* Cx is a diagonal matrix, so SVD would lead to the sorted diagonal as S and u
    1371             :      *  would be just indicating the sorting index, so go straight to Kx as the
    1372             :      * square root of the diagonal of Cx */
    1373             : 
    1374             :     /* Computing Kx */
    1375       90900 :     max_e = -32;
    1376      553344 :     FOR( i = 0; i < lengthCx; ++i )
    1377             :     {
    1378      462444 :         exp = Cx_e;
    1379      462444 :         move16();
    1380      462444 :         Kx_fx[i] = Sqrt32( Cx_fx[i], &exp );
    1381      462444 :         move32();
    1382      462444 :         Kx_fx_e[i] = exp;
    1383      462444 :         move16();
    1384      462444 :         max_e = s_max( max_e, exp );
    1385             :     }
    1386             : 
    1387      553344 :     FOR( i = 0; i < lengthCx; ++i )
    1388             :     {
    1389      462444 :         Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
    1390      462444 :         move32();
    1391      462444 :         Kx_fx_e[i] = max_e;
    1392      462444 :         move16();
    1393             :     }
    1394             : 
    1395             :     /*-----------------------------------------------------------------*
    1396             :      * Regularization of Sx
    1397             :      *-----------------------------------------------------------------*/
    1398             : 
    1399       90900 :     limit_fx = Kx_fx[0];
    1400       90900 :     move32();
    1401             : 
    1402      462444 :     FOR( i = 1; i < lengthCx; i++ )
    1403             :     {
    1404      371544 :         IF( GT_32( Kx_fx[i], limit_fx ) )
    1405             :         {
    1406      142098 :             limit_fx = Kx_fx[i];
    1407      142098 :             move32();
    1408             :         }
    1409             :     }
    1410             : 
    1411       90900 :     L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx ); // limit_e + reg_Sx_e
    1412       90900 :     L_tmp = L_add( L_tmp, EPSILLON_FX );
    1413       90900 :     limit_fx = L_tmp;
    1414       90900 :     move16();
    1415       90900 :     limit_e = add( Kx_fx_e[0], reg_Sx_e );
    1416             : 
    1417      553344 :     FOR( i = 0; i < lengthCx; ++i )
    1418             :     {
    1419             : #ifdef OPT_MCH_DEC_V1_NBE
    1420      462444 :         IF( GT_32( Kx_fx[i], L_shl_sat( limit_fx, sub( limit_e, Kx_fx_e[i] ) ) ) )
    1421             : #else  /* OPT_MCH_DEC_V1_NBE */
    1422             :         IF( BASOP_Util_Cmp_Mant32Exp( Kx_fx[i], Kx_fx_e[i], limit_fx, limit_e ) > 0 )
    1423             : #endif /* OPT_MCH_DEC_V1_NBE */
    1424             :         {
    1425      461587 :             div_tmp = Kx_fx[i];
    1426      461587 :             move32();
    1427      461587 :             exp = Kx_fx_e[i];
    1428      461587 :             move16();
    1429             :         }
    1430             :         ELSE
    1431             :         {
    1432         857 :             div_tmp = limit_fx;
    1433         857 :             move32();
    1434         857 :             exp = limit_e;
    1435         857 :             move16();
    1436             :         }
    1437      462444 :         tmp = BASOP_Util_Divide3232_Scale( 1073741824, div_tmp, &scale ); // 1073741824 -> 1.0f in Q30
    1438      462444 :         scale = add( scale, sub( Q1, exp ) );
    1439             : 
    1440      462444 :         Kx_reg_inv_fx[i] = L_deposit_h( tmp );
    1441      462444 :         move32();
    1442      462444 :         Kx_reg_inv_e[i] = scale;
    1443      462444 :         move16();
    1444             :     }
    1445             : 
    1446       90900 :     limit_fx = 0;
    1447       90900 :     move32();
    1448       90900 :     limit_e = 0;
    1449       90900 :     move16();
    1450             : 
    1451             :     /*-----------------------------------------------------------------*
    1452             :      * regularized Kx-1
    1453             :      *-----------------------------------------------------------------*/
    1454             : 
    1455             :     /*-----------------------------------------------------------------*
    1456             :      * normalization matrix G hat
    1457             :      *-----------------------------------------------------------------*/
    1458             : 
    1459             :     /* Computing Cy_hat_diag */
    1460       90900 :     Copy32( Cx_fx, Cy_hat_diag_fx, extract_l( num_outputs ) ); // Q(31-Cx_e)
    1461       90900 :     Cy_hat_diag_e = Cx_e;
    1462       90900 :     move16();
    1463             : 
    1464             : #ifdef OPT_MCH_DEC_V1_NBE
    1465       90900 :     Word16 com_e = sub( limit_e, Cy_hat_diag_e );
    1466             : #endif /* OPT_MCH_DEC_V1_NBE */
    1467      553344 :     FOR( i = 0; i < lengthCy; ++i )
    1468             :     {
    1469             : #ifdef OPT_MCH_DEC_V1_NBE
    1470      462444 :         IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
    1471             : #else  /* OPT_MCH_DEC_V1_NBE */
    1472             :         IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
    1473             : #endif /* OPT_MCH_DEC_V1_NBE */
    1474             :         {
    1475      462444 :             limit_fx = Cy_hat_diag_fx[i];
    1476      462444 :             move32();
    1477      462444 :             limit_e = Cy_hat_diag_e;
    1478      462444 :             move16();
    1479             :         }
    1480             :     }
    1481             : 
    1482             : #ifdef OPT_MCH_DEC_V1_BE
    1483       90900 :     limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
    1484             : #else                                                           /* OPT_MCH_DEC_V1_BE */
    1485             :     L_tmp = Mpy_32_32( limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
    1486             :     limit_fx = L_add( L_tmp, EPSILON_FX );
    1487             : #endif                                                          /* OPT_MCH_DEC_V1_BE */
    1488       90900 :     limit_e = add( limit_e, reg_ghat_e );
    1489             : 
    1490             :     /* Computing G_hat */
    1491             : 
    1492             : #ifdef OPT_MCH_DEC_V1_NBE
    1493       90900 :     com_e = sub( Cy_hat_diag_e, limit_e );
    1494             : #endif /* OPT_MCH_DEC_V1_NBE */
    1495      553344 :     FOR( i = 0; i < lengthCy; ++i )
    1496             :     {
    1497      462444 :         Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
    1498      462444 :         move16();
    1499             : #ifdef OPT_MCH_DEC_V1_NBE
    1500      462444 :         IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
    1501             : #else                                                                  /* OPT_MCH_DEC_V1_NBE */
    1502             :         IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_e ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
    1503             : #endif                                                                 /* OPT_MCH_DEC_V1_NBE */
    1504             :         {
    1505          10 :             Cy_hat_diag_fx[i] = limit_fx;
    1506          10 :             move32();
    1507          10 :             Cy_hat_diag_fx_e[i] = limit_e;
    1508          10 :             move16();
    1509             :         }
    1510      462444 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + i * lengthCy], Cy_hat_diag_fx[i], &scale );
    1511      462444 :         scale = add( scale, sub( Cy_fx_e, Cy_hat_diag_fx_e[i] ) );
    1512      462444 :         L_tmp = Sqrt32( L_deposit_h( tmp ), &scale );
    1513             : 
    1514      462444 :         G_hat_fx[i] = L_tmp;
    1515      462444 :         move32();
    1516      462444 :         G_hat_e[i] = scale;
    1517      462444 :         move16();
    1518             :     }
    1519             : 
    1520             :     /*-----------------------------------------------------------------*
    1521             :      * Formulate optimal P
    1522             :      *-----------------------------------------------------------------*/
    1523             : 
    1524      553344 :     FOR( i = 0; i < num_outputs; i++ )
    1525             :     {
    1526             : #ifdef OPT_MCH_DEC_V1_BE
    1527      462444 :         Kx_fx[i] = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e))
    1528             : #else                                                  /* OPT_MCH_DEC_V1_BE */
    1529             :         L_tmp = Mpy_32_32( Kx_fx[i], G_hat_fx[i] );                                               // Q(31-(Kx_fx_e+G_hag_e))
    1530             :         Kx_fx[i] = L_tmp;
    1531             : #endif                                                 /* OPT_MCH_DEC_V1_BE */
    1532      462444 :         move32();
    1533      462444 :         Kx_fx_e[i] = add( Kx_fx_e[i], G_hat_e[i] );
    1534      462444 :         move16();
    1535             :     }
    1536             : 
    1537      553344 :     FOR( i = 0; i < num_outputs; i++ )
    1538             :     {
    1539             :         Word32 fac_fx;
    1540      462444 :         fac_fx = Kx_fx[i];
    1541      462444 :         move32();
    1542             : 
    1543     2841348 :         FOR( j = 0; j < num_outputs; j++ )
    1544             :         {
    1545             : #ifdef OPT_MCH_DEC_V1_BE
    1546     2378904 :             mat_mult_buffer1_fx[i + j * num_outputs] = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e));
    1547     2378904 :             move32();
    1548     2378904 :             mat_mult_buffer1_buff_e[i + j * num_outputs] = add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] );
    1549     2378904 :             move16();
    1550             : #else  /* OPT_MCH_DEC_V1_BE */
    1551             :             L_tmp = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e))
    1552             :             mat_mult_buffer1_fx[i + j * num_outputs] = L_tmp;
    1553             :             move32();
    1554             :             mat_mult_buffer1_buff_e[i + j * num_outputs] = extract_l( L_add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] ) );
    1555             :             move16();
    1556             : #endif /* OPT_MCH_DEC_V1_BE */
    1557             :         }
    1558             :     }
    1559             : 
    1560       90900 :     mat_mult_buffer1_e = mat_mult_buffer1_buff_e[0];
    1561       90900 :     move16();
    1562     2378904 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1563             :     {
    1564     2288004 :         if ( LT_16( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) )
    1565             :         {
    1566       24811 :             mat_mult_buffer1_e = mat_mult_buffer1_buff_e[i];
    1567       24811 :             move16();
    1568             :         }
    1569             :     }
    1570             : 
    1571     2469804 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1572             :     {
    1573     2378904 :         mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) ); // Q(31-mat_mult_buffer1_e)
    1574     2378904 :         move32();
    1575             :     }
    1576             : 
    1577       90900 :     mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
    1578             : 
    1579       90900 :     svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCy );
    1580             : 
    1581             :     /* Actually Processing P */
    1582             : 
    1583       90900 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
    1584       90900 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1585       90900 :     mat_mult_buffer1_e = 0;
    1586       90900 :     move16();
    1587       90900 :     mat_mult_buffer2_e = 0;
    1588       90900 :     move16();
    1589             : 
    1590       90900 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0,
    1591             :                                 mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
    1592             :                                 mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1593             : 
    1594             :     /*-----------------------------------------------------------------*
    1595             :      * Formulate M
    1596             :      *-----------------------------------------------------------------*/
    1597             : 
    1598             : 
    1599       90900 :     matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
    1600       90900 :     set16_fx( mat_mult_buffer1_buff_e, mat_mult_buffer1_buff_e[0], MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
    1601             : 
    1602             :     Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
    1603             : 
    1604      553344 :     FOR( i = 0; i < num_outputs; i++ )
    1605             :     {
    1606             :         Word32 fac_fx;
    1607      462444 :         fac_fx = Kx_reg_inv_fx[i];
    1608      462444 :         move32();
    1609             : 
    1610     2841348 :         FOR( j = 0; j < num_outputs; j++ )
    1611             :         {
    1612             : #ifdef OPT_MCH_DEC_V1_BE
    1613     2378904 :             mixing_matrix_fx[j + i * num_outputs] = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e);
    1614     2378904 :             move32();
    1615             : #else  /* OPT_MCH_DEC_V1_BE */
    1616             :             L_tmp = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e)
    1617             :             mixing_matrix_fx[j + i * num_outputs] = L_tmp;
    1618             :             move32();
    1619             : #endif /* OPT_MCH_DEC_V1_BE */
    1620     2378904 :             mixing_matrix_fx_e[j + i * num_outputs] = add( mat_mult_buffer1_buff_e[j + i * num_outputs], Kx_reg_inv_e[i] );
    1621     2378904 :             move16();
    1622             :         }
    1623             :     }
    1624             : 
    1625             :     /*-----------------------------------------------------------------*
    1626             :      * Formulate Cr
    1627             :      *-----------------------------------------------------------------*/
    1628             : 
    1629             :     /* Compute Cy_tilde = M*Cx*M' */
    1630             : 
    1631             :     Word16 Cx_e_arr[MAX_CICP_CHANNELS];
    1632       90900 :     set16_fx( Cx_e_arr, Cx_e, MAX_CICP_CHANNELS );
    1633             : 
    1634       90900 :     matrix_diag_product_fx_1( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
    1635             : 
    1636       90900 :     exp = mixing_matrix_fx_e[0];
    1637       90900 :     move16();
    1638     2378904 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1639             :     {
    1640     2288004 :         if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
    1641             :         {
    1642        9746 :             exp = mixing_matrix_fx_e[i];
    1643        9746 :             move16();
    1644             :         }
    1645             :     }
    1646             : 
    1647     2469804 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1648             :     {
    1649     2378904 :         mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
    1650     2378904 :         move32();
    1651             :     }
    1652       90900 :     mixing_matrix_e = exp;
    1653       90900 :     move16();
    1654             : 
    1655       90900 :     exp = mat_mult_buffer1_buff_e[0];
    1656       90900 :     move16();
    1657     2378904 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1658             :     {
    1659     2288004 :         if ( LT_16( exp, mat_mult_buffer1_buff_e[i] ) )
    1660             :         {
    1661        9746 :             exp = mat_mult_buffer1_buff_e[i];
    1662        9746 :             move16();
    1663             :         }
    1664             :     }
    1665             : 
    1666     2469804 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1667             :     {
    1668     2378904 :         mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( exp, mat_mult_buffer1_buff_e[i] ) ); // Q(31-exp)
    1669     2378904 :         move32();
    1670             :     }
    1671       90900 :     mat_mult_buffer1_e = exp;
    1672       90900 :     move16();
    1673             : 
    1674       90900 :     matrix_product_diag_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 1, Cy_tilde_fx, &Cy_tilde_e );
    1675             : 
    1676             :     /*-----------------------------------------------------------------*
    1677             :      * Energy Compensation
    1678             :      *-----------------------------------------------------------------*/
    1679             : 
    1680       90900 :     adj_fx_p = svd_s_buffer_fx;
    1681             : 
    1682      553344 :     FOR( i = 0; i < lengthCy; ++i )
    1683             :     {
    1684      462444 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( lengthCy * i )], L_add( Cy_tilde_fx[i], EPSILON_FX ), &scale );
    1685      462444 :         scale = add( scale, sub( Cy_fx_e, Cy_tilde_e ) );
    1686      462444 :         adj_fx_p[i] = Sqrt32( L_deposit_h( tmp ), &scale );
    1687      462444 :         move32();
    1688      462444 :         adj_buff_e[i] = scale;
    1689      462444 :         move16();
    1690      462444 :         Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, scale ) );
    1691      462444 :         IF( GT_32( adj_fx_p[i], temp ) ) // 1073741824 -> 1.0f in Q30
    1692             :         {
    1693           1 :             adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1694           1 :             move32();
    1695           1 :             adj_buff_e[i] = 3;
    1696           1 :             move16();
    1697             :         }
    1698             :     }
    1699             : 
    1700       90900 :     adj_e = adj_buff_e[0];
    1701       90900 :     move16();
    1702             : 
    1703      462444 :     FOR( i = 1; i < lengthCy; i++ )
    1704             :     {
    1705      371544 :         if ( LT_16( adj_e, adj_buff_e[i] ) )
    1706             :         {
    1707       36682 :             adj_e = adj_buff_e[i];
    1708       36682 :             move16();
    1709             :         }
    1710             :     }
    1711             : 
    1712             : 
    1713      553344 :     FOR( i = 0; i < lengthCy; i++ )
    1714             :     {
    1715      462444 :         adj_fx[i] = L_shr( adj_fx_p[i], sub( adj_e, adj_buff_e[i] ) ); // Q(31-adj_e)
    1716      462444 :         move32();
    1717             :     }
    1718             : 
    1719       90900 :     diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1720       90900 :     Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
    1721       90900 :     *mixing_matrix_ret_e = mat_mult_buffer3_e;
    1722       90900 :     move16();
    1723             : 
    1724       90900 :     pop_wmops();
    1725             : 
    1726       90900 :     return out;
    1727             : }
    1728             : 
    1729             : 
    1730             : /*-------------------------------------------------------------------*
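    1731             :  * computeMixingMatricesISM_fx()
    1732             :  *
    1733             :  * Fixed-point computation of the mixing matrix (mantissa in mixing_matrix_fx, exponent in mixing_matrix_e) from the responses, energies and diagonal Cx/Cy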
    1734             :  *-------------------------------------------------------------------*/
    1735      315180 : Word16 computeMixingMatricesISM_fx(
    1736             :     const Word16 num_inputs,
    1737             :     const Word16 num_responses,
    1738             :     const Word16 num_outputs,
    1739             :     const Word32 *responses_fx, /*Q(31-responses_e) */
    1740             :     const Word16 responses_e,
    1741             :     const Word32 *ener_fx, /*Q(31-ener_e) */
    1742             :     const Word16 ener_e,
    1743             :     const Word32 *Cx_diag_fx, /*Q(31-diag_e) */
    1744             :     const Word16 Cx_diag_e,
    1745             :     const Word32 *Cy_diag_fx, /*Q(31-diag_e) */
    1746             :     const Word16 Cy_diag_e,
    1747             :     const Word16 *Q_16fx, // Q15
    1748             :     const Word16 energy_compensation_flag,
    1749             :     const Word32 reg_Sx_fx,   /*Q0*/
    1750             :     const Word32 reg_ghat_fx, /*Q0*/
    1751             :     Word32 *mixing_matrix_fx, /*Q(31-mixing_matrix_e) */
    1752             :     Word16 *mixing_matrix_e )
    1753             : {
    1754             :     Word16 i, out;
    1755             :     Word16 lengthCx, lengthCy;
    1756             :     Word16 nL, nC;
    1757             : 
    1758             :     Word32 *Cy_tilde_p_fx;
    1759             :     Word32 *adj_fx;
    1760             :     Word32 limit_fx;
    1761             : 
    1762             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1763             :     Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1764             :     Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1765             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1766             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
    1767             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1768             :     Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS];
    1769             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1770             :     Word32 Kx_fx[MAX_TRANSPORT_CHANNELS];
    1771             :     Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e;
    1772             : 
    1773             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1774             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1775             :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
    1776             :     Word16 svd_s_buffer_fx_e;
    1777             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
    1778             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1779             : 
    1780             :     Word16 temp_e[MAX_OUTPUT_CHANNELS];
    1781      315180 :     push_wmops( "dirac_cov_mix_mat" );
    1782             : 
    1783      315180 :     out = EXIT_SUCCESS;
    1784      315180 :     move16();
    1785      315180 :     lengthCx = num_inputs;
    1786      315180 :     move16();
    1787      315180 :     lengthCy = num_outputs;
    1788      315180 :     move16();
    1789             : 
    1790     6073020 :     FOR( i = 0; i < lengthCy * lengthCx; i++ )
    1791             :     {
    1792     5757840 :         IF( EQ_16( Q_16fx[i], MAX_16 ) )
    1793             :         {
    1794     2575200 :             Q_fx[i] = MAX_32;
    1795     2575200 :             move32();
    1796             :         }
    1797             :         ELSE
    1798             :         {
    1799     3182640 :             Q_fx[i] = L_deposit_h( Q_16fx[i] );
    1800     3182640 :             move32();
    1801             :         }
    1802             :     }
    1803      315180 :     Q_e = 0;
    1804      315180 :     move16();
    1805      315180 :     set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
    1806     5358060 :     FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
    1807             :     {
    1808     5042880 :         set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1809     5042880 :         set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1810     5042880 :         set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1811             :     }
    1812             : 
    1813             :     /* Decomposition of Cy = Ky*Ky' */
    1814             :     /* Ky = responses*diag(ener) */
    1815      315180 :     matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e );
    1816             : 
    1817             :     /* Decomposition of Cx -> Computing Kx */
    1818      315180 :     set16_fx( temp_e, Cx_diag_e, lengthCx );
    1819      315180 :     v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx );
    1820      315180 :     Kx_e = temp_e[0];
    1821      315180 :     move16();
    1822      630360 :     FOR( i = 1; i < lengthCx; i++ )
    1823             :     {
    1824      315180 :         Kx_e = s_max( Kx_e, temp_e[i] );
    1825             :     }
    1826      945540 :     FOR( i = 0; i < lengthCx; i++ )
    1827             :     {
    1828      630360 :         Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); // Q(31-Kx_e)
    1829      630360 :         move32();
    1830             :     }
    1831             : 
    1832             :     /* Regularization of Sx */
    1833      315180 :     maximum_32_fx( Kx_fx, lengthCx, &limit_fx );
    1834      315180 :     limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); // Cy_hat_diag_e + reg_ghat_e
    1835             : 
    1836      945540 :     FOR( i = 0; i < lengthCx; ++i )
    1837             :     {
    1838      630360 :         IF( GT_32( Kx_fx[i], limit_fx ) )
    1839             :         {
    1840      595915 :             svd_s_buffer_fx[i] = Kx_fx[i];
    1841      595915 :             move32();
    1842             :         }
    1843             :         ELSE
    1844             :         {
    1845       34445 :             svd_s_buffer_fx[i] = limit_fx;
    1846       34445 :             move32();
    1847             :         }
    1848             :     }
    1849      315180 :     svd_s_buffer_fx_e = Kx_e;
    1850      315180 :     move16();
    1851             : 
    1852      315180 :     limit_fx = 0;
    1853      315180 :     move32();
    1854             : 
    1855             :     /* regularized Kx-1 */
    1856             : 
    1857      945540 :     FOR( i = 0; i < lengthCx; ++i )
    1858             :     {
    1859      630360 :         IF( svd_s_buffer_fx[i] )
    1860             :         {
    1861             :             Word32 reg_fac;
    1862      630358 :             reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, svd_s_buffer_fx[i], &temp_e[i] );
    1863      630358 :             Kx_reg_inv_fx[i] = reg_fac;
    1864      630358 :             move32();
    1865      630358 :             temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e );
    1866      630358 :             move16();
    1867             :         }
    1868             :         ELSE
    1869             :         {
    1870             :             Word32 reg_fac;
    1871           2 :             reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, EPSILON_FX_M, &temp_e[i] );
    1872           2 :             Kx_reg_inv_fx[i] = reg_fac;
    1873           2 :             move32();
    1874           2 :             temp_e[i] = sub( temp_e[i], EPSILON_FX_E );
    1875           2 :             move16();
    1876             :         }
    1877             :     }
    1878      315180 :     Kx_reg_inv_e = temp_e[0];
    1879      315180 :     move16();
    1880      630360 :     FOR( i = 1; i < lengthCx; i++ )
    1881             :     {
    1882      315180 :         Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] );
    1883             :     }
    1884      945540 :     FOR( i = 0; i < lengthCx; i++ )
    1885             :     {
    1886      630360 :         Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); // Q(31- Kx_reg_inv_e)
    1887      630360 :         move32();
    1888             :     }
    1889             : 
    1890             :     /************************ normalization matrix G hat **********************/
    1891             : 
    1892             :     /* Computing Q*Cx*Q' */
    1893             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
    1894             :     Word16 Cy_hat_diag_e;
    1895             : 
    1896      315180 :     matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e );
    1897             : 
    1898      315180 :     Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 );
    1899             : 
    1900     6073020 :     FOR( i = 0; i < lengthCy * lengthCx; ++i )
    1901             :     {
    1902     5757840 :         IF( GT_16( Q_Cx_e, Q_e ) )
    1903             :         {
    1904     5757826 :             Q_fx[i] = L_shr( Q_fx[i], guard_bits );
    1905     5757826 :             move32();
    1906             :         }
    1907             :         ELSE
    1908             :         {
    1909          14 :             Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits );
    1910          14 :             move32();
    1911             :         }
    1912             :     }
    1913             : 
    1914      315180 :     IF( GT_16( Q_Cx_e, Q_e ) )
    1915             :     {
    1916      315179 :         Q_e = add( Q_e, guard_bits );
    1917             :     }
    1918             :     ELSE
    1919             :     {
    1920           1 :         Q_Cx_e = add( Q_Cx_e, guard_bits );
    1921             :     }
    1922      315180 :     matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
    1923             : 
    1924             :     /* Computing Cy_hat_diag */
    1925     3194100 :     FOR( i = 0; i < lengthCy; ++i )
    1926             :     {
    1927     2878920 :         if ( GT_32( Cy_hat_diag_fx[i], limit_fx ) )
    1928             :         {
    1929      469328 :             limit_fx = Cy_hat_diag_fx[i];
    1930      469328 :             move32();
    1931             :         }
    1932             :     }
    1933             : 
    1934      315180 :     limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); // Cy_hat_diag_e + reg_ghat_e
    1935             : 
    1936             :     /* Computing G_hat */
    1937     3194100 :     FOR( i = 0; i < lengthCy; ++i )
    1938             :     {
    1939     2878920 :         if ( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
    1940             :         {
    1941       11610 :             Cy_hat_diag_fx[i] = limit_fx;
    1942       11610 :             move32();
    1943             :         }
    1944     2878920 :         IF( Cy_diag_fx[i] )
    1945             :         {
    1946     1875175 :             IF( Cy_hat_diag_fx[i] )
    1947             :             {
    1948     1875175 :                 G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] );
    1949     1875175 :                 move32();
    1950     1875175 :                 temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) );
    1951     1875175 :                 move16();
    1952     1875175 :                 G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
    1953     1875175 :                 move32();
    1954             :             }
    1955             :             ELSE
    1956             :             {
    1957           0 :                 G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
    1958           0 :                 move32();
    1959           0 :                 temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
    1960           0 :                 move16();
    1961           0 :                 G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
    1962           0 :                 move32();
    1963             :             }
    1964             :         }
    1965             :         ELSE
    1966             :         {
    1967     1003745 :             G_hat_fx[i] = 0;
    1968     1003745 :             move32();
    1969     1003745 :             temp_e[i] = 0;
    1970     1003745 :             move16();
    1971             :         }
    1972             :     }
    1973      315180 :     G_hat_e = temp_e[0];
    1974      315180 :     move16();
    1975     2878920 :     FOR( i = 1; i < lengthCy; i++ )
    1976             :     {
    1977     2563740 :         G_hat_e = s_max( G_hat_e, temp_e[i] );
    1978             :     }
    1979     3194100 :     FOR( i = 0; i < lengthCy; i++ )
    1980             :     {
    1981     2878920 :         G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); // Q(31-G_hat_e)
    1982     2878920 :         move32();
    1983             :     }
    1984             : 
    1985             :     /************************ Formulate optimal P **********************/
    1986             : 
    1987             :     /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
    1988      315180 :     diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1989             : 
    1990      315180 :     matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
    1991             : 
    1992      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1993             : 
    1994      315180 :     IF( LT_16( lengthCx, num_responses ) )
    1995             :     {
    1996        3600 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 );
    1997             : 
    1998        3600 :         nL = num_responses;
    1999        3600 :         move16();
    2000        3600 :         nC = lengthCx;
    2001        3600 :         move16();
    2002        3600 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
    2003             :     }
    2004             :     ELSE
    2005             :     {
    2006      311580 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 );
    2007             : 
    2008      311580 :         nL = lengthCx;
    2009      311580 :         move16();
    2010      311580 :         nC = num_responses;
    2011      311580 :         move16();
    2012      311580 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
    2013             :     }
    2014             : 
    2015             :     /* Actually Processing P */
    2016             : 
    2017             :     /* can be skipped: lambda is always a column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */
    2018      315180 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx );
    2019      315180 :     mat_mult_buffer1_e = 0;
    2020      315180 :     move16();
    2021      315180 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    2022      315180 :     mat_mult_buffer2_e = 0;
    2023      315180 :     move16();
    2024             : 
    2025      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    2026             : 
    2027             :     /************************ Formulate M **********************/
    2028             : 
    2029      315180 :     matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    2030             : 
    2031      315180 :     matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e );
    2032             : 
    2033             :     /*********************** Energy Compensation ****************/
    2034             : 
    2035             :     /* Compute Cy_tilde = M*Cx*M' */
    2036      315180 :     matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    2037             : 
    2038      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
    2039             : 
    2040      315180 :     IF( EQ_16( energy_compensation_flag, 1 ) )
    2041             :     {
    2042      315180 :         adj_fx = svd_s_buffer_fx;
    2043      315180 :         Cy_tilde_p_fx = mat_mult_buffer2_fx;
    2044     3194100 :         FOR( i = 0; i < lengthCy; ++i )
    2045             :         {
    2046             :             /* Avoid correction for very small energies: the main diagonal elements of Cy_tilde_p may be negative */
    2047     2878920 :             IF( Cy_tilde_p_fx[( i + ( i * lengthCy ) )] < 0 )
    2048             :             {
    2049           0 :                 adj_fx[i] = MAX_32;
    2050           0 :                 move32();
    2051           0 :                 temp_e[i] = 0;
    2052           0 :                 move16();
    2053             :             }
    2054             :             ELSE
    2055             :             {
    2056     2878920 :                 IF( Cy_diag_fx[i] )
    2057             :                 {
    2058     1875175 :                     IF( Cy_tilde_p_fx[i + ( i * lengthCy )] )
    2059             :                     {
    2060     1875155 :                         adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_tilde_p_fx[i + ( i * lengthCy )], &temp_e[i] );
    2061     1875155 :                         move32();
    2062     1875155 :                         temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) );
    2063     1875155 :                         move16();
    2064     1875155 :                         adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
    2065     1875155 :                         move32();
    2066             :                     }
    2067             :                     ELSE
    2068             :                     {
    2069          20 :                         adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
    2070          20 :                         move32();
    2071          20 :                         temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
    2072          20 :                         move16();
    2073          20 :                         adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
    2074          20 :                         move32();
    2075             :                     }
    2076             :                 }
    2077             :                 ELSE
    2078             :                 {
    2079     1003745 :                     adj_fx[i] = 0;
    2080     1003745 :                     move32();
    2081     1003745 :                     temp_e[i] = 0;
    2082     1003745 :                     move16();
    2083             :                 }
    2084             :             }
    2085             : 
    2086     2878920 :             Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, temp_e[i] ) );
    2087     2878920 :             IF( GT_32( adj_fx[i], temp ) )
    2088             :             {
    2089        1549 :                 adj_fx[i] = MAX_32;
    2090        1549 :                 move32();
    2091        1549 :                 temp_e[i] = 2;
    2092        1549 :                 move16();
    2093             :             }
    2094             :         }
    2095      315180 :         adj_e = temp_e[0];
    2096      315180 :         move16();
    2097     2878920 :         FOR( i = 1; i < lengthCy; i++ )
    2098             :         {
    2099     2563740 :             adj_e = s_max( adj_e, temp_e[i] );
    2100             :         }
    2101     3194100 :         FOR( i = 0; i < lengthCy; i++ )
    2102             :         {
    2103     2878920 :             adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); // Q(31-adj_e)
    2104     2878920 :             move32();
    2105             :         }
    2106             : 
    2107      315180 :         diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    2108             : 
    2109      315180 :         Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
    2110      315180 :         *mixing_matrix_e = mat_mult_buffer3_e;
    2111      315180 :         move16();
    2112             :     }
    2113             : 
    2114      315180 :     pop_wmops();
    2115             : 
    2116      315180 :     return out;
    2117             : }

Generated by: LCOV version 1.14