LCOV - code coverage report
Current view: top level - lib_dec - ivas_dirac_output_synthesis_cov_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main @ e70d960493906e094628337ac73d3c408863a5e6 Lines: 835 879 95.0 %
Date: 2025-09-17 02:22:58 Functions: 9 9 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : 
      34             : #include "basop_util.h"
      35             : #include "enh32.h"
      36             : #include <stdint.h>
      37             : #include <string.h>
      38             : #include <stdio.h>
      39             : #include <stdlib.h>
      40             : #include <assert.h>
      41             : #include <math.h>
      42             : #include "options.h"
      43             : #include "cnst.h"
      44             : #include "rom_enc.h"
      45             : #include "rom_com.h"
      46             : #include "prot_fx.h"
      47             : #include "ivas_stat_dec.h"
      48             : #include "ivas_cnst.h"
      49             : #include "ivas_rom_com.h"
      50             : #include "ivas_rom_dec.h"
      51             : #include "wmc_auto.h"
      52             : #include "rom_dec.h"
      53             : #include "ivas_prot_fx.h"
      54             : 
      55             : /*-----------------------------------------------------------------------*
      56             :  * Local constants
      57             :  *-----------------------------------------------------------------------*/
      58             : 
      59             : #define SQRT_EPSILON_FX 68 /* Q31 square root of EPSILON */
      60             : 
      61             : /*-------------------------------------------------------------------*
      62             :  * ivas_dirac_dec_output_synthesis_cov_open()
      63             :  *
      64             :  * Sets up the state and parameters for the Covariance Synthesis
      65             :  *-------------------------------------------------------------------*/
      66             : 
      67         341 : ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx(
      68             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params,   /* i/o: handle for the covariance synthesis parameters                                                        */
      69             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: hanlde for the covariance synthesis state                                                             */
      70             :     const Word16 max_band_decorr,                                     /* i  : uppermost frequency band where decorrelation is applied                                               */
      71             :     const Word16 interp_length,                                       /* i  : length for interpolating the mixing matrices in time slots                                            */
      72             :     const Word16 num_param_bands,                                     /* i  : number of parameter bands                                                                             */
      73             :     const Word16 num_param_bands_residual,                            /* i  : number of parameter bands with a residual mixing matrix (i.e. decorrelation                           */
      74             :     const Word16 nchan_in,                                            /* i  : number of input (transport) channels                                                                  */
      75             :     const Word16 nchan_out,                                           /* i  : number of output channels                                                                             */
      76             :     const Word32 *proto_matrix                                        /* i  : the prototype (upmix) matrix (only used if mode == 1)  Q(15-proto_matrix_e)                                               */
      77             : )
      78             : {
      79             :     Word16 idx;
      80             : 
      81         341 :     h_dirac_output_synthesis_params->max_band_decorr = max_band_decorr;
      82         341 :     move16();
      83             : 
      84             :     /*-----------------------------------------------------------------*
      85             :      * memory allocation
      86             :      *-----------------------------------------------------------------*/
      87             : 
      88             :     /* buffer length and interpolator */
      89         341 :     h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
      90         341 :     IF( ( h_dirac_output_synthesis_params->proto_matrix_fx = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
      91             :     {
      92           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
      93             :     }
      94         341 :     h_dirac_output_synthesis_params->proto_matrix_len = imult1616( nchan_out, nchan_in );
      95         341 :     move16();
      96             :     /* cov buffers */
      97        4445 :     FOR( idx = 0; idx < num_param_bands; idx++ )
      98             :     {
      99        4104 :         IF( ( h_dirac_output_synthesis_state->cx_old_fx[idx] = (Word32 *) malloc( nchan_in * nchan_in * sizeof( Word32 ) ) ) == NULL )
     100             :         {
     101           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     102             :         }
     103        4104 :         h_dirac_output_synthesis_state->cx_old_len = imult1616( nchan_in, nchan_in );
     104        4104 :         IF( ( h_dirac_output_synthesis_state->cy_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_out * sizeof( Word32 ) ) ) == NULL )
     105             :         {
     106           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     107             :         }
     108        4104 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
     109             :         {
     110           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     111             :         }
     112        4104 :         set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
     113        4104 :         set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     114        4104 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
     115             : 
     116        4104 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
     117             :         {
     118           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     119             :         }
     120        4104 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
     121        4104 :         h_dirac_output_synthesis_state->mixing_matrix_len = i_mult( nchan_out, nchan_in );
     122        4104 :         move16();
     123             :     }
     124       16697 :     FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     125             :     {
     126       16356 :         h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
     127       16356 :         h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
     128       16356 :         h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
     129       16356 :         h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
     130             :     }
     131             : 
     132        3563 :     FOR( idx = 0; idx < num_param_bands_residual; idx++ )
     133             :     {
     134        3222 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
     135             :         {
     136           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     137             :         }
     138        3222 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     139             : 
     140        3222 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
     141             :         {
     142           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     143             :         }
     144        3222 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
     145        3222 :         h_dirac_output_synthesis_state->mixing_matrix_res_len = i_mult( nchan_out, nchan_out );
     146        3222 :         move16();
     147             :     }
     148       17579 :     FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     149             :     {
     150       17238 :         h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
     151       17238 :         h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
     152             :     }
     153             : 
     154         341 :     IF( ( h_dirac_output_synthesis_state->cx_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     155             :     {
     156           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     157             :     }
     158         341 :     IF( ( h_dirac_output_synthesis_state->cy_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     159             :     {
     160           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     161             :     }
     162             : 
     163         341 :     set16_fx( h_dirac_output_synthesis_state->cx_old_e, 0, CLDFB_NO_CHANNELS_MAX );
     164         341 :     set16_fx( h_dirac_output_synthesis_state->cy_old_e, 0, CLDFB_NO_CHANNELS_MAX );
     165             : 
     166         341 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     167             :     {
     168           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     169             :     }
     170         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
     171             : 
     172         341 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     173             :     {
     174           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     175             :     }
     176         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     177             : 
     178         341 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     179             :     {
     180           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     181             :     }
     182         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
     183             : 
     184         341 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     185             :     {
     186           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     187             :     }
     188         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     189             : 
     190             :     /*-----------------------------------------------------------------*
     191             :      * prepare processing parameters
     192             :      *-----------------------------------------------------------------*/
     193             : 
     194             :     /* compute interpolator */
     195         341 :     IF( ( h_dirac_output_synthesis_params->interpolator_fx = (Word16 *) malloc( interp_length * sizeof( Word16 ) ) ) == NULL )
     196             :     {
     197           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     198             :     }
     199       11253 :     FOR( idx = 1; idx <= interp_length; ++idx )
     200             :     {
     201       10912 :         h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = div_s( idx, interp_length );
     202       10912 :         move16();
     203             :     }
     204         341 :     Copy32( proto_matrix, h_dirac_output_synthesis_params->proto_matrix_fx, imult1616( nchan_in, nchan_out ) );
     205         341 :     h_dirac_output_synthesis_params->proto_matrix_e = 5;
     206         341 :     move16();
     207         341 :     return IVAS_ERR_OK;
     208             : }
     209             : 
     210             : 
     211             : /*-------------------------------------------------------------------*
     212             :  * ivas_dirac_dec_output_synthesis_get_interpolator_fx()
     213             :  *
     214             :  *
     215             :  *-------------------------------------------------------------------*/
     216             : 
     217      328605 : void ivas_dirac_dec_output_synthesis_get_interpolator_fx(
     218             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters  */
     219             :     const UWord16 interp_length                                     /* i  : interpolator length                             */
     220             : )
     221             : {
     222             :     Word16 idx;
     223      328605 :     Word16 tmp, exp_diff = 0;
     224      328605 :     move16();
     225             : 
     226     1633254 :     FOR( idx = 1; idx <= interp_length; ++idx )
     227             :     {
     228     1304649 :         tmp = BASOP_Util_Divide3232_Scale( L_deposit_l( idx ), L_deposit_l( interp_length ), &exp_diff ); // (Q15 - exp_diff)
     229     1304649 :         h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = shl_sat( tmp, exp_diff );             // Q15
     230     1304649 :         move16();
     231             :     }
     232             : 
     233      328605 :     return;
     234             : }
     235             : 
     236             : 
     237             : /*-------------------------------------------------------------------*
     238             :  * ivas_dirac_dec_output_synthesis_cov_init()
     239             :  *
     240             :  * initialize the states for the covariance synthesis
     241             :  *-------------------------------------------------------------------*/
     242         341 : void ivas_dirac_dec_output_synthesis_cov_init_fx(
     243             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: pointer to the state of the covariance synthesis                            */
     244             :     const Word16 nchan_in,                                            /* i  : number of input (tranport) channels                                         */
     245             :     const Word16 nchan_out,                                           /* i  : number of output channels                                                   */
     246             :     const Word16 n_param_bands,                                       /* i  : number of total parameter bands                                             */
     247             :     const Word16 n_param_bands_res                                    /* i  : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
     248             : )
     249             : {
     250             : 
     251             :     Word16 idx;
     252             : 
     253             :     /* initialize buffers */
     254        4445 :     FOR( idx = 0; idx < n_param_bands; idx++ )
     255             :     {
     256        4104 :         set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
     257        4104 :         set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     258        4104 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
     259        4104 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
     260             :     }
     261             : 
     262        3563 :     FOR( idx = 0; idx < n_param_bands_res; idx++ )
     263             :     {
     264        3222 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     265        3222 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
     266             :     }
     267             : 
     268             : 
     269         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     270         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
     271         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     272         341 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
     273             : 
     274         341 :     return;
     275             : }
     276             : 
     277             : 
     278             : /*-------------------------------------------------------------------*
     279             :  * ivas_dirac_dec_output_synthesis_cov_close()
     280             :  *
     281             :  * deallocate dynamic memory in the covariance synthesis state
     282             :  *-------------------------------------------------------------------*/
     283             : 
     284         341 : void ivas_dirac_dec_output_synthesis_cov_close_fx(
     285             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params,  /* i  : handle for the covariance synthesis parameters  */
     286             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state /* i/o: handle for the covariance synthesis state       */
     287             : )
     288             : {
     289             :     Word16 idx;
     290             : 
     291             :     /*-----------------------------------------------------------------*
     292             :      * memory deallocation
     293             :      *-----------------------------------------------------------------*/
     294             : 
     295             :     /* free interpolator */
     296         341 :     IF( h_dirac_output_synthesis_params->interpolator_fx != NULL )
     297             :     {
     298         341 :         free( h_dirac_output_synthesis_params->interpolator_fx );
     299         341 :         h_dirac_output_synthesis_params->interpolator_fx = NULL;
     300             :     }
     301             : 
     302             :     /* free alpha */
     303         341 :     IF( h_dirac_output_synthesis_params->alpha_synthesis_fx != NULL )
     304             :     {
     305           0 :         free( h_dirac_output_synthesis_params->alpha_synthesis_fx );
     306           0 :         h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
     307             :     }
     308             : 
     309             :     /* free proto_matrix */
     310         341 :     IF( h_dirac_output_synthesis_params->proto_matrix_fx != NULL )
     311             :     {
     312         341 :         free( h_dirac_output_synthesis_params->proto_matrix_fx );
     313         341 :         h_dirac_output_synthesis_params->proto_matrix_fx = NULL;
     314             :     }
     315             : 
     316             : 
     317         341 :     IF( h_dirac_output_synthesis_state->cx_old_e != NULL )
     318             :     {
     319         341 :         free( h_dirac_output_synthesis_state->cx_old_e );
     320         341 :         h_dirac_output_synthesis_state->cx_old_e = NULL;
     321             :     }
     322         341 :     IF( h_dirac_output_synthesis_state->cy_old_e != NULL )
     323             :     {
     324         341 :         free( h_dirac_output_synthesis_state->cy_old_e );
     325         341 :         h_dirac_output_synthesis_state->cy_old_e = NULL;
     326             :     }
     327             : 
     328             :     /* free cov buffers */
     329       20801 :     FOR( idx = 0; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     330             :     {
     331       20460 :         IF( h_dirac_output_synthesis_state->cx_old_fx[idx] != NULL )
     332             :         {
     333        4104 :             free( h_dirac_output_synthesis_state->cx_old_fx[idx] );
     334        4104 :             h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
     335             :         }
     336             : 
     337       20460 :         IF( h_dirac_output_synthesis_state->cy_old_fx[idx] != NULL )
     338             :         {
     339        4104 :             free( h_dirac_output_synthesis_state->cy_old_fx[idx] );
     340        4104 :             h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
     341             :         }
     342             : 
     343       20460 :         IF( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] != NULL )
     344             :         {
     345        4104 :             free( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] );
     346        4104 :             h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
     347             :         }
     348             : 
     349       20460 :         IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] != NULL )
     350             :         {
     351        3222 :             free( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] );
     352        3222 :             h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
     353             :         }
     354             : 
     355       20460 :         IF( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] != NULL )
     356             :         {
     357        4104 :             free( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] );
     358        4104 :             h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
     359             :         }
     360             : 
     361       20460 :         IF( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] != NULL )
     362             :         {
     363        3222 :             free( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] );
     364        3222 :             h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
     365             :         }
     366             :     }
     367             : 
     368         341 :     IF( h_dirac_output_synthesis_state->mixing_matrix_old_exp != NULL )
     369             :     {
     370         341 :         free( h_dirac_output_synthesis_state->mixing_matrix_old_exp );
     371         341 :         h_dirac_output_synthesis_state->mixing_matrix_old_exp = NULL;
     372             :     }
     373             : 
     374         341 :     IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp != NULL )
     375             :     {
     376         341 :         free( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp );
     377         341 :         h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = NULL;
     378             :     }
     379             : 
     380         341 :     IF( h_dirac_output_synthesis_state->mixing_matrix_exp != NULL )
     381             :     {
     382         341 :         free( h_dirac_output_synthesis_state->mixing_matrix_exp );
     383         341 :         h_dirac_output_synthesis_state->mixing_matrix_exp = NULL;
     384             :     }
     385             : 
     386         341 :     IF( h_dirac_output_synthesis_state->mixing_matrix_res_exp != NULL )
     387             :     {
     388         341 :         free( h_dirac_output_synthesis_state->mixing_matrix_res_exp );
     389         341 :         h_dirac_output_synthesis_state->mixing_matrix_res_exp = NULL;
     390             :     }
     391         341 :     return;
     392             : }
     393             : 
     394             : /*-------------------------------------------------------------------*
     395             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
     396             :  *
     397             :  * collect the multi channel input covariance for one filter bank time slot
     398             :  *-------------------------------------------------------------------*/
     399             : 
     400             : /*-------------------------------------------------------------------*
     401             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
     402             :  *
     403             :  * collect the multi channel input covariance for one filter bank time slot
     404             :  *-------------------------------------------------------------------*/
     405     2084335 : void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
     406             :     Word32 *RealBuffer_fx,                                                          /* i  : input channel filter bank samples (real part)          Q(31- RealBuffer_e)*/
     407             :     Word16 RealBuffer_e,                                                            /* i  : exponent input channel filter bank samples (real part)*/
     408             :     Word32 *ImagBuffer_fx,                                                          /* i  : input channel filter bank samples (imaginary part      Q(ImagBuffer_e)*/
     409             :     Word16 ImagBuffer_e,                                                            /* i  : exponent input channel filter bank samples (real part)*/
     410             :     Word32 cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS],      /* o  : accumulated input covariance (real part)              Q(31- cx_e)*/
     411             :     Word16 *cx_e,                                                                   /* i  : exponent for accumulated input covariance (real part) */
     412             :     Word32 cx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o  : accumulated input covariance (imaginary part)         Q(31- cx_imag_e)*/
     413             :     Word16 *cx_imag_e,                                                              /* i : exponent accumulated input covariance (imag part)      */
     414             :     PARAM_MC_DEC_HANDLE hParamMC,                                                   /* i  : handle to Parametric MC state                         */
     415             :     const Word16 param_band,                                                        /* i  : parameter band                                        */
     416             :     const Word16 nchan_in                                                           /* i  : number of input channels                              */
     417             : )
     418             : {
     419             :     Word16 band_idx, ch_idx;
     420             :     Word16 brange[2];
     421             :     Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
     422             :     Word16 real_in_e;
     423             :     Word32 imag_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
     424             :     Word16 imag_in_e;
     425             :     Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     426             :     Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     427             :     Word16 output_e;
     428             :     Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
     429             :     Word16 band, num_bands;
     430             :     Word16 cx_fx_norm, cx_imag_fx_norm;
     431             :     /* estimate input covariance */
     432             :     /* Already stack here instead of in the process_subframe */
     433             : 
     434             :     /* collect input frame */
     435     2084335 :     brange[0] = hParamMC->band_grouping[param_band];
     436     2084335 :     move16();
     437     2084335 :     brange[1] = hParamMC->band_grouping[param_band + 1];
     438     2084335 :     move16();
     439     2084335 :     num_bands = sub( brange[1], brange[0] );
     440             : 
     441    10914375 :     FOR( band_idx = 0; band_idx < num_bands; band_idx++ )
     442             :     {
     443     8830040 :         band = add( brange[0], band_idx );
     444    26558880 :         FOR( ch_idx = 0; ch_idx < nchan_in; ch_idx++ )
     445             :         {
     446    17728840 :             real_in_buffer_fx[band_idx + num_bands * ch_idx] = RealBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
     447    17728840 :             move32();
     448    17728840 :             imag_in_buffer_fx[band_idx + num_bands * ch_idx] = ImagBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
     449    17728840 :             move32();
     450             :         }
     451             :     }
     452             : 
     453     2084335 :     real_in_e = RealBuffer_e;
     454     2084335 :     move16();
     455     2084335 :     imag_in_e = ImagBuffer_e;
     456     2084335 :     move16();
     457             : 
     458     2084335 :     Word16 buf_len = imult1616( num_bands, nchan_in );
     459             : 
     460     2084335 :     shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
     461     2084335 :     shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
     462             : 
     463     2084335 :     real_in_e = sub( real_in_e, shift_real );
     464     2084335 :     imag_in_e = sub( imag_in_e, shift_imag );
     465             : 
     466             : 
     467     2084335 :     output_e = s_max( real_in_e, imag_in_e );
     468             : 
     469     2084335 :     scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
     470     2084335 :     scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
     471             : 
     472     2084335 :     cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
     473     2084335 :     v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
     474             : 
     475     2084335 :     v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
     476             : 
     477     2084335 :     cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
     478     2084335 :     cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
     479             : 
     480     2084335 :     scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
     481     2084335 :     scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
     482             : 
     483     2084335 :     *cx_e = sub( tmp1_e, cx_fx_norm );
     484     2084335 :     move16();
     485     2084335 :     *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
     486     2084335 :     move16();
     487             : 
     488     2084335 :     return;
     489             : }
     490             : 
     491             : /*-------------------------------------------------------------------*
     492             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot()
     493             :  *
     494             :  * synthesize one filter bank slot of multi channel output filter bank
     495             :  * samples with the covariance synthesis
     496             :  *-------------------------------------------------------------------*/
     497             : 
     498      163667 : void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
     499             :     Word32 *Cldfb_RealBuffer_in_fx,                                                   /*Q6*/
     500             :     Word32 *Cldfb_ImagBuffer_in_fx,                                                   /*Q6*/
     501             :     Word32 Cldfb_RealBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o  : output channel filter bank samples (real part)                   Q6*/
     502             :     Word32 Cldfb_ImagBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o  : output channel filter bank samples (imaginary part)              Q6*/
     503             :     Word32 *mixing_matrix_fx[],                                                       /* i  : parameter band wise mixing matrices (direct part)                Q(31-mixing_matrix_e)*/
     504             :     Word16 *mixing_matrix_e,                                                          /* i  : parameter band wise mixing matrices (direct part)                */
     505             :     Word32 *mixing_matrix_res_fx[],                                                   /* i  : parameter band wise mixing matrices (residual part)              Q(31-mixing_matrix_res_e)*/
     506             :     Word16 *mixing_matrix_res_e,                                                      /* i  : parameter band wise mixing matrices (residual part)              */
     507             :     const UWord16 slot_idx_sfr,                                                       /* i  : time slot index for the current slot within the current subframe */
     508             :     const UWord16 slot_idx_tot,                                                       /* i  : time slot index for the current slot within the frame            */
     509             :     const Word16 nX,                                                                  /* i  : number of input channels                                         */
     510             :     const Word16 nY,                                                                  /* i  : number of output channels                                        */
     511             :     PARAM_MC_DEC_HANDLE hParamMC                                                      /* i  : handle to the Parametric MC decoder state                        */
     512             : )
     513             : {
     514             :     Word16 param_band_idx, band, ch_idx;
     515             :     Word16 have_residual;
     516             :     Word16 brange[2];
     517             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE h_synthesis_state;
     518             :     Word32 mixing_matrix_smooth_fx[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
     519             :     Word16 mixing_matrix_smooth_e;
     520             :     Word32 mixing_matrix_res_smooth_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     521      163667 :     Word16 mixing_matrix_res_smooth_e = 0;
     522      163667 :     move16();
     523             :     Word32 mixing_matrix_buffer_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     524             :     Word16 mixing_matrix_buffer_e;
     525             :     Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
     526             :     Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
     527             : 
     528             :     Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
     529             :     Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];
     530             : 
     531      163667 :     h_synthesis_state = hParamMC->h_output_synthesis_cov_state;
     532             : 
     533      163667 :     set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
     534      163667 :     set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
     535             : 
     536      163667 :     set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
     537      163667 :     set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );
     538             : 
     539     2288286 :     FOR( param_band_idx = 0; param_band_idx < hParamMC->num_param_bands_synth; param_band_idx++ )
     540             :     {
     541             :         /* final mixing */
     542     2124619 :         have_residual = 0;
     543     2124619 :         move16();
     544     2124619 :         brange[0] = hParamMC->band_grouping[param_band_idx];
     545     2124619 :         move16();
     546     2124619 :         brange[1] = hParamMC->band_grouping[( param_band_idx + 1 )];
     547     2124619 :         move16();
     548             : 
     549     2124619 :         if ( LT_16( brange[0], hParamMC->h_output_synthesis_params.max_band_decorr ) )
     550             :         {
     551     1709608 :             have_residual = 1;
     552     1709608 :             move16();
     553             :         }
     554             : 
     555     2124619 :         v_multc_fixed( mixing_matrix_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_smooth_fx, imult1616( nY, nX ) );
     556     2124619 :         mixing_matrix_smooth_e = mixing_matrix_e[param_band_idx]; // interpolator is W16
     557     2124619 :         move16();
     558             : 
     559     2124619 :         v_multc_fixed( h_synthesis_state.mixing_matrix_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nX ) );
     560     2124619 :         mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_old_exp[param_band_idx]; // interpolator is W16
     561     2124619 :         move16();
     562             : 
     563     2124619 :         v_add_fixed_me( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_smooth_fx, &mixing_matrix_smooth_e, imult1616( nY, nX ), 0 );
     564             : 
     565     2124619 :         IF( have_residual )
     566             :         {
     567             :             /* residual mixing matrix interpolation*/
     568             : 
     569     1709608 :             v_multc_fixed( mixing_matrix_res_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_res_smooth_fx, imult1616( nY, nY ) );
     570     1709608 :             mixing_matrix_res_smooth_e = mixing_matrix_res_e[param_band_idx]; // interpolator is W16
     571     1709608 :             move16();
     572             : 
     573     1709608 :             set_zero_fx( mixing_matrix_buffer_fx, imult1616( nY, nY ) );
     574     1709608 :             v_multc_fixed( h_synthesis_state.mixing_matrix_res_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nY ) );
     575     1709608 :             mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_res_old_exp[param_band_idx]; // interpolator is W16
     576     1709608 :             move16();
     577             : 
     578     1709608 :             v_add_fixed_me( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_res_smooth_fx, &mixing_matrix_res_smooth_e, imult1616( nY, nY ), 0 );
     579             :         }
     580             : 
     581             : 
     582    11119299 :         FOR( band = brange[0]; band < brange[1]; band++ )
     583             :         {
     584     8994680 :             assert( band >= 0 );
     585             : 
     586     8994680 :             IF( have_residual )
     587             :             {
     588             :                 /* collect diffuse prototypes */
     589     3273340 :                 assert( LT_16( band, hParamMC->h_output_synthesis_params.max_band_decorr ) );
     590    23265500 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     591             :                 {
     592    19992160 :                     diff_f_real_fx[ch_idx] = Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band];
     593    19992160 :                     move32();
     594    19992160 :                     diff_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band];
     595    19992160 :                     move32();
     596             :                 }
     597             : 
     598             :                 /* apply residual mixing */
     599             :                 {
     600             :                     Word16 shifter;
     601     3273340 :                     shifter = sub( mixing_matrix_res_smooth_e, 32 );
     602    23265500 :                     FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     603             :                     {
     604             :                         int i;
     605             :                         Word16 idx;
     606             :                         Word64 temp_real, temp_imag;
     607             : 
     608             : 
     609    19992160 :                         idx = ch_idx;
     610    19992160 :                         temp_real = 0;
     611    19992160 :                         temp_imag = 0;
     612    19992160 :                         move64();
     613    19992160 :                         move64();
     614   143222880 :                         for ( i = 0; i < nY; i++ )
     615             :                         {
     616   123230720 :                             temp_real = W_mac_32_32( temp_real, mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] );
     617   123230720 :                             temp_imag = W_mac_32_32( temp_imag, mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] );
     618   123230720 :                             idx += nY;
     619             :                         }
     620    19992160 :                         Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_real, shifter );
     621    19992160 :                         Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_shl_sat_l( temp_imag, shifter );
     622             :                     }
     623             :                 }
     624             :             }
     625             :             ELSE
     626             :             {
     627    40612700 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     628             :                 {
     629    34891360 :                     Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
     630    34891360 :                     move32();
     631    34891360 :                     Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
     632    34891360 :                     move32();
     633             :                 }
     634             :             }
     635             : 
     636             :             /* collect input signals, still in cldfb buffers */
     637    27060840 :             FOR( ch_idx = 0; ch_idx < nX; ch_idx++ )
     638             :             {
     639             : 
     640    18066160 :                 input_f_real_fx[ch_idx] = Cldfb_RealBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
     641    18066160 :                 move32();
     642    18066160 :                 input_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
     643    18066160 :                 move32();
     644             :             }
     645             : 
     646             :             /* apply mixing matrix */
     647             :             {
     648             :                 Word16 shifter;
     649     8994680 :                 shifter = 31 - mixing_matrix_smooth_e;
     650             : 
     651    63878200 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     652             :                 {
     653             :                     int i;
     654             :                     Word16 idx;
     655             :                     Word64 temp_real, temp_imag;
     656             : 
     657             : 
     658    54883520 :                     idx = ch_idx;
     659    54883520 :                     temp_real = 0;
     660    54883520 :                     temp_imag = 0;
     661    54883520 :                     move64();
     662    54883520 :                     move64();
     663   165495360 :                     for ( i = 0; i < nX; i++ )
     664             :                     {
     665   110611840 :                         temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) );
     666   110611840 :                         temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) );
     667   110611840 :                         idx += nY;
     668             :                     }
     669    54883520 :                     Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) );
     670    54883520 :                     move32();
     671    54883520 :                     Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
     672    54883520 :                     move32();
     673             :                 }
     674             :             }
     675             :         }
     676             :     }
     677             : 
     678      163667 :     return;
     679             : }
     680             : 
     681             : /*-------------------------------------------------------------------*
     682             :  * computeMixingMatrices()
     683             :  *
     684             :  * compute a mixing matrix using the convariance synthesis approach
     685             :  *-------------------------------------------------------------------*/
     686             : 
     687      132787 : Word16 computeMixingMatrices_fx(
     688             :     const Word16 num_inputs,  /* i  : number of input channels                                                                                      */
     689             :     const Word16 num_outputs, /* i  : number of output channels                                                                                     */
     690             :     const Word32 *Cx,         /* i  : input channel covariance matrix Q(31-Cx_e)                                                                               */
     691             :     Word16 Cx_e,
     692             :     const Word32 *Cy, /* i  : target covariance matrix Q(31-Cy_e)                                                                               */
     693             :     Word16 Cy_e,
     694             :     const Word32 *Q, /* i  : prototype matrix (usually a upmix matrix) Q_fx_e                                                                     */
     695             :     Word16 Q_fx_e,
     696             :     const Word16 energy_compensation_flag, /* i  : flag indicating that the energy compensation should be performed (i.e. no residual mixing matrix will follow) */
     697             :     const Word32 reg_Sx_fx,                /* i  : regularization factor for the input channel singular values                                                   */
     698             :     Word16 reg_Sx_e,
     699             :     const Word32 reg_ghat_fx, /* i  : regularization factor for the normalization matrix  Q(31-reg_ghat_e)                                                         */
     700             :     Word16 reg_ghat_e,
     701             :     Word32 *mixing_matrix_fx, /* o  : resulting mixing matrix       Q(31-mixing_matrix_out_e)                                                                                 */
     702             :     Word16 *mixing_matrix_out_e,
     703             :     Word32 *Cr_fx, /* o  : residual covariance matrix Q(31-Cr_e)                                                                                    */
     704             :     Word16 *Cr_e )
     705             : {
     706             :     Word16 i, j;
     707             :     Word16 out;
     708             :     Word16 nL, nC;
     709      132787 :     Word16 lengthCx = num_inputs;
     710      132787 :     Word16 lengthCy = num_outputs;
     711             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
     712             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     713             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     714             :     Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     715             :     Word32 Cy_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     716             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
     717             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
     718             :     Word16 Cx_fx_e;
     719             :     Word16 Cy_fx_e;
     720             : 
     721             :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
     722             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
     723             : 
     724             : 
     725             :     Word32 limit_fx;
     726             :     Word16 limit_e;
     727             : 
     728             :     Word32 L_tmp;
     729             :     Word16 tmp_e, tmp, exp;
     730             : 
     731             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     732             :     Word32 Kx_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     733             : 
     734             :     Word16 Kx_fx_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     735             :     Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     736             : 
     737             :     Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     738             :     Word16 Kx_reg_inv_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     739             : 
     740             :     Word32 Q_fx[PARAM_MC_MAX_TRANSPORT_CHANS * MAX_CICP_CHANNELS];
     741             :     Word16 Q_e;
     742             : 
     743             :     Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     744             :     Word16 Q_Cx_e;
     745             : 
     746             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
     747             :     Word16 Cy_hat_diag_e;
     748             :     Word16 Cy_hat_diag_buff_e[MAX_OUTPUT_CHANNELS];
     749             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
     750             :     Word16 G_hat_buff_e[MAX_OUTPUT_CHANNELS];
     751             : 
     752             :     Word16 mat_mult_buffer2_e, mat_mult_buffer3_e;
     753             : 
     754             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     755             : 
     756             :     Word16 mixing_matrix_e;
     757             : 
     758             :     Word16 Cr_fx_e;
     759             : 
     760             : 
     761             :     Word32 adj_fx[MAX_OUTPUT_CHANNELS];
     762             :     Word16 adj_e[MAX_OUTPUT_CHANNELS];
     763             :     Word32 *adj_fx_p;
     764             :     Word16 adj_fx_e;
     765             : 
     766             :     Word32 *Cr_p_fx, *Cy_tilde_p_fx, *Cy_p_fx;
     767      132787 :     push_wmops( "dirac_cov_mix_mat" );
     768             : 
     769      132787 :     out = EXIT_SUCCESS;
     770      132787 :     move16();
     771             : 
     772      132787 :     set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
     773     2257379 :     FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
     774             :     {
     775     2124592 :         set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     776     2124592 :         set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     777     2124592 :         set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     778             :     }
     779             : 
     780             : 
     781      132787 :     Copy32( Q, Q_fx, imult1616( lengthCy, lengthCx ) );
     782      132787 :     Copy32( Cx, Cx_fx, imult1616( lengthCx, lengthCx ) );
     783      132787 :     Copy32( Cy, Cy_fx, imult1616( lengthCy, lengthCy ) );
     784             : 
     785      132787 :     Q_e = Q_fx_e;
     786      132787 :     move16();
     787      132787 :     Cx_fx_e = Cx_e;
     788      132787 :     move16();
     789      132787 :     Cy_fx_e = Cy_e;
     790      132787 :     move16();
     791             : 
     792             : 
     793             :     /*-----------------------------------------------------------------*
     794             :      * Decomposition of Cy
     795             :      *-----------------------------------------------------------------*/
     796             : 
     797             :     /* Processing the SVD */
     798             : 
     799      132787 :     mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
     800             : 
     801      132787 :     svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
     802      132787 :     Word16 max_e = -32;
     803             :     /* Computing Ky */
     804      812472 :     FOR( i = 0; i < lengthCy; ++i )
     805             :     {
     806     4209670 :         FOR( j = 0; j < lengthCy; ++j )
     807             :         {
     808     3529985 :             tmp_e = svd_s_buffer_e[j];
     809     3529985 :             move16();
     810     3529985 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
     811     3529985 :             Ky_fx[i + ( j * lengthCy )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
     812     3529985 :             move32();
     813     3529985 :             Ky_fx_e[i + ( j * lengthCy )] = tmp_e;
     814     3529985 :             move16();
     815     3529985 :             max_e = s_max( max_e, tmp_e );
     816             :         }
     817             :     }
     818     3662772 :     FOR( i = 0; i < lengthCy * lengthCy; ++i )
     819             :     {
     820     3529985 :         Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
     821     3529985 :         move32();
     822     3529985 :         Ky_fx_e[i] = max_e;
     823     3529985 :         move16();
     824             :     }
     825             : 
     826             :     /*-----------------------------------------------------------------*
     827             :      * Decomposition of Cx
     828             :      *-----------------------------------------------------------------*/
     829             : 
     830             :     /* Processing the SVD */
     831             : 
     832      132787 :     mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 );
     833             : 
     834      132787 :     svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCx );
     835      132787 :     max_e = -32;
     836      399721 :     FOR( i = 0; i < lengthCx; ++i )
     837             :     {
     838      804882 :         FOR( j = 0; j < lengthCx; ++j )
     839             :         {
     840      537948 :             tmp_e = svd_s_buffer_e[j];
     841      537948 :             move16();
     842      537948 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
     843      537948 :             Kx_fx[( i + ( j * lengthCx ) )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
     844      537948 :             move32();
     845      537948 :             Kx_fx_e[( i + ( j * lengthCx ) )] = tmp_e;
     846      537948 :             move16();
     847      537948 :             max_e = s_max( max_e, tmp_e );
     848             :         }
     849             :     }
     850      670735 :     FOR( i = 0; i < lengthCx * lengthCx; ++i )
     851             :     {
     852      537948 :         Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
     853      537948 :         move32();
     854      537948 :         Kx_fx_e[i] = max_e;
     855      537948 :         move16();
     856             :     }
     857             : 
     858      399721 :     FOR( i = 0; i < lengthCx; ++i )
     859             :     {
     860      266934 :         tmp_e = svd_s_buffer_e[i];
     861      266934 :         move16();
     862      266934 :         svd_s_buffer_fx[i] = Sqrt32( svd_s_buffer_fx[i], &tmp_e ); // Q(31-tmp_e)
     863      266934 :         move32();
     864      266934 :         svd_s_buffer_e[i] = tmp_e;
     865      266934 :         move16();
     866             :     }
     867             : 
     868             :     /*-----------------------------------------------------------------*
     869             :      * Regularization of Sx
     870             :      *-----------------------------------------------------------------*/
     871             : 
     872      132787 :     limit_fx = svd_s_buffer_fx[0];
     873      132787 :     move32();
     874      132787 :     limit_e = svd_s_buffer_e[0];
     875      132787 :     move16();
     876      266934 :     FOR( i = 1; i < lengthCx; i++ )
     877             :     {
     878      134147 :         IF( GT_32( svd_s_buffer_fx[i], L_shl_sat( limit_fx, sub( limit_e, svd_s_buffer_e[i] ) ) ) )
     879             :         {
     880           0 :             limit_fx = svd_s_buffer_fx[i];
     881           0 :             move32();
     882           0 :             limit_e = svd_s_buffer_e[i];
     883           0 :             move16();
     884             :         }
     885             :     }
     886             : 
     887      132787 :     limit_e = add( limit_e, reg_Sx_e );
     888      132787 :     limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx );
     889      132787 :     IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, SQRT_EPSILON_FX /* Q31 */, 0 ) < 0 )
     890             :     {
     891           0 :         limit_fx = SQRT_EPSILON_FX;
     892           0 :         move32();
     893           0 :         limit_e = 0;
     894           0 :         move16();
     895             :     }
     896             : 
     897      399721 :     FOR( i = 0; i < lengthCx; ++i )
     898             :     {
     899      266934 :         IF( LT_32( L_shl_sat( svd_s_buffer_fx[i], sub( svd_s_buffer_e[i], limit_e ) ), limit_fx ) )
     900             :         {
     901       58524 :             svd_s_buffer_fx[i] = limit_fx;
     902       58524 :             move32();
     903       58524 :             svd_s_buffer_e[i] = limit_e;
     904       58524 :             move16();
     905             :         }
     906             :     }
     907             : 
     908             : 
     909      132787 :     limit_fx = 0;
     910      132787 :     move32();
     911      132787 :     limit_e = 0;
     912      132787 :     move16();
     913             : 
     914             : 
     915             :     /*-----------------------------------------------------------------*
     916             :      * regularized Kx-1
     917             :      *-----------------------------------------------------------------*/
     918             : 
     919      399721 :     FOR( i = 0; i < lengthCx; ++i )
     920             :     {
     921             :         Word16 scale, reg_fac_fx;
     922      266934 :         reg_fac_fx = BASOP_Util_Divide3232_Scale( 1, svd_s_buffer_fx[i], &scale );
     923      266934 :         scale = add( scale, sub( Q31, svd_s_buffer_e[i] ) );
     924      804882 :         FOR( j = 0; j < lengthCx; ++j )
     925             :         {
     926      537948 :             Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1( svd_u_buffer_fx[j][i], reg_fac_fx ); // Q(31-scale)
     927      537948 :             move32();
     928      537948 :             Kx_reg_inv_e[i + j * lengthCx] = scale;
     929      537948 :             move16();
     930             :         }
     931             :     }
     932             : 
     933             :     /*-----------------------------------------------------------------*
     934             :      * normalization matrix G hat
     935             :      *-----------------------------------------------------------------*/
     936             : 
     937             :     /* Computing Q*Cx*Q' */
     938             : 
     939      132787 :     matrix_product_mant_exp_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_fx, Cx_fx_e, lengthCx, lengthCx, 0, Q_Cx_fx, &Q_Cx_e );
     940             : 
     941      132787 :     matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
     942             : 
     943             : 
     944      132787 :     Word16 com_e = sub( limit_e, Cy_hat_diag_e );
     945      812472 :     FOR( i = 0; i < lengthCy; ++i )
     946             :     {
     947      679685 :         IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
     948             :         {
     949      679685 :             limit_fx = Cy_hat_diag_fx[i];
     950      679685 :             move32();
     951      679685 :             limit_e = Cy_hat_diag_e;
     952      679685 :             move16();
     953             :         }
     954             :     }
     955      132787 :     limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
     956      132787 :     limit_e = add( limit_e, reg_ghat_e );
     957             : 
     958      132787 :     com_e = sub( Cy_hat_diag_e, limit_e );
     959      812472 :     FOR( i = 0; i < lengthCy; ++i )
     960             :     {
     961      679685 :         Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
     962      679685 :         move16();
     963             : 
     964      679685 :         IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) )
     965             :         {
     966          15 :             Cy_hat_diag_fx[i] = limit_fx;
     967          15 :             move32();
     968          15 :             Cy_hat_diag_buff_e[i] = limit_e;
     969          15 :             move16();
     970             :         }
     971             : 
     972      679685 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[( i + ( i * lengthCy ) )], Cy_hat_diag_fx[i], &exp );
     973      679685 :         exp = add( exp, sub( Cy_fx_e, Cy_hat_diag_buff_e[i] ) );
     974      679685 :         L_tmp = Sqrt32( L_deposit_h( tmp ), &exp );
     975      679685 :         G_hat_fx[i] = L_tmp;
     976      679685 :         move32();
     977      679685 :         G_hat_buff_e[i] = exp;
     978      679685 :         move16();
     979             :     }
     980             : 
     981             : 
     982             :     /*-----------------------------------------------------------------*
     983             :      * Formulate optimal P
     984             :      *-----------------------------------------------------------------*/
     985             : 
     986             :     /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
     987             : 
     988             :     Word16 mat_mult_buffer1_fx_e;
     989             : 
     990             :     Word16 mat_mult_buffer2_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     991             : 
     992      132787 :     matrix_product_mant_exp_fx( Kx_fx, Kx_fx_e[0], lengthCx, lengthCx, 1, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
     993             : 
     994      132787 :     matrix_diag_product_fx_2( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_buff_e, lengthCy, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
     995             : 
     996      132787 :     matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_fx_e[0], lengthCx, lengthCy, 0, Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
     997             : 
     998      132787 :     IF( LT_16( lengthCx, lengthCy ) )
     999             :     {
    1000      132787 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 );
    1001      132787 :         nL = lengthCy;
    1002      132787 :         move16();
    1003      132787 :         nC = lengthCx;
    1004      132787 :         move16();
    1005      132787 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
    1006             :     }
    1007             :     ELSE
    1008             :     {
    1009           0 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
    1010           0 :         nL = lengthCx;
    1011           0 :         move16();
    1012           0 :         nC = lengthCy;
    1013           0 :         move16();
    1014           0 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
    1015             :     }
    1016             : 
    1017             :     /* Actually Processing P */
    1018             : 
    1019             :     /* can be skipped: lambda is always column-truncated identity matrix, so this operation just
    1020             :        truncates V to num_input_channel columns */
    1021             : 
    1022      132787 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
    1023      132787 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1024             : 
    1025      132787 :     mat_mult_buffer1_fx_e = 0;
    1026      132787 :     move16();
    1027      132787 :     mat_mult_buffer2_e = 0;
    1028      132787 :     move16();
    1029             : 
    1030      132787 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCy, lengthCx, 0,
    1031             :                                 mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
    1032             :                                 mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1033             : 
    1034             :     /************************ Formulate M **********************/
    1035             : 
    1036      132787 :     matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
    1037             : 
    1038             :     Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
    1039             : 
    1040             :     Word16 mat_mult_buffer1_fx_e1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1041      132787 :     set16_fx( mat_mult_buffer1_fx_e1, mat_mult_buffer1_fx_e, MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
    1042             : 
    1043      132787 :     matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e );
    1044             : 
    1045             :     /*-----------------------------------------------------------------*
    1046             :      * Formulate Cr
    1047             :      *-----------------------------------------------------------------*/
    1048             : 
    1049             :     /* Compute Cy_tilde = M*Cx*M' */
    1050             : 
    1051             : 
    1052             :     Word16 Cx_e_arr[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
    1053      132787 :     set16_fx( Cx_e_arr, Cx_fx_e, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
    1054      132787 :     matrix_product_mant_exp( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1 );
    1055             : 
    1056      132787 :     matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
    1057             : 
    1058      132787 :     exp = mixing_matrix_fx_e[0];
    1059      132787 :     move16();
    1060     1372340 :     FOR( i = 1; i < lengthCy * lengthCx; i++ )
    1061             :     {
    1062     1239553 :         if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
    1063             :         {
    1064       97373 :             exp = mixing_matrix_fx_e[i];
    1065       97373 :             move16();
    1066             :         }
    1067             :     }
    1068             : 
    1069     1505127 :     FOR( i = 0; i < lengthCy * lengthCx; i++ )
    1070             :     {
    1071     1372340 :         mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
    1072     1372340 :         move32();
    1073             :     }
    1074             : 
    1075      132787 :     mixing_matrix_e = exp;
    1076      132787 :     move16();
    1077             : 
    1078      132787 :     Cr_p_fx = Cr_fx;
    1079      132787 :     Cy_p_fx = Cy_fx;
    1080      132787 :     Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1081             :     Word16 Cr_e_arr[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
    1082      812472 :     FOR( i = 0; i < lengthCy; ++i )
    1083             :     {
    1084     4209670 :         FOR( j = 0; j < lengthCy; ++j )
    1085             :         {
    1086             : 
    1087     3529985 :             *( Cr_p_fx ) = BASOP_Util_Add_Mant32Exp( *( Cy_p_fx ), Cy_fx_e, L_negate( *( Cy_tilde_p_fx ) ), mat_mult_buffer2_fx_e[i * lengthCy + j], &Cr_e_arr[i * lengthCy + j] );
    1088     3529985 :             move32();
    1089     3529985 :             Cr_p_fx++;
    1090     3529985 :             Cy_p_fx++;
    1091     3529985 :             Cy_tilde_p_fx++;
    1092             :         }
    1093             : 
    1094             :         /* Avoid Meaningless negative main diagonal elements */
    1095      679685 :         IF( Cr_fx[i + ( i * lengthCy )] < 0 )
    1096             :         {
    1097       76181 :             Cr_fx[i + ( i * lengthCy )] = 0;
    1098       76181 :             move32();
    1099       76181 :             Cr_e_arr[i + ( i * lengthCy )] = 0;
    1100       76181 :             move16();
    1101             :         }
    1102             :     }
    1103             : 
    1104      132787 :     exp = Cr_e_arr[0];
    1105      132787 :     move16();
    1106     3529985 :     FOR( i = 1; i < lengthCy * lengthCy; i++ )
    1107             :     {
    1108     3397198 :         if ( LT_16( exp, Cr_e_arr[i] ) )
    1109             :         {
    1110      126463 :             exp = Cr_e_arr[i];
    1111      126463 :             move16();
    1112             :         }
    1113             :     }
    1114             : 
    1115     3662772 :     FOR( i = 0; i < lengthCy * lengthCy; i++ )
    1116             :     {
    1117     3529985 :         Cr_fx[i] = L_shr( Cr_fx[i], sub( exp, Cr_e_arr[i] ) ); // Q(31-exp)
    1118     3529985 :         move32();
    1119             :     }
    1120             : 
    1121      132787 :     Cr_fx_e = exp;
    1122      132787 :     move16();
    1123             : 
    1124      132787 :     exp = mat_mult_buffer2_fx_e[0];
    1125      132787 :     move16();
    1126     3529985 :     FOR( i = 1; i < lengthCy * lengthCy; i++ )
    1127             :     {
    1128     3397198 :         if ( LT_16( exp, mat_mult_buffer2_fx_e[i] ) )
    1129             :         {
    1130      184298 :             exp = mat_mult_buffer2_fx_e[i];
    1131      184298 :             move16();
    1132             :         }
    1133             :     }
    1134             : 
    1135     3662772 :     FOR( i = 0; i < lengthCy * lengthCy; i++ )
    1136             :     {
    1137     3529985 :         mat_mult_buffer2_fx[i] = L_shr( mat_mult_buffer2_fx[i], sub( exp, mat_mult_buffer2_fx_e[i] ) ); // Q(31-exp)
    1138     3529985 :         move32();
    1139             :     }
    1140             : 
    1141      132787 :     mat_mult_buffer2_e = exp;
    1142      132787 :     move16();
    1143             : 
    1144             :     /*-----------------------------------------------------------------*
    1145             :      * Energy Compensation
    1146             :      *-----------------------------------------------------------------*/
    1147             : 
    1148      132787 :     IF( EQ_16( energy_compensation_flag, 1 ) )
    1149             :     {
    1150       25938 :         adj_fx_p = svd_s_buffer_fx;
    1151       25938 :         Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1152             : 
    1153      158348 :         FOR( i = 0; i < lengthCy; ++i )
    1154             :         {
    1155             :             /* Avoid correction for very small energies,
    1156             :                main diagonal elements of Cy_tilde_p may be negative */
    1157      132410 :             IF( Cy_tilde_p_fx[i + ( i * lengthCy )] < 0 )
    1158             :             {
    1159           0 :                 adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1160           0 :                 move32();
    1161           0 :                 adj_e[i] = 1;
    1162           0 :                 move16();
    1163             :             }
    1164             :             ELSE
    1165             :             {
    1166      132410 :                 tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( i * lengthCy )], L_add( Cy_tilde_p_fx[i + ( i * lengthCy )], EPSILON_FX ), &exp );
    1167      132410 :                 exp = add( exp, sub( Cy_fx_e, mat_mult_buffer2_e ) );
    1168      132410 :                 L_tmp = L_deposit_h( tmp );
    1169      132410 :                 L_tmp = Sqrt32( L_tmp, &exp );
    1170      132410 :                 adj_fx_p[i] = L_tmp;
    1171      132410 :                 move32();
    1172      132410 :                 adj_e[i] = exp;
    1173      132410 :                 move16();
    1174             :             }
    1175             : 
    1176      132410 :             Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, adj_e[i] ) );
    1177      132410 :             IF( GT_32( adj_fx_p[i], temp ) )
    1178             :             {
    1179           6 :                 adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1180           6 :                 move32();
    1181           6 :                 adj_e[i] = 3;
    1182           6 :                 move16();
    1183             :             }
    1184             :         }
    1185             : 
    1186       25938 :         exp = adj_e[0];
    1187       25938 :         move16();
    1188      132410 :         FOR( i = 1; i < lengthCy; i++ )
    1189             :         {
    1190      106472 :             if ( LT_16( exp, adj_e[i] ) )
    1191             :             {
    1192        1422 :                 exp = adj_e[i];
    1193        1422 :                 move16();
    1194             :             }
    1195             :         }
    1196             : 
    1197      158348 :         FOR( i = 0; i < lengthCy; i++ )
    1198             :         {
    1199      132410 :             adj_fx[i] = L_shr( adj_fx_p[i], sub( exp, adj_e[i] ) ); // Q(31-exp)
    1200      132410 :             move32();
    1201             :         }
    1202       25938 :         adj_fx_e = exp;
    1203       25938 :         move16();
    1204             : 
    1205       25938 :         diag_matrix_product_fx( adj_fx, adj_fx_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1206             : 
    1207       25938 :         Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCx, lengthCy ) ); // Q(31-mat_mult_buffer3_e)
    1208       25938 :         mixing_matrix_e = mat_mult_buffer3_e;
    1209       25938 :         move16();
    1210             :     }
    1211             : 
    1212      132787 :     *mixing_matrix_out_e = mixing_matrix_e;
    1213      132787 :     move16();
    1214      132787 :     *Cr_e = Cr_fx_e;
    1215      132787 :     move16();
    1216      132787 :     pop_wmops();
    1217             : 
    1218      132787 :     return out;
    1219             : }
    1220             : 
    1221             : /*-------------------------------------------------------------------*
    1222             :  * computeMixingMatricesResidual()
    1223             :  *
    1224             :  * compute a residual mixing matrix using the covariance synthesis approach
    1225             :  *-------------------------------------------------------------------*/
    1226             : 
    1227      106849 : Word16 computeMixingMatricesResidual_fx(
    1228             :     const Word32 num_outputs, /* i  : number of output channels                                           */
    1229             :     const Word32 *Cx_fx,      /* i  : vector containing the diagonal diffuse prototype covariance Q(31-Cx_e)         */
    1230             :     const Word16 Cx_e,
    1231             :     const Word32 *Cy_fx, /* i  : matrix containing the missing cov (Cr from computeMixingMatrices()) Q(31-Cy_fx_e) */
    1232             :     const Word16 Cy_fx_e,
    1233             :     const Word32 reg_Sx_fx, /* i  : regularization factor for the input channel singular values Q(31-reg_Sx_e)         */
    1234             :     const Word16 reg_Sx_e,
    1235             :     const Word32 reg_ghat_fx, /* i  : regularization factor for the normalization matrix Q(31-reg_ghat_e)                  */
    1236             :     const Word16 reg_ghat_e,
    1237             :     Word32 *mixing_matrix_fx, /* o  : resulting residual mixing matrix Q(31-mixing_matrix_ret_e)                                    */
    1238             :     Word16 *mixing_matrix_ret_e )
    1239             : {
    1240             :     Word16 i, j;
    1241             :     Word16 out, lengthCx, lengthCy;
    1242      106849 :     out = EXIT_SUCCESS;
    1243      106849 :     move16();
    1244      106849 :     lengthCx = extract_l( num_outputs );
    1245      106849 :     lengthCy = extract_l( num_outputs );
    1246             : 
    1247             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1248             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1249             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
    1250             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
    1251             : 
    1252             :     Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1253             : 
    1254      106849 :     push_wmops( "dirac_cov_mix_mat_r" );
    1255             : 
    1256             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1257             :     Word32 adj_fx[MAX_OUTPUT_CHANNELS];
    1258             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1259      106849 :     Word16 mixing_matrix_e = 0, mat_mult_buffer1_e, adj_e, mat_mult_buffer3_e, mat_mult_buffer2_e;
    1260      106849 :     move16();
    1261             : 
    1262      106849 :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 };
    1263             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
    1264             :     Word32 L_tmp;
    1265             :     Word16 tmp_e;
    1266             :     Word16 tmp, scale;
    1267             :     Word32 div_tmp;
    1268             :     Word16 exp;
    1269             : 
    1270             :     Word32 Kx_fx[MAX_OUTPUT_CHANNELS];
    1271             :     Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1272             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1273             :     Word16 Kx_fx_e[MAX_OUTPUT_CHANNELS];
    1274             : 
    1275             :     Word32 limit_fx;
    1276             :     Word16 limit_e;
    1277             : 
    1278             :     Word32 Kx_reg_inv_fx[MAX_OUTPUT_CHANNELS];
    1279             :     Word16 Kx_reg_inv_e[MAX_OUTPUT_CHANNELS];
    1280             : 
    1281             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
    1282             :     Word16 Cy_hat_diag_e;
    1283             : 
    1284             :     Word16 Cy_hat_diag_fx_e[MAX_OUTPUT_CHANNELS];
    1285             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
    1286             :     Word16 G_hat_e[MAX_OUTPUT_CHANNELS];
    1287             : 
    1288             :     Word32 *adj_fx_p;
    1289             :     Word16 adj_buff_e[MAX_OUTPUT_CHANNELS];
    1290             : 
    1291             :     Word32 Cy_tilde_fx[MAX_OUTPUT_CHANNELS];
    1292             :     Word16 Cy_tilde_e;
    1293             : 
    1294             :     /*-----------------------------------------------------------------*
    1295             :      * Decomposition of Cy
    1296             :      *-----------------------------------------------------------------*/
    1297             : 
    1298             :     /* Processing the SVD */
    1299             : 
    1300             :     /* linear array to svd buffer */
    1301      106849 :     mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
    1302             : 
    1303      106849 :     svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
    1304             : 
    1305             :     /* Computing Ky */
    1306      106849 :     Word16 max_e = -32;
    1307      654124 :     FOR( i = 0; i < lengthCy; ++i )
    1308             :     {
    1309     3391850 :         FOR( j = 0; j < lengthCy; ++j )
    1310             :         {
    1311     2844575 :             tmp_e = svd_s_buffer_e[j];
    1312     2844575 :             move16();
    1313     2844575 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
    1314     2844575 :             Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
    1315     2844575 :             move32();
    1316     2844575 :             Ky_fx_e[i + j * lengthCy] = tmp_e;
    1317     2844575 :             move16();
    1318     2844575 :             max_e = s_max( max_e, tmp_e );
    1319             :         }
    1320             :     }
    1321             : 
    1322     2951424 :     FOR( i = 0; i < lengthCy * lengthCy; ++i )
    1323             :     {
    1324     2844575 :         Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
    1325     2844575 :         move32();
    1326     2844575 :         Ky_fx_e[i] = max_e;
    1327     2844575 :         move16();
    1328             :     }
    1329             : 
    1330             :     /*-----------------------------------------------------------------*
    1331             :      * Decomposition of Cx
    1332             :      *-----------------------------------------------------------------*/
    1333             : 
    1334             :     /* Processing the SVD of Cx*/
    1335             :     /* Cx is a diagonal matrix, so SVD would lead to the sorted diagonal as S and u
    1336             :      *  would be just indicating the sorting index, so go straight to Kx as the
    1337             :      * square root of the diagonal of Cx */
    1338             : 
    1339             :     /* Computing Kx */
    1340      106849 :     max_e = -32;
    1341      654124 :     FOR( i = 0; i < lengthCx; ++i )
    1342             :     {
    1343      547275 :         exp = Cx_e;
    1344      547275 :         move16();
    1345      547275 :         Kx_fx[i] = Sqrt32( Cx_fx[i], &exp );
    1346      547275 :         move32();
    1347      547275 :         Kx_fx_e[i] = exp;
    1348      547275 :         move16();
    1349      547275 :         max_e = s_max( max_e, exp );
    1350             :     }
    1351             : 
    1352      654124 :     FOR( i = 0; i < lengthCx; ++i )
    1353             :     {
    1354      547275 :         Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
    1355      547275 :         move32();
    1356      547275 :         Kx_fx_e[i] = max_e;
    1357      547275 :         move16();
    1358             :     }
    1359             : 
    1360             :     /*-----------------------------------------------------------------*
    1361             :      * Regularization of Sx
    1362             :      *-----------------------------------------------------------------*/
    1363             : 
    1364      106849 :     limit_fx = Kx_fx[0];
    1365      106849 :     move32();
    1366             : 
    1367      547275 :     FOR( i = 1; i < lengthCx; i++ )
    1368             :     {
    1369      440426 :         IF( GT_32( Kx_fx[i], limit_fx ) )
    1370             :         {
    1371      165660 :             limit_fx = Kx_fx[i];
    1372      165660 :             move32();
    1373             :         }
    1374             :     }
    1375             : 
    1376      106849 :     L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx ); // limit_e + reg_Sx_e
    1377      106849 :     L_tmp = L_add( L_tmp, EPSILLON_FX );
    1378      106849 :     limit_fx = L_tmp;
    1379      106849 :     move16();
    1380      106849 :     limit_e = add( Kx_fx_e[0], reg_Sx_e );
    1381             : 
    1382      654124 :     FOR( i = 0; i < lengthCx; ++i )
    1383             :     {
    1384      547275 :         IF( GT_32( Kx_fx[i], L_shl_sat( limit_fx, sub( limit_e, Kx_fx_e[i] ) ) ) )
    1385             :         {
    1386      545970 :             div_tmp = Kx_fx[i];
    1387      545970 :             move32();
    1388      545970 :             exp = Kx_fx_e[i];
    1389      545970 :             move16();
    1390             :         }
    1391             :         ELSE
    1392             :         {
    1393        1305 :             div_tmp = limit_fx;
    1394        1305 :             move32();
    1395        1305 :             exp = limit_e;
    1396        1305 :             move16();
    1397             :         }
    1398      547275 :         tmp = BASOP_Util_Divide3232_Scale( 1073741824, div_tmp, &scale ); // 1073741824 -> 1.0f in Q30
    1399      547275 :         scale = add( scale, sub( Q1, exp ) );
    1400             : 
    1401      547275 :         Kx_reg_inv_fx[i] = L_deposit_h( tmp );
    1402      547275 :         move32();
    1403      547275 :         Kx_reg_inv_e[i] = scale;
    1404      547275 :         move16();
    1405             :     }
    1406             : 
    1407      106849 :     limit_fx = 0;
    1408      106849 :     move32();
    1409      106849 :     limit_e = 0;
    1410      106849 :     move16();
    1411             : 
    1412             :     /*-----------------------------------------------------------------*
    1413             :      * regularized Kx-1
    1414             :      *-----------------------------------------------------------------*/
    1415             : 
    1416             :     /*-----------------------------------------------------------------*
    1417             :      * normalization matrix G hat
    1418             :      *-----------------------------------------------------------------*/
    1419             : 
    1420             :     /* Computing Cy_hat_diag */
    1421      106849 :     Copy32( Cx_fx, Cy_hat_diag_fx, extract_l( num_outputs ) ); // Q(31-Cx_e)
    1422      106849 :     Cy_hat_diag_e = Cx_e;
    1423      106849 :     move16();
    1424             : 
    1425      106849 :     Word16 com_e = sub( limit_e, Cy_hat_diag_e );
    1426      654124 :     FOR( i = 0; i < lengthCy; ++i )
    1427             :     {
    1428      547275 :         IF( GT_32( Cy_hat_diag_fx[i], L_shl_sat( limit_fx, com_e ) ) )
    1429             :         {
    1430      547275 :             limit_fx = Cy_hat_diag_fx[i];
    1431      547275 :             move32();
    1432      547275 :             limit_e = Cy_hat_diag_e;
    1433      547275 :             move16();
    1434             :         }
    1435             :     }
    1436             : 
    1437      106849 :     limit_fx = Madd_32_32( EPSILON_FX, limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
    1438      106849 :     limit_e = add( limit_e, reg_ghat_e );
    1439             : 
    1440             :     /* Computing G_hat */
    1441             : 
    1442      106849 :     com_e = sub( Cy_hat_diag_e, limit_e );
    1443      654124 :     FOR( i = 0; i < lengthCy; ++i )
    1444             :     {
    1445      547275 :         Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
    1446      547275 :         move16();
    1447      547275 :         IF( GT_32( limit_fx, L_shl_sat( Cy_hat_diag_fx[i], com_e ) ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
    1448             :         {
    1449          15 :             Cy_hat_diag_fx[i] = limit_fx;
    1450          15 :             move32();
    1451          15 :             Cy_hat_diag_fx_e[i] = limit_e;
    1452          15 :             move16();
    1453             :         }
    1454      547275 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + i * lengthCy], Cy_hat_diag_fx[i], &scale );
    1455      547275 :         scale = add( scale, sub( Cy_fx_e, Cy_hat_diag_fx_e[i] ) );
    1456      547275 :         L_tmp = Sqrt32( L_deposit_h( tmp ), &scale );
    1457             : 
    1458      547275 :         G_hat_fx[i] = L_tmp;
    1459      547275 :         move32();
    1460      547275 :         G_hat_e[i] = scale;
    1461      547275 :         move16();
    1462             :     }
    1463             : 
    1464             :     /*-----------------------------------------------------------------*
    1465             :      * Formulate optimal P
    1466             :      *-----------------------------------------------------------------*/
    1467             : 
    1468      654124 :     FOR( i = 0; i < num_outputs; i++ )
    1469             :     {
    1470      547275 :         Kx_fx[i] = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e))
    1471      547275 :         move32();
    1472      547275 :         Kx_fx_e[i] = add( Kx_fx_e[i], G_hat_e[i] );
    1473      547275 :         move16();
    1474             :     }
    1475             : 
    1476      654124 :     FOR( i = 0; i < num_outputs; i++ )
    1477             :     {
    1478             :         Word32 fac_fx;
    1479      547275 :         fac_fx = Kx_fx[i];
    1480      547275 :         move32();
    1481             : 
    1482     3391850 :         FOR( j = 0; j < num_outputs; j++ )
    1483             :         {
    1484     2844575 :             mat_mult_buffer1_fx[i + j * num_outputs] = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e));
    1485     2844575 :             move32();
    1486     2844575 :             mat_mult_buffer1_buff_e[i + j * num_outputs] = add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] );
    1487     2844575 :             move16();
    1488             :         }
    1489             :     }
    1490             : 
    1491      106849 :     mat_mult_buffer1_e = mat_mult_buffer1_buff_e[0];
    1492      106849 :     move16();
    1493     2844575 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1494             :     {
    1495     2737726 :         if ( LT_16( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) )
    1496             :         {
    1497       29410 :             mat_mult_buffer1_e = mat_mult_buffer1_buff_e[i];
    1498       29410 :             move16();
    1499             :         }
    1500             :     }
    1501             : 
    1502     2951424 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1503             :     {
    1504     2844575 :         mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) ); // Q(31-mat_mult_buffer1_e)
    1505     2844575 :         move32();
    1506             :     }
    1507             : 
    1508      106849 :     mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
    1509             : 
    1510      106849 :     svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCy );
    1511             : 
    1512             :     /* Actually Processing P */
    1513             : 
    1514      106849 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
    1515      106849 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1516      106849 :     mat_mult_buffer1_e = 0;
    1517      106849 :     move16();
    1518      106849 :     mat_mult_buffer2_e = 0;
    1519      106849 :     move16();
    1520             : 
    1521      106849 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0,
    1522             :                                 mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
    1523             :                                 mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1524             : 
    1525             :     /*-----------------------------------------------------------------*
    1526             :      * Formulate M
    1527             :      *-----------------------------------------------------------------*/
    1528             : 
    1529             : 
    1530      106849 :     matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
    1531      106849 :     set16_fx( mat_mult_buffer1_buff_e, mat_mult_buffer1_buff_e[0], MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
    1532             : 
    1533             :     Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
    1534             : 
    1535      654124 :     FOR( i = 0; i < num_outputs; i++ )
    1536             :     {
    1537             :         Word32 fac_fx;
    1538      547275 :         fac_fx = Kx_reg_inv_fx[i];
    1539      547275 :         move32();
    1540             : 
    1541     3391850 :         FOR( j = 0; j < num_outputs; j++ )
    1542             :         {
    1543     2844575 :             mixing_matrix_fx[j + i * num_outputs] = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e);
    1544     2844575 :             move32();
    1545     2844575 :             mixing_matrix_fx_e[j + i * num_outputs] = add( mat_mult_buffer1_buff_e[j + i * num_outputs], Kx_reg_inv_e[i] );
    1546     2844575 :             move16();
    1547             :         }
    1548             :     }
    1549             : 
    1550             :     /*-----------------------------------------------------------------*
    1551             :      * Formulate Cr
    1552             :      *-----------------------------------------------------------------*/
    1553             : 
    1554             :     /* Compute Cy_tilde = M*Cx*M' */
    1555             : 
    1556             :     Word16 Cx_e_arr[MAX_CICP_CHANNELS];
    1557      106849 :     set16_fx( Cx_e_arr, Cx_e, MAX_CICP_CHANNELS );
    1558             : 
    1559      106849 :     matrix_diag_product_fx_1( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
    1560             : 
    1561      106849 :     exp = mixing_matrix_fx_e[0];
    1562      106849 :     move16();
    1563     2844575 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1564             :     {
    1565     2737726 :         if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
    1566             :         {
    1567       11815 :             exp = mixing_matrix_fx_e[i];
    1568       11815 :             move16();
    1569             :         }
    1570             :     }
    1571             : 
    1572     2951424 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1573             :     {
    1574     2844575 :         mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
    1575     2844575 :         move32();
    1576             :     }
    1577      106849 :     mixing_matrix_e = exp;
    1578      106849 :     move16();
    1579             : 
    1580      106849 :     exp = mat_mult_buffer1_buff_e[0];
    1581      106849 :     move16();
    1582     2844575 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1583             :     {
    1584     2737726 :         if ( LT_16( exp, mat_mult_buffer1_buff_e[i] ) )
    1585             :         {
    1586       11815 :             exp = mat_mult_buffer1_buff_e[i];
    1587       11815 :             move16();
    1588             :         }
    1589             :     }
    1590             : 
    1591     2951424 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1592             :     {
    1593     2844575 :         mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( exp, mat_mult_buffer1_buff_e[i] ) ); // Q(31-exp)
    1594     2844575 :         move32();
    1595             :     }
    1596      106849 :     mat_mult_buffer1_e = exp;
    1597      106849 :     move16();
    1598             : 
    1599      106849 :     matrix_product_diag_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 1, Cy_tilde_fx, &Cy_tilde_e );
    1600             : 
    1601             :     /*-----------------------------------------------------------------*
    1602             :      * Energy Compensation
    1603             :      *-----------------------------------------------------------------*/
    1604             : 
    1605      106849 :     adj_fx_p = svd_s_buffer_fx;
    1606             : 
    1607      654124 :     FOR( i = 0; i < lengthCy; ++i )
    1608             :     {
    1609      547275 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( lengthCy * i )], L_add( Cy_tilde_fx[i], EPSILON_FX ), &scale );
    1610      547275 :         scale = add( scale, sub( Cy_fx_e, Cy_tilde_e ) );
    1611      547275 :         adj_fx_p[i] = Sqrt32( L_deposit_h( tmp ), &scale );
    1612      547275 :         move32();
    1613      547275 :         adj_buff_e[i] = scale;
    1614      547275 :         move16();
    1615      547275 :         Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, scale ) );
    1616      547275 :         IF( GT_32( adj_fx_p[i], temp ) ) // 1073741824 -> 1.0f in Q30
    1617             :         {
    1618           2 :             adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1619           2 :             move32();
    1620           2 :             adj_buff_e[i] = 3;
    1621           2 :             move16();
    1622             :         }
    1623             :     }
    1624             : 
    1625      106849 :     adj_e = adj_buff_e[0];
    1626      106849 :     move16();
    1627             : 
    1628      547275 :     FOR( i = 1; i < lengthCy; i++ )
    1629             :     {
    1630      440426 :         if ( LT_16( adj_e, adj_buff_e[i] ) )
    1631             :         {
    1632       42965 :             adj_e = adj_buff_e[i];
    1633       42965 :             move16();
    1634             :         }
    1635             :     }
    1636             : 
    1637             : 
    1638      654124 :     FOR( i = 0; i < lengthCy; i++ )
    1639             :     {
    1640      547275 :         adj_fx[i] = L_shr( adj_fx_p[i], sub( adj_e, adj_buff_e[i] ) ); // Q(31-adj_e)
    1641      547275 :         move32();
    1642             :     }
    1643             : 
    1644      106849 :     diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1645      106849 :     Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
    1646      106849 :     *mixing_matrix_ret_e = mat_mult_buffer3_e;
    1647      106849 :     move16();
    1648             : 
    1649      106849 :     pop_wmops();
    1650             : 
    1651      106849 :     return out;
    1652             : }
    1653             : 
    1654             : 
    1655             : /*-------------------------------------------------------------------*
    1656             :  * computeMixingMatricesISM()
    1657             :  *
    1658             :  *
    1659             :  *-------------------------------------------------------------------*/
    1660      315180 : Word16 computeMixingMatricesISM_fx(
    1661             :     const Word16 num_inputs,
    1662             :     const Word16 num_responses,
    1663             :     const Word16 num_outputs,
    1664             :     const Word32 *responses_fx, /*Q(31-responses_e) */
    1665             :     const Word16 responses_e,
    1666             :     const Word32 *ener_fx, /*Q(31-ener_e) */
    1667             :     const Word16 ener_e,
    1668             :     const Word32 *Cx_diag_fx, /*Q(31-diag_e) */
    1669             :     const Word16 Cx_diag_e,
    1670             :     const Word32 *Cy_diag_fx, /*Q(31-diag_e) */
    1671             :     const Word16 Cy_diag_e,
    1672             :     const Word16 *Q_16fx, // Q15
    1673             :     const Word16 energy_compensation_flag,
    1674             :     const Word32 reg_Sx_fx,   /*Q0*/
    1675             :     const Word32 reg_ghat_fx, /*Q0*/
    1676             :     Word32 *mixing_matrix_fx, /*Q(31-mixing_matrix_e) */
    1677             :     Word16 *mixing_matrix_e )
    1678             : {
    1679             :     Word16 i, out;
    1680             :     Word16 lengthCx, lengthCy;
    1681             :     Word16 nL, nC;
    1682             : 
    1683             :     Word32 *Cy_tilde_p_fx;
    1684             :     Word32 *adj_fx;
    1685             :     Word32 limit_fx;
    1686             : 
    1687             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1688             :     Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1689             :     Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1690             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1691             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
    1692             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1693             :     Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS];
    1694             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1695             :     Word32 Kx_fx[MAX_TRANSPORT_CHANNELS];
    1696             :     Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e;
    1697             : 
    1698             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1699             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1700             :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
    1701             :     Word16 svd_s_buffer_fx_e;
    1702             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
    1703             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1704             : 
    1705             :     Word16 temp_e[MAX_OUTPUT_CHANNELS];
    1706      315180 :     push_wmops( "dirac_cov_mix_mat" );
    1707             : 
    1708      315180 :     out = EXIT_SUCCESS;
    1709      315180 :     move16();
    1710      315180 :     lengthCx = num_inputs;
    1711      315180 :     move16();
    1712      315180 :     lengthCy = num_outputs;
    1713      315180 :     move16();
    1714             : 
    1715     6073020 :     FOR( i = 0; i < lengthCy * lengthCx; i++ )
    1716             :     {
    1717     5757840 :         IF( EQ_16( Q_16fx[i], MAX_16 ) )
    1718             :         {
    1719     2575200 :             Q_fx[i] = MAX_32;
    1720     2575200 :             move32();
    1721             :         }
    1722             :         ELSE
    1723             :         {
    1724     3182640 :             Q_fx[i] = L_deposit_h( Q_16fx[i] );
    1725     3182640 :             move32();
    1726             :         }
    1727             :     }
    1728      315180 :     Q_e = 0;
    1729      315180 :     move16();
    1730      315180 :     set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
    1731     5358060 :     FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
    1732             :     {
    1733     5042880 :         set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1734     5042880 :         set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1735     5042880 :         set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1736             :     }
    1737             : 
    1738             :     /* Decomposition of Cy = Ky*Ky' */
    1739             :     /* Ky = responses*diag(ener) */
    1740      315180 :     matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e );
    1741             : 
    1742             :     /* Decomposition of Cx -> Computing Kx */
    1743      315180 :     set16_fx( temp_e, Cx_diag_e, lengthCx );
    1744      315180 :     v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx );
    1745      315180 :     Kx_e = temp_e[0];
    1746      315180 :     move16();
    1747      630360 :     FOR( i = 1; i < lengthCx; i++ )
    1748             :     {
    1749      315180 :         Kx_e = s_max( Kx_e, temp_e[i] );
    1750             :     }
    1751      945540 :     FOR( i = 0; i < lengthCx; i++ )
    1752             :     {
    1753      630360 :         Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); // Q(31-Kx_e)
    1754      630360 :         move32();
    1755             :     }
    1756             : 
    1757             :     /* Regularization of Sx */
    1758      315180 :     maximum_32_fx( Kx_fx, lengthCx, &limit_fx );
    1759      315180 :     limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); // Cy_hat_diag_e + reg_ghat_e
    1760             : 
    1761      945540 :     FOR( i = 0; i < lengthCx; ++i )
    1762             :     {
    1763      630360 :         IF( GT_32( Kx_fx[i], limit_fx ) )
    1764             :         {
    1765      595280 :             svd_s_buffer_fx[i] = Kx_fx[i];
    1766      595280 :             move32();
    1767             :         }
    1768             :         ELSE
    1769             :         {
    1770       35080 :             svd_s_buffer_fx[i] = limit_fx;
    1771       35080 :             move32();
    1772             :         }
    1773             :     }
    1774      315180 :     svd_s_buffer_fx_e = Kx_e;
    1775      315180 :     move16();
    1776             : 
    1777      315180 :     limit_fx = 0;
    1778      315180 :     move32();
    1779             : 
    1780             :     /* regularized Kx-1 */
    1781             : 
    1782      945540 :     FOR( i = 0; i < lengthCx; ++i )
    1783             :     {
    1784      630360 :         IF( svd_s_buffer_fx[i] )
    1785             :         {
    1786             :             Word32 reg_fac;
    1787      630354 :             reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, svd_s_buffer_fx[i], &temp_e[i] );
    1788      630354 :             Kx_reg_inv_fx[i] = reg_fac;
    1789      630354 :             move32();
    1790      630354 :             temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e );
    1791      630354 :             move16();
    1792             :         }
    1793             :         ELSE
    1794             :         {
    1795             :             Word32 reg_fac;
    1796           6 :             reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, EPSILON_FX_M, &temp_e[i] );
    1797           6 :             Kx_reg_inv_fx[i] = reg_fac;
    1798           6 :             move32();
    1799           6 :             temp_e[i] = sub( temp_e[i], EPSILON_FX_E );
    1800           6 :             move16();
    1801             :         }
    1802             :     }
    1803      315180 :     Kx_reg_inv_e = temp_e[0];
    1804      315180 :     move16();
    1805      630360 :     FOR( i = 1; i < lengthCx; i++ )
    1806             :     {
    1807      315180 :         Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] );
    1808             :     }
    1809      945540 :     FOR( i = 0; i < lengthCx; i++ )
    1810             :     {
    1811      630360 :         Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); // Q(31- Kx_reg_inv_e)
    1812      630360 :         move32();
    1813             :     }
    1814             : 
    1815             :     /************************ normalization matrix G hat **********************/
    1816             : 
    1817             :     /* Computing Q*Cx*Q' */
    1818             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
    1819             :     Word16 Cy_hat_diag_e;
    1820             : 
    1821      315180 :     matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e );
    1822             : 
    1823      315180 :     Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 );
    1824             : 
    1825     6073020 :     FOR( i = 0; i < lengthCy * lengthCx; ++i )
    1826             :     {
    1827     5757840 :         IF( GT_16( Q_Cx_e, Q_e ) )
    1828             :         {
    1829     5757782 :             Q_fx[i] = L_shr( Q_fx[i], guard_bits );
    1830     5757782 :             move32();
    1831             :         }
    1832             :         ELSE
    1833             :         {
    1834          58 :             Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits );
    1835          58 :             move32();
    1836             :         }
    1837             :     }
    1838             : 
    1839      315180 :     IF( GT_16( Q_Cx_e, Q_e ) )
    1840             :     {
    1841      315177 :         Q_e = add( Q_e, guard_bits );
    1842             :     }
    1843             :     ELSE
    1844             :     {
    1845           3 :         Q_Cx_e = add( Q_Cx_e, guard_bits );
    1846             :     }
    1847      315180 :     matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
    1848             : 
    1849             :     /* Computing Cy_hat_diag */
    1850     3194100 :     FOR( i = 0; i < lengthCy; ++i )
    1851             :     {
    1852     2878920 :         if ( GT_32( Cy_hat_diag_fx[i], limit_fx ) )
    1853             :         {
    1854      467644 :             limit_fx = Cy_hat_diag_fx[i];
    1855      467644 :             move32();
    1856             :         }
    1857             :     }
    1858             : 
    1859      315180 :     limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); // Cy_hat_diag_e + reg_ghat_e
    1860             : 
    1861             :     /* Computing G_hat */
    1862     3194100 :     FOR( i = 0; i < lengthCy; ++i )
    1863             :     {
    1864     2878920 :         if ( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
    1865             :         {
    1866       12522 :             Cy_hat_diag_fx[i] = limit_fx;
    1867       12522 :             move32();
    1868             :         }
    1869     2878920 :         IF( Cy_diag_fx[i] )
    1870             :         {
    1871     1875780 :             IF( Cy_hat_diag_fx[i] )
    1872             :             {
    1873     1875780 :                 G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] );
    1874     1875780 :                 move32();
    1875     1875780 :                 temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) );
    1876     1875780 :                 move16();
    1877     1875780 :                 G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
    1878     1875780 :                 move32();
    1879             :             }
    1880             :             ELSE
    1881             :             {
    1882           0 :                 G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
    1883           0 :                 move32();
    1884           0 :                 temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
    1885           0 :                 move16();
    1886           0 :                 G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
    1887           0 :                 move32();
    1888             :             }
    1889             :         }
    1890             :         ELSE
    1891             :         {
    1892     1003140 :             G_hat_fx[i] = 0;
    1893     1003140 :             move32();
    1894     1003140 :             temp_e[i] = 0;
    1895     1003140 :             move16();
    1896             :         }
    1897             :     }
    1898      315180 :     G_hat_e = temp_e[0];
    1899      315180 :     move16();
    1900     2878920 :     FOR( i = 1; i < lengthCy; i++ )
    1901             :     {
    1902     2563740 :         G_hat_e = s_max( G_hat_e, temp_e[i] );
    1903             :     }
    1904     3194100 :     FOR( i = 0; i < lengthCy; i++ )
    1905             :     {
    1906     2878920 :         G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); // Q(31-G_hat_e)
    1907     2878920 :         move32();
    1908             :     }
    1909             : 
    1910             :     /************************ Formulate optimal P **********************/
    1911             : 
    1912             :     /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
    1913      315180 :     diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1914             : 
    1915      315180 :     matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
    1916             : 
    1917      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1918             : 
    1919      315180 :     IF( LT_16( lengthCx, num_responses ) )
    1920             :     {
    1921        3600 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 );
    1922             : 
    1923        3600 :         nL = num_responses;
    1924        3600 :         move16();
    1925        3600 :         nC = lengthCx;
    1926        3600 :         move16();
    1927        3600 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
    1928             :     }
    1929             :     ELSE
    1930             :     {
    1931      311580 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 );
    1932             : 
    1933      311580 :         nL = lengthCx;
    1934      311580 :         move16();
    1935      311580 :         nC = num_responses;
    1936      311580 :         move16();
    1937      311580 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
    1938             :     }
    1939             : 
    1940             :     /* Actually Processing P */
    1941             : 
    1942             :     /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */
    1943      315180 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx );
    1944      315180 :     mat_mult_buffer1_e = 0;
    1945      315180 :     move16();
    1946      315180 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1947      315180 :     mat_mult_buffer2_e = 0;
    1948      315180 :     move16();
    1949             : 
    1950      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1951             : 
    1952             :     /************************ Formulate M **********************/
    1953             : 
    1954      315180 :     matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1955             : 
    1956      315180 :     matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e );
    1957             : 
    1958             :     /*********************** Energy Compensation ****************/
    1959             : 
    1960             :     /* Compute Cy_tilde = M*Cx*M' */
    1961      315180 :     matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1962             : 
    1963      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
    1964             : 
    1965      315180 :     IF( EQ_16( energy_compensation_flag, 1 ) )
    1966             :     {
    1967      315180 :         adj_fx = svd_s_buffer_fx;
    1968      315180 :         Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1969     3194100 :         FOR( i = 0; i < lengthCy; ++i )
    1970             :         {
    1971             :             /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */
    1972     2878920 :             IF( Cy_tilde_p_fx[( i + ( i * lengthCy ) )] < 0 )
    1973             :             {
    1974           0 :                 adj_fx[i] = MAX_32;
    1975           0 :                 move32();
    1976           0 :                 temp_e[i] = 0;
    1977           0 :                 move16();
    1978             :             }
    1979             :             ELSE
    1980             :             {
    1981     2878920 :                 IF( Cy_diag_fx[i] )
    1982             :                 {
    1983     1875780 :                     IF( Cy_tilde_p_fx[i + ( i * lengthCy )] )
    1984             :                     {
    1985     1875750 :                         adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_tilde_p_fx[i + ( i * lengthCy )], &temp_e[i] );
    1986     1875750 :                         move32();
    1987     1875750 :                         temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) );
    1988     1875750 :                         move16();
    1989     1875750 :                         adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
    1990     1875750 :                         move32();
    1991             :                     }
    1992             :                     ELSE
    1993             :                     {
    1994          30 :                         adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
    1995          30 :                         move32();
    1996          30 :                         temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
    1997          30 :                         move16();
    1998          30 :                         adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
    1999          30 :                         move32();
    2000             :                     }
    2001             :                 }
    2002             :                 ELSE
    2003             :                 {
    2004     1003140 :                     adj_fx[i] = 0;
    2005     1003140 :                     move32();
    2006     1003140 :                     temp_e[i] = 0;
    2007     1003140 :                     move16();
    2008             :                 }
    2009             :             }
    2010             : 
    2011     2878920 :             Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, temp_e[i] ) );
    2012     2878920 :             IF( GT_32( adj_fx[i], temp ) )
    2013             :             {
    2014        1621 :                 adj_fx[i] = MAX_32;
    2015        1621 :                 move32();
    2016        1621 :                 temp_e[i] = 2;
    2017        1621 :                 move16();
    2018             :             }
    2019             :         }
    2020      315180 :         adj_e = temp_e[0];
    2021      315180 :         move16();
    2022     2878920 :         FOR( i = 1; i < lengthCy; i++ )
    2023             :         {
    2024     2563740 :             adj_e = s_max( adj_e, temp_e[i] );
    2025             :         }
    2026     3194100 :         FOR( i = 0; i < lengthCy; i++ )
    2027             :         {
    2028     2878920 :             adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); // Q(31-adj_e)
    2029     2878920 :             move32();
    2030             :         }
    2031             : 
    2032      315180 :         diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    2033             : 
    2034      315180 :         Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
    2035      315180 :         *mixing_matrix_e = mat_mult_buffer3_e;
    2036      315180 :         move16();
    2037             :     }
    2038             : 
    2039      315180 :     pop_wmops();
    2040             : 
    2041      315180 :     return out;
    2042             : }

Generated by: LCOV version 1.14