LCOV - code coverage report
Current view: top level - lib_dec - ivas_dirac_output_synthesis_cov_fx.c (source / functions) Hit Total Coverage
Test: Coverage on main -- dec/rend @ 4c82f1d24d39d0296b18d775f18a006f4c7d024b Lines: 836 876 95.4 %
Date: 2025-05-17 01:59:02 Functions: 9 9 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : #define MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
      34             : 
      35             : #include <stdint.h>
      36             : #include <string.h>
      37             : #include <stdio.h>
      38             : #include <stdlib.h>
      39             : #include <assert.h>
      40             : #include <math.h>
      41             : #include "options.h"
      42             : #include "cnst.h"
      43             : #include "rom_enc.h"
      44             : #include "rom_com.h"
      45             : #include "prot_fx.h"
      46             : #include "ivas_stat_dec.h"
      47             : #include "ivas_cnst.h"
      48             : #include "ivas_rom_com.h"
      49             : #include "ivas_rom_dec.h"
      50             : #include "wmc_auto.h"
      51             : #include "rom_dec.h"
      52             : #include "ivas_prot_fx.h"
      53             : 
      54             : 
      55             : /*-------------------------------------------------------------------*
      56             :  * ivas_dirac_dec_output_synthesis_cov_open()
      57             :  *
      58             :  * Sets up the state and parameters for the Covariance Synthesis
      59             :  *-------------------------------------------------------------------*/
      60             : 
      61         297 : ivas_error ivas_dirac_dec_output_synthesis_cov_open_fx(
      62             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params,   /* i/o: handle for the covariance synthesis parameters                                                        */
      63             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: hanlde for the covariance synthesis state                                                             */
      64             :     const Word16 max_band_decorr,                                     /* i  : uppermost frequency band where decorrelation is applied                                               */
      65             :     const Word16 interp_length,                                       /* i  : length for interpolating the mixing matrices in time slots                                            */
      66             :     const Word16 num_param_bands,                                     /* i  : number of parameter bands                                                                             */
      67             :     const Word16 num_param_bands_residual,                            /* i  : number of parameter bands with a residual mixing matrix (i.e. decorrelation                           */
      68             :     const Word16 nchan_in,                                            /* i  : number of input (transport) channels                                                                  */
      69             :     const Word16 nchan_out,                                           /* i  : number of output channels                                                                             */
      70             :     const Word32 *proto_matrix                                        /* i  : the prototype (upmix) matrix (only used if mode == 1)  Q(15-proto_matrix_e)                                               */
      71             : )
      72             : {
      73             :     Word16 idx;
      74             : 
      75         297 :     h_dirac_output_synthesis_params->max_band_decorr = max_band_decorr;
      76         297 :     move16();
      77             : 
      78             :     /*-----------------------------------------------------------------*
      79             :      * memory allocation
      80             :      *-----------------------------------------------------------------*/
      81             : 
      82             :     /* buffer length and interpolator */
      83         297 :     h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
      84         297 :     IF( ( h_dirac_output_synthesis_params->proto_matrix_fx = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
      85             :     {
      86           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
      87             :     }
      88         297 :     h_dirac_output_synthesis_params->proto_matrix_len = imult1616( nchan_out, nchan_in );
      89         297 :     move16();
      90             :     /* cov buffers */
      91        3803 :     FOR( idx = 0; idx < num_param_bands; idx++ )
      92             :     {
      93        3506 :         IF( ( h_dirac_output_synthesis_state->cx_old_fx[idx] = (Word32 *) malloc( nchan_in * nchan_in * sizeof( Word32 ) ) ) == NULL )
      94             :         {
      95           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
      96             :         }
      97        3506 :         h_dirac_output_synthesis_state->cx_old_len = imult1616( nchan_in, nchan_in );
      98        3506 :         IF( ( h_dirac_output_synthesis_state->cy_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_out * sizeof( Word32 ) ) ) == NULL )
      99             :         {
     100           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     101             :         }
     102        3506 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
     103             :         {
     104           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     105             :         }
     106        3506 :         set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
     107        3506 :         set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     108        3506 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
     109             : 
     110        3506 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = (Word32 *) malloc( nchan_out * nchan_in * sizeof( Word32 ) ) ) == NULL )
     111             :         {
     112           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     113             :         }
     114        3506 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
     115        3506 :         h_dirac_output_synthesis_state->mixing_matrix_len = i_mult( nchan_out, nchan_in );
     116        3506 :         move16();
     117             :     }
     118       14611 :     FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     119             :     {
     120       14314 :         h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
     121       14314 :         h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
     122       14314 :         h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
     123       14314 :         h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
     124             :     }
     125             : 
     126        3053 :     FOR( idx = 0; idx < num_param_bands_residual; idx++ )
     127             :     {
     128        2756 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
     129             :         {
     130           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     131             :         }
     132        2756 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     133             : 
     134        2756 :         IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = (Word32 *) malloc( imult1616( nchan_out, nchan_out ) * sizeof( Word32 ) ) ) == NULL )
     135             :         {
     136           0 :             return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     137             :         }
     138        2756 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
     139        2756 :         h_dirac_output_synthesis_state->mixing_matrix_res_len = i_mult( nchan_out, nchan_out );
     140        2756 :         move16();
     141             :     }
     142       15361 :     FOR( ; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     143             :     {
     144       15064 :         h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
     145       15064 :         h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
     146             :     }
     147             : 
     148         297 :     IF( ( h_dirac_output_synthesis_state->cx_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     149             :     {
     150           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     151             :     }
     152         297 :     IF( ( h_dirac_output_synthesis_state->cy_old_e = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     153             :     {
     154           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     155             :     }
     156             : 
     157         297 :     set16_fx( h_dirac_output_synthesis_state->cx_old_e, 0, CLDFB_NO_CHANNELS_MAX );
     158         297 :     set16_fx( h_dirac_output_synthesis_state->cy_old_e, 0, CLDFB_NO_CHANNELS_MAX );
     159             : 
     160         297 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     161             :     {
     162           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     163             :     }
     164         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
     165             : 
     166         297 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     167             :     {
     168           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     169             :     }
     170         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     171             : 
     172         297 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     173             :     {
     174           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     175             :     }
     176         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
     177             : 
     178         297 :     IF( ( h_dirac_output_synthesis_state->mixing_matrix_old_exp = (Word16 *) malloc( CLDFB_NO_CHANNELS_MAX * sizeof( Word16 ) ) ) == NULL )
     179             :     {
     180           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis matrix\n" ) );
     181             :     }
     182         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     183             : 
     184             :     /*-----------------------------------------------------------------*
     185             :      * prepare processing parameters
     186             :      *-----------------------------------------------------------------*/
     187             : 
     188             :     /* compute interpolator */
     189         297 :     IF( ( h_dirac_output_synthesis_params->interpolator_fx = (Word16 *) malloc( interp_length * sizeof( Word16 ) ) ) == NULL )
     190             :     {
     191           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DirAC synthesis covariance\n" ) );
     192             :     }
     193        9801 :     FOR( idx = 1; idx <= interp_length; ++idx )
     194             :     {
     195        9504 :         h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = div_s( idx, interp_length );
     196        9504 :         move16();
     197             :     }
     198         297 :     Copy32( proto_matrix, h_dirac_output_synthesis_params->proto_matrix_fx, imult1616( nchan_in, nchan_out ) );
     199         297 :     h_dirac_output_synthesis_params->proto_matrix_e = 5;
     200         297 :     move16();
     201         297 :     return IVAS_ERR_OK;
     202             : }
     203             : 
     204             : 
     205             : /*-------------------------------------------------------------------*
     206             :  * ivas_dirac_dec_output_synthesis_get_interpolator_fx()
     207             :  *
     208             :  *
     209             :  *-------------------------------------------------------------------*/
     210             : 
     211      318383 : void ivas_dirac_dec_output_synthesis_get_interpolator_fx(
     212             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params, /* i/o: handle for the covariance synthesis parameters  */
     213             :     const UWord16 interp_length                                     /* i  : interpolator length                             */
     214             : )
     215             : {
     216             :     Word16 idx;
     217      318383 :     Word16 tmp, exp_diff = 0;
     218      318383 :     move16();
     219             : 
     220     1583648 :     FOR( idx = 1; idx <= interp_length; ++idx )
     221             :     {
     222     1265265 :         tmp = BASOP_Util_Divide3232_Scale( L_deposit_l( idx ), L_deposit_l( interp_length ), &exp_diff ); // (Q15 - exp_diff)
     223     1265265 :         h_dirac_output_synthesis_params->interpolator_fx[idx - 1] = shl_sat( tmp, exp_diff );             // Q15
     224     1265265 :         move16();
     225             :     }
     226             : 
     227      318383 :     return;
     228             : }
     229             : 
     230             : 
     231             : /*-------------------------------------------------------------------*
     232             :  * ivas_dirac_dec_output_synthesis_cov_init()
     233             :  *
     234             :  * initialize the states for the covariance synthesis
     235             :  *-------------------------------------------------------------------*/
     236         297 : void ivas_dirac_dec_output_synthesis_cov_init_fx(
     237             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state, /* i/o: pointer to the state of the covariance synthesis                            */
     238             :     const Word16 nchan_in,                                            /* i  : number of input (tranport) channels                                         */
     239             :     const Word16 nchan_out,                                           /* i  : number of output channels                                                   */
     240             :     const Word16 n_param_bands,                                       /* i  : number of total parameter bands                                             */
     241             :     const Word16 n_param_bands_res                                    /* i  : number of parameter bands with a residual mixing matrix (i.e. decorrelation */
     242             : )
     243             : {
     244             : 
     245             :     Word16 idx;
     246             : 
     247             :     /* initialize buffers */
     248        3803 :     FOR( idx = 0; idx < n_param_bands; idx++ )
     249             :     {
     250        3506 :         set_zero_fx( h_dirac_output_synthesis_state->cx_old_fx[idx], imult1616( nchan_in, nchan_in ) );
     251        3506 :         set_zero_fx( h_dirac_output_synthesis_state->cy_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     252        3506 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx], imult1616( nchan_out, nchan_in ) );
     253        3506 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_fx[idx], imult1616( nchan_out, nchan_in ) );
     254             :     }
     255             : 
     256        3053 :     FOR( idx = 0; idx < n_param_bands_res; idx++ )
     257             :     {
     258        2756 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx], imult1616( nchan_out, nchan_out ) );
     259        2756 :         set_zero_fx( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx], imult1616( nchan_out, nchan_out ) );
     260             :     }
     261             : 
     262             : 
     263         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     264         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_exp, 0, CLDFB_NO_CHANNELS_MAX );
     265         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp, 0, CLDFB_NO_CHANNELS_MAX );
     266         297 :     set16_fx( h_dirac_output_synthesis_state->mixing_matrix_res_exp, 0, CLDFB_NO_CHANNELS_MAX );
     267             : 
     268         297 :     return;
     269             : }
     270             : 
     271             : 
     272             : /*-------------------------------------------------------------------*
     273             :  * ivas_dirac_dec_output_synthesis_cov_close()
     274             :  *
     275             :  * deallocate dynamic memory in the covariance synthesis state
     276             :  *-------------------------------------------------------------------*/
     277             : 
     278         297 : void ivas_dirac_dec_output_synthesis_cov_close_fx(
     279             :     DIRAC_OUTPUT_SYNTHESIS_PARAMS *h_dirac_output_synthesis_params,  /* i  : handle for the covariance synthesis parameters  */
     280             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE *h_dirac_output_synthesis_state /* i/o: handle for the covariance synthesis state       */
     281             : )
     282             : {
     283             :     Word16 idx;
     284             : 
     285             :     /*-----------------------------------------------------------------*
     286             :      * memory deallocation
     287             :      *-----------------------------------------------------------------*/
     288             : 
     289             :     /* free interpolator */
     290         297 :     IF( h_dirac_output_synthesis_params->interpolator_fx != NULL )
     291             :     {
     292         297 :         free( h_dirac_output_synthesis_params->interpolator_fx );
     293         297 :         h_dirac_output_synthesis_params->interpolator_fx = NULL;
     294             :     }
     295             : 
     296             :     /* free alpha */
     297         297 :     IF( h_dirac_output_synthesis_params->alpha_synthesis_fx != NULL )
     298             :     {
     299           0 :         free( h_dirac_output_synthesis_params->alpha_synthesis_fx );
     300           0 :         h_dirac_output_synthesis_params->alpha_synthesis_fx = NULL;
     301             :     }
     302             : 
     303             :     /* free proto_matrix */
     304         297 :     IF( h_dirac_output_synthesis_params->proto_matrix_fx != NULL )
     305             :     {
     306         297 :         free( h_dirac_output_synthesis_params->proto_matrix_fx );
     307         297 :         h_dirac_output_synthesis_params->proto_matrix_fx = NULL;
     308             :     }
     309             : 
     310             : 
     311         297 :     IF( h_dirac_output_synthesis_state->cx_old_e != NULL )
     312             :     {
     313         297 :         free( h_dirac_output_synthesis_state->cx_old_e );
     314         297 :         h_dirac_output_synthesis_state->cx_old_e = NULL;
     315             :     }
     316         297 :     IF( h_dirac_output_synthesis_state->cy_old_e != NULL )
     317             :     {
     318         297 :         free( h_dirac_output_synthesis_state->cy_old_e );
     319         297 :         h_dirac_output_synthesis_state->cy_old_e = NULL;
     320             :     }
     321             : 
     322             :     /* free cov buffers */
     323       18117 :     FOR( idx = 0; idx < CLDFB_NO_CHANNELS_MAX; idx++ )
     324             :     {
     325       17820 :         IF( h_dirac_output_synthesis_state->cx_old_fx[idx] != NULL )
     326             :         {
     327        3506 :             free( h_dirac_output_synthesis_state->cx_old_fx[idx] );
     328        3506 :             h_dirac_output_synthesis_state->cx_old_fx[idx] = NULL;
     329             :         }
     330             : 
     331       17820 :         IF( h_dirac_output_synthesis_state->cy_old_fx[idx] != NULL )
     332             :         {
     333        3506 :             free( h_dirac_output_synthesis_state->cy_old_fx[idx] );
     334        3506 :             h_dirac_output_synthesis_state->cy_old_fx[idx] = NULL;
     335             :         }
     336             : 
     337       17820 :         IF( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] != NULL )
     338             :         {
     339        3506 :             free( h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] );
     340        3506 :             h_dirac_output_synthesis_state->mixing_matrix_old_fx[idx] = NULL;
     341             :         }
     342             : 
     343       17820 :         IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] != NULL )
     344             :         {
     345        2756 :             free( h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] );
     346        2756 :             h_dirac_output_synthesis_state->mixing_matrix_res_old_fx[idx] = NULL;
     347             :         }
     348             : 
     349       17820 :         IF( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] != NULL )
     350             :         {
     351        3506 :             free( h_dirac_output_synthesis_state->mixing_matrix_fx[idx] );
     352        3506 :             h_dirac_output_synthesis_state->mixing_matrix_fx[idx] = NULL;
     353             :         }
     354             : 
     355       17820 :         IF( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] != NULL )
     356             :         {
     357        2756 :             free( h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] );
     358        2756 :             h_dirac_output_synthesis_state->mixing_matrix_res_fx[idx] = NULL;
     359             :         }
     360             :     }
     361             : 
     362         297 :     IF( h_dirac_output_synthesis_state->mixing_matrix_old_exp != NULL )
     363             :     {
     364         297 :         free( h_dirac_output_synthesis_state->mixing_matrix_old_exp );
     365         297 :         h_dirac_output_synthesis_state->mixing_matrix_old_exp = NULL;
     366             :     }
     367             : 
     368         297 :     IF( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp != NULL )
     369             :     {
     370         297 :         free( h_dirac_output_synthesis_state->mixing_matrix_res_old_exp );
     371         297 :         h_dirac_output_synthesis_state->mixing_matrix_res_old_exp = NULL;
     372             :     }
     373             : 
     374         297 :     IF( h_dirac_output_synthesis_state->mixing_matrix_exp != NULL )
     375             :     {
     376         297 :         free( h_dirac_output_synthesis_state->mixing_matrix_exp );
     377         297 :         h_dirac_output_synthesis_state->mixing_matrix_exp = NULL;
     378             :     }
     379             : 
     380         297 :     IF( h_dirac_output_synthesis_state->mixing_matrix_res_exp != NULL )
     381             :     {
     382         297 :         free( h_dirac_output_synthesis_state->mixing_matrix_res_exp );
     383         297 :         h_dirac_output_synthesis_state->mixing_matrix_res_exp = NULL;
     384             :     }
     385         297 :     return;
     386             : }
     387             : 
     388             : /*-------------------------------------------------------------------*
     389             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
     390             :  *
     391             :  * collect the multi channel input covariance for one filter bank time slot
     392             :  *-------------------------------------------------------------------*/
     393             : 
     394             : /*-------------------------------------------------------------------*
     395             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot()
     396             :  *
     397             :  * collect the multi channel input covariance for one filter bank time slot
     398             :  *-------------------------------------------------------------------*/
     399     1776450 : void ivas_dirac_dec_output_synthesis_cov_param_mc_collect_slot_fx(
     400             :     Word32 *RealBuffer_fx,                                                          /* i  : input channel filter bank samples (real part)          Q(31- RealBuffer_e)*/
     401             :     Word16 RealBuffer_e,                                                            /* i  : exponent input channel filter bank samples (real part)*/
     402             :     Word32 *ImagBuffer_fx,                                                          /* i  : input channel filter bank samples (imaginary part      Q(ImagBuffer_e)*/
     403             :     Word16 ImagBuffer_e,                                                            /* i  : exponent input channel filter bank samples (real part)*/
     404             :     Word32 cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS],      /* o  : accumulated input covariance (real part)              Q(31- cx_e)*/
     405             :     Word16 *cx_e,                                                                   /* i  : exponent for accumulated input covariance (real part) */
     406             :     Word32 cx_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS], /* o  : accumulated input covariance (imaginary part)         Q(31- cx_imag_e)*/
     407             :     Word16 *cx_imag_e,                                                              /* i : exponent accumulated input covariance (imag part)      */
     408             :     PARAM_MC_DEC_HANDLE hParamMC,                                                   /* i  : handle to Parametric MC state                         */
     409             :     const Word16 param_band,                                                        /* i  : parameter band                                        */
     410             :     const Word16 nchan_in                                                           /* i  : number of input channels                              */
     411             : )
     412             : {
     413             :     Word16 band_idx, ch_idx;
     414             :     Word16 brange[2];
     415             :     Word32 real_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
     416             :     Word16 real_in_e;
     417             :     Word32 imag_in_buffer_fx[PARAM_MC_MAX_BANDS_IN_PARAMETER_BAND * MAX_TRANSPORT_CHANNELS];
     418             :     Word16 imag_in_e;
     419             :     Word32 real_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     420             :     Word32 imag_buffer_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     421             :     Word16 output_e;
     422             :     Word16 tmp1_e, tmp2_e, shift_imag, shift_real;
     423             :     Word16 band, num_bands;
     424             :     Word16 cx_fx_norm, cx_imag_fx_norm;
     425             :     /* estimate input covariance */
     426             :     /* Already stack here instead of in the process_subframe */
     427             : 
     428             :     /* collect input frame */
     429     1776450 :     brange[0] = hParamMC->band_grouping[param_band];
     430     1776450 :     move16();
     431     1776450 :     brange[1] = hParamMC->band_grouping[param_band + 1];
     432     1776450 :     move16();
     433     1776450 :     num_bands = sub( brange[1], brange[0] );
     434             : 
     435     9379450 :     FOR( band_idx = 0; band_idx < num_bands; band_idx++ )
     436             :     {
     437     7603000 :         band = add( brange[0], band_idx );
     438    22843320 :         FOR( ch_idx = 0; ch_idx < nchan_in; ch_idx++ )
     439             :         {
     440    15240320 :             real_in_buffer_fx[band_idx + num_bands * ch_idx] = RealBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
     441    15240320 :             move32();
     442    15240320 :             imag_in_buffer_fx[band_idx + num_bands * ch_idx] = ImagBuffer_fx[band + hParamMC->num_freq_bands * ch_idx];
     443    15240320 :             move32();
     444             :         }
     445             :     }
     446             : 
     447     1776450 :     real_in_e = RealBuffer_e;
     448     1776450 :     move16();
     449     1776450 :     imag_in_e = ImagBuffer_e;
     450     1776450 :     move16();
     451             : 
     452     1776450 :     Word16 buf_len = imult1616( num_bands, nchan_in );
     453             : 
     454     1776450 :     shift_real = sub( L_norm_arr( real_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
     455     1776450 :     shift_imag = sub( L_norm_arr( imag_in_buffer_fx, buf_len ), find_guarded_bits_fx( add( num_bands, 1 ) ) );
     456             : 
     457     1776450 :     real_in_e = sub( real_in_e, shift_real );
     458     1776450 :     imag_in_e = sub( imag_in_e, shift_imag );
     459             : 
     460             : 
     461     1776450 :     output_e = s_max( real_in_e, imag_in_e );
     462             : 
     463     1776450 :     scale_sig32( real_in_buffer_fx, buf_len, sub( RealBuffer_e, output_e ) );
     464     1776450 :     scale_sig32( imag_in_buffer_fx, buf_len, sub( ImagBuffer_e, output_e ) );
     465             : 
     466     1776450 :     cmplx_matrix_square_fx( real_in_buffer_fx, imag_in_buffer_fx, num_bands, nchan_in, real_buffer_fx, imag_buffer_fx, output_e, &output_e );
     467     1776450 :     v_add_fixed_me( cx_fx, *cx_e, real_buffer_fx, output_e, cx_fx, &tmp1_e, imult1616( nchan_in, nchan_in ), 1 );
     468             : 
     469     1776450 :     v_add_fixed_me( cx_imag_fx, *cx_imag_e, imag_buffer_fx, output_e, cx_imag_fx, &tmp2_e, imult1616( nchan_in, nchan_in ), 1 );
     470             : 
     471     1776450 :     cx_fx_norm = L_norm_arr( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
     472     1776450 :     cx_imag_fx_norm = L_norm_arr( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
     473             : 
     474     1776450 :     scale_sig32( cx_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_fx_norm );
     475     1776450 :     scale_sig32( cx_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS, cx_imag_fx_norm );
     476             : 
     477     1776450 :     *cx_e = sub( tmp1_e, cx_fx_norm );
     478     1776450 :     move16();
     479     1776450 :     *cx_imag_e = sub( tmp2_e, cx_imag_fx_norm );
     480     1776450 :     move16();
     481             : 
     482     1776450 :     return;
     483             : }
     484             : 
     485             : /*-------------------------------------------------------------------*
     486             :  * ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot()
     487             :  *
     488             :  * synthesize one filter bank slot of multi channel output filter bank
     489             :  * samples with the covariance synthesis
     490             :  *-------------------------------------------------------------------*/
     491             : 
     492      140160 : void ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx(
     493             :     Word32 *Cldfb_RealBuffer_in_fx,                                                   /*Q6*/
     494             :     Word32 *Cldfb_ImagBuffer_in_fx,                                                   /*Q6*/
     495             :     Word32 Cldfb_RealBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o  : output channel filter bank samples (real part)                   Q6*/
     496             :     Word32 Cldfb_ImagBuffer_fx[][MAX_PARAM_SPATIAL_SUBFRAMES][CLDFB_NO_CHANNELS_MAX], /* o  : output channel filter bank samples (imaginary part)              Q6*/
     497             :     Word32 *mixing_matrix_fx[],                                                       /* i  : parameter band wise mixing matrices (direct part)                Q(31-mixing_matrix_e)*/
     498             :     Word16 *mixing_matrix_e,                                                          /* i  : parameter band wise mixing matrices (direct part)                */
     499             :     Word32 *mixing_matrix_res_fx[],                                                   /* i  : parameter band wise mixing matrices (residual part)              Q(31-mixing_matrix_res_e)*/
     500             :     Word16 *mixing_matrix_res_e,                                                      /* i  : parameter band wise mixing matrices (residual part)              */
     501             :     const UWord16 slot_idx_sfr,                                                       /* i  : time slot index for the current slot within the current subframe */
     502             :     const UWord16 slot_idx_tot,                                                       /* i  : time slot index for the current slot within the frame            */
     503             :     const Word16 nX,                                                                  /* i  : number of input channels                                         */
     504             :     const Word16 nY,                                                                  /* i  : number of output channels                                        */
     505             :     PARAM_MC_DEC_HANDLE hParamMC                                                      /* i  : handle to the Parametric MC decoder state                        */
     506             : )
     507             : {
     508             :     Word16 param_band_idx, band, ch_idx;
     509             :     Word16 have_residual;
     510             :     Word16 brange[2];
     511             :     DIRAC_OUTPUT_SYNTHESIS_COV_STATE h_synthesis_state;
     512             :     Word32 mixing_matrix_smooth_fx[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
     513             :     Word16 mixing_matrix_smooth_e;
     514             :     Word32 mixing_matrix_res_smooth_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     515      140160 :     Word16 mixing_matrix_res_smooth_e = 0;
     516      140160 :     move16();
     517             :     Word32 mixing_matrix_buffer_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     518             :     Word16 mixing_matrix_buffer_e;
     519             :     Word32 input_f_real_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
     520             :     Word32 input_f_imag_fx[PARAM_MC_MAX_TRANSPORT_CHANS];
     521             : #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
     522             : 
     523             : #else
     524             :     Word32 output_f_real_fx[MAX_CICP_CHANNELS];
     525             :     Word32 output_f_imag_fx[MAX_CICP_CHANNELS];
     526             :     Word16 output_f_real_e;
     527             :     Word16 output_f_imag_e;
     528             : #endif
     529             :     Word32 diff_f_real_fx[MAX_CICP_CHANNELS];
     530             :     Word32 diff_f_imag_fx[MAX_CICP_CHANNELS];
     531             : 
     532      140160 :     h_synthesis_state = hParamMC->h_output_synthesis_cov_state;
     533             : 
     534      140160 :     set_zero_fx( input_f_real_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
     535      140160 :     set_zero_fx( input_f_imag_fx, PARAM_MC_MAX_TRANSPORT_CHANS );
     536             : #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
     537             : 
     538             : #else
     539             :     set_zero_fx( output_f_real_fx, MAX_CICP_CHANNELS );
     540             :     set_zero_fx( output_f_imag_fx, MAX_CICP_CHANNELS );
     541             : #endif
     542      140160 :     set_zero_fx( diff_f_real_fx, MAX_CICP_CHANNELS );
     543      140160 :     set_zero_fx( diff_f_imag_fx, MAX_CICP_CHANNELS );
     544             : 
     545     1950080 :     FOR( param_band_idx = 0; param_band_idx < hParamMC->num_param_bands_synth; param_band_idx++ )
     546             :     {
     547             :         /* final mixing */
     548     1809920 :         have_residual = 0;
     549     1809920 :         move16();
     550     1809920 :         brange[0] = hParamMC->band_grouping[param_band_idx];
     551     1809920 :         move16();
     552     1809920 :         brange[1] = hParamMC->band_grouping[( param_band_idx + 1 )];
     553     1809920 :         move16();
     554             : 
     555     1809920 :         if ( LT_16( brange[0], hParamMC->h_output_synthesis_params.max_band_decorr ) )
     556             :         {
     557     1454400 :             have_residual = 1;
     558     1454400 :             move16();
     559             :         }
     560             : 
     561     1809920 :         v_multc_fixed( mixing_matrix_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_smooth_fx, imult1616( nY, nX ) );
     562     1809920 :         mixing_matrix_smooth_e = mixing_matrix_e[param_band_idx]; // interpolator is W16
     563     1809920 :         move16();
     564             : 
     565     1809920 :         v_multc_fixed( h_synthesis_state.mixing_matrix_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nX ) );
     566     1809920 :         mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_old_exp[param_band_idx]; // interpolator is W16
     567     1809920 :         move16();
     568             : 
     569     1809920 :         v_add_fixed_me( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_smooth_fx, &mixing_matrix_smooth_e, imult1616( nY, nX ), 0 );
     570             : 
     571     1809920 :         IF( have_residual )
     572             :         {
     573             :             /* residual mixing matrix interpolation*/
     574             : 
     575     1454400 :             v_multc_fixed( mixing_matrix_res_fx[param_band_idx], L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ), mixing_matrix_res_smooth_fx, imult1616( nY, nY ) );
     576     1454400 :             mixing_matrix_res_smooth_e = mixing_matrix_res_e[param_band_idx]; // interpolator is W16
     577     1454400 :             move16();
     578             : 
     579     1454400 :             set_zero_fx( mixing_matrix_buffer_fx, imult1616( nY, nY ) );
     580     1454400 :             v_multc_fixed( h_synthesis_state.mixing_matrix_res_old_fx[param_band_idx], L_sub( ONE_IN_Q31, L_deposit_h( hParamMC->h_output_synthesis_params.interpolator_fx[slot_idx_tot] ) ), mixing_matrix_buffer_fx, imult1616( nY, nY ) );
     581     1454400 :             mixing_matrix_buffer_e = h_synthesis_state.mixing_matrix_res_old_exp[param_band_idx]; // interpolator is W16
     582     1454400 :             move16();
     583             : 
     584     1454400 :             v_add_fixed_me( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, mixing_matrix_buffer_fx, mixing_matrix_buffer_e, mixing_matrix_res_smooth_fx, &mixing_matrix_res_smooth_e, imult1616( nY, nY ), 0 );
     585             :         }
     586             : 
     587             : 
     588     9553920 :         FOR( band = brange[0]; band < brange[1]; band++ )
     589             :         {
     590     7744000 :             assert( band >= 0 );
     591             : 
     592     7744000 :             IF( have_residual )
     593             :             {
     594             :                 /* collect diffuse prototypes */
     595     2803200 :                 assert( LT_16( band, hParamMC->h_output_synthesis_params.max_band_decorr ) );
     596    19833600 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     597             :                 {
     598    17030400 :                     diff_f_real_fx[ch_idx] = Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band];
     599    17030400 :                     move32();
     600    17030400 :                     diff_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band];
     601    17030400 :                     move32();
     602             :                 }
     603             : 
     604             :                 /* apply residual mixing */
     605             : #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
     606             :                 {
     607             :                     Word16 shifter;
     608             : 
     609     2803200 :                     shifter = 31 - mixing_matrix_res_smooth_e;
     610    19833600 :                     FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     611             :                     {
     612             :                         int i;
     613             :                         Word16 idx;
     614             :                         Word64 temp_real, temp_imag;
     615             : 
     616             : 
     617    17030400 :                         idx = ch_idx;
     618    17030400 :                         temp_real = 0;
     619    17030400 :                         temp_imag = 0;
     620    17030400 :                         move64();
     621    17030400 :                         move64();
     622   121209600 :                         for ( i = 0; i < nY; i++ )
     623             :                         {
     624   104179200 :                             temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_real_fx[i] ) );
     625   104179200 :                             temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_res_smooth_fx[idx], diff_f_imag_fx[i] ) );
     626   104179200 :                             idx += nY;
     627             :                         }
     628    17030400 :                         Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_real, shifter ) );
     629    17030400 :                         Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = W_extract_l( W_shr( temp_imag, shifter ) );
     630             :                     }
     631             :                 }
     632             : 
     633             : 
     634             : #else
     635             : 
     636             :                 matrix_product_mant_exp_fx( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, nY, nY, 0, diff_f_real_fx, 25, nY, 1, 0, output_f_real_fx, &output_f_real_e );
     637             :                 scale_sig32( output_f_real_fx, nY, sub( Q6, sub( Q31, output_f_real_e ) ) ); // Q6
     638             : 
     639             : 
     640             :                 matrix_product_mant_exp_fx( mixing_matrix_res_smooth_fx, mixing_matrix_res_smooth_e, nY, nY, 0, diff_f_imag_fx, 25, nY, 1, 0, output_f_imag_fx, &output_f_imag_e );
     641             :                 scale_sig32( output_f_imag_fx, nY, sub( Q6, sub( Q31, output_f_imag_e ) ) ); // Q6
     642             : 
     643             :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     644             :                 {
     645             :                     Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = output_f_real_fx[ch_idx]; // Q6
     646             :                     move32();
     647             :                     Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = output_f_imag_fx[ch_idx]; // Q6
     648             :                     move32();
     649             :                 }
     650             : #endif
     651             :             }
     652             :             ELSE
     653             :             {
     654    34969600 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     655             :                 {
     656    30028800 :                     Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
     657    30028800 :                     move32();
     658    30028800 :                     Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = 0;
     659    30028800 :                     move32();
     660             :                 }
     661             :             }
     662             : 
     663             :             /* collect input signals, still in cldfb buffers */
     664    23270400 :             FOR( ch_idx = 0; ch_idx < nX; ch_idx++ )
     665             :             {
     666             : 
     667    15526400 :                 input_f_real_fx[ch_idx] = Cldfb_RealBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
     668    15526400 :                 move32();
     669    15526400 :                 input_f_imag_fx[ch_idx] = Cldfb_ImagBuffer_in_fx[ch_idx * hParamMC->num_freq_bands + band]; // Q6
     670    15526400 :                 move32();
     671             :             }
     672             : 
     673             :             /* apply mixing matrix */
     674             : #ifdef MERGE_REQUEST_1564_SPEEDUP_ivas_dirac_dec_output_synthesis_cov_param_mc_synthesise_slot_fx_NONBE
     675             :             {
     676             :                 Word16 shifter;
     677     7744000 :                 shifter = 31 - mixing_matrix_smooth_e;
     678             : 
     679    54803200 :                 FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     680             :                 {
     681             :                     int i;
     682             :                     Word16 idx;
     683             :                     Word64 temp_real, temp_imag;
     684             : 
     685             : 
     686    47059200 :                     idx = ch_idx;
     687    47059200 :                     temp_real = 0;
     688    47059200 :                     temp_imag = 0;
     689    47059200 :                     move64();
     690    47059200 :                     move64();
     691   141638400 :                     for ( i = 0; i < nX; i++ )
     692             :                     {
     693    94579200 :                         temp_real = W_add( temp_real, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_real_fx[i] ) );
     694    94579200 :                         temp_imag = W_add( temp_imag, W_mult0_32_32( mixing_matrix_smooth_fx[idx], input_f_imag_fx[i] ) );
     695    94579200 :                         idx += nY;
     696             :                     }
     697    47059200 :                     Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_real, shifter ) ) );
     698    47059200 :                     move32();
     699    47059200 :                     Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], W_extract_l( W_shr( temp_imag, shifter ) ) );
     700    47059200 :                     move32();
     701             :                 }
     702             :             }
     703             : #else
     704             : 
     705             :             matrix_product_mant_exp_fx( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, nY, nX, 0, input_f_real_fx, 25, nX, 1, 0, output_f_real_fx, &output_f_real_e );
     706             :             scale_sig32( output_f_real_fx, MAX_CICP_CHANNELS, sub( 6, sub( 31, output_f_real_e ) ) ); // Q6
     707             : 
     708             : 
     709             :             matrix_product_mant_exp_fx( mixing_matrix_smooth_fx, mixing_matrix_smooth_e, nY, nX, 0, input_f_imag_fx, 25, nX, 1, 0, output_f_imag_fx, &output_f_imag_e );
     710             :             scale_sig32( output_f_imag_fx, MAX_CICP_CHANNELS, sub( 6, sub( 31, output_f_imag_e ) ) ); // Q6
     711             : 
     712             :             /* collect output */
     713             :             FOR( ch_idx = 0; ch_idx < nY; ch_idx++ )
     714             :             {
     715             :                 Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_RealBuffer_fx[ch_idx][slot_idx_sfr][band], output_f_real_fx[ch_idx] );
     716             :                 move32();
     717             :                 Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band] = L_add( Cldfb_ImagBuffer_fx[ch_idx][slot_idx_sfr][band], output_f_imag_fx[ch_idx] );
     718             :                 move32();
     719             :             }
     720             : #endif
     721             :         }
     722             :     }
     723             : 
     724      140160 :     return;
     725             : }
     726             : 
     727             : /*-------------------------------------------------------------------*
     728             :  * computeMixingMatrices()
     729             :  *
     730             :  * compute a mixing matrix using the convariance synthesis approach
     731             :  *-------------------------------------------------------------------*/
     732             : 
     733      113120 : Word16 computeMixingMatrices_fx(
     734             :     const Word16 num_inputs,  /* i  : number of input channels                                                                                      */
     735             :     const Word16 num_outputs, /* i  : number of output channels                                                                                     */
     736             :     const Word32 *Cx,         /* i  : input channel covariance matrix Q(31-Cx_e)                                                                               */
     737             :     Word16 Cx_e,
     738             :     const Word32 *Cy, /* i  : target covariance matrix Q(31-Cy_e)                                                                               */
     739             :     Word16 Cy_e,
     740             :     const Word32 *Q, /* i  : prototype matrix (usually a upmix matrix) Q_fx_e                                                                     */
     741             :     Word16 Q_fx_e,
     742             :     const Word16 energy_compensation_flag, /* i  : flag indicating that the energy compensation should be performed (i.e. no residual mixing matrix will follow) */
     743             :     const Word32 reg_Sx_fx,                /* i  : regularization factor for the input channel singular values                                                   */
     744             :     Word16 reg_Sx_e,
     745             :     const Word32 reg_ghat_fx, /* i  : regularization factor for the normalization matrix  Q(31-reg_ghat_e)                                                         */
     746             :     Word16 reg_ghat_e,
     747             :     Word32 *mixing_matrix_fx, /* o  : resulting mixing matrix       Q(31-mixing_matrix_out_e)                                                                                 */
     748             :     Word16 *mixing_matrix_out_e,
     749             :     Word32 *Cr_fx, /* o  : residual covariance matrix Q(31-Cr_e)                                                                                    */
     750             :     Word16 *Cr_e )
     751             : {
     752             :     Word16 i, j;
     753             :     Word16 out;
     754             :     Word16 nL, nC;
     755      113120 :     Word16 lengthCx = num_inputs;
     756      113120 :     Word16 lengthCy = num_outputs;
     757             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
     758             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     759             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     760             :     Word32 Cx_fx[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
     761             :     Word32 Cy_fx[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
     762             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
     763             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
     764             :     Word16 Cx_fx_e;
     765             :     Word16 Cy_fx_e;
     766             : 
     767             :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
     768             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
     769             : 
     770             : 
     771             :     Word32 limit_fx;
     772             :     Word16 limit_e;
     773             : 
     774             :     Word32 L_tmp;
     775             :     Word16 tmp_e, tmp, exp;
     776             : 
     777             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     778             :     Word32 Kx_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     779             : 
     780             :     Word16 Kx_fx_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     781             :     Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     782             : 
     783             :     Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     784             :     Word16 Kx_reg_inv_e[MAX_TRANSPORT_CHANNELS * MAX_TRANSPORT_CHANNELS];
     785             : 
     786             :     Word32 Q_fx[PARAM_MC_MAX_TRANSPORT_CHANS * MAX_CICP_CHANNELS];
     787             :     Word16 Q_e;
     788             : 
     789             :     Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     790             :     Word16 Q_Cx_e;
     791             : 
     792             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
     793             :     Word16 Cy_hat_diag_e;
     794             :     Word16 Cy_hat_diag_buff_e[MAX_OUTPUT_CHANNELS];
     795             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
     796             :     Word16 G_hat_buff_e[MAX_OUTPUT_CHANNELS];
     797             : 
     798             :     Word16 mat_mult_buffer2_e, mat_mult_buffer3_e;
     799             : 
     800             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
     801             : 
     802             :     Word16 mixing_matrix_e;
     803             : 
     804             :     Word16 Cr_fx_e;
     805             : 
     806             : 
     807             :     Word32 adj_fx[MAX_OUTPUT_CHANNELS];
     808             :     Word16 adj_e[MAX_OUTPUT_CHANNELS];
     809             :     Word32 *adj_fx_p;
     810             :     Word16 adj_fx_e;
     811             : 
     812             :     Word32 *Cr_p_fx, *Cy_tilde_p_fx, *Cy_p_fx;
     813      113120 :     push_wmops( "dirac_cov_mix_mat" );
     814             : 
     815      113120 :     out = EXIT_SUCCESS;
     816      113120 :     move16();
     817             : 
     818      113120 :     set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
     819     1923040 :     FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
     820             :     {
     821     1809920 :         set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     822     1809920 :         set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     823     1809920 :         set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
     824             :     }
     825             : 
     826             : 
     827      113120 :     Copy32( Q, Q_fx, imult1616( lengthCy, lengthCx ) );
     828      113120 :     Copy32( Cx, Cx_fx, imult1616( lengthCx, lengthCx ) );
     829      113120 :     Copy32( Cy, Cy_fx, imult1616( lengthCy, lengthCy ) );
     830             : 
     831      113120 :     Q_e = Q_fx_e;
     832      113120 :     move16();
     833      113120 :     Cx_fx_e = Cx_e;
     834      113120 :     move16();
     835      113120 :     Cy_fx_e = Cy_e;
     836      113120 :     move16();
     837             : 
     838             : 
     839             :     /*-----------------------------------------------------------------*
     840             :      * Decomposition of Cy
     841             :      *-----------------------------------------------------------------*/
     842             : 
     843             :     /* Processing the SVD */
     844             : 
     845      113120 :     mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
     846             : 
     847      113120 :     svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
     848      113120 :     Word16 max_e = -32;
     849             :     /* Computing Ky */
     850      688444 :     FOR( i = 0; i < lengthCy; ++i )
     851             :     {
     852     3533968 :         FOR( j = 0; j < lengthCy; ++j )
     853             :         {
     854     2958644 :             tmp_e = svd_s_buffer_e[j];
     855     2958644 :             move16();
     856     2958644 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
     857     2958644 :             Ky_fx[i + ( j * lengthCy )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
     858     2958644 :             move32();
     859     2958644 :             Ky_fx_e[i + ( j * lengthCy )] = tmp_e;
     860     2958644 :             move16();
     861     2958644 :             max_e = s_max( max_e, tmp_e );
     862             :         }
     863             :     }
     864     3071764 :     FOR( i = 0; i < lengthCy * lengthCy; ++i )
     865             :     {
     866     2958644 :         Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
     867     2958644 :         move32();
     868     2958644 :         Ky_fx_e[i] = max_e;
     869     2958644 :         move16();
     870             :     }
     871             : 
     872             :     /*-----------------------------------------------------------------*
     873             :      * Decomposition of Cx
     874             :      *-----------------------------------------------------------------*/
     875             : 
     876             :     /* Processing the SVD */
     877             : 
     878      113120 :     mat2svdMat_fx( Cx_fx, svd_in_buffer_fx, lengthCx, lengthCx, 0 );
     879             : 
     880      113120 :     svd_fx( svd_in_buffer_fx, Cx_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCx );
     881      113120 :     max_e = -32;
     882      339920 :     FOR( i = 0; i < lengthCx; ++i )
     883             :     {
     884      682080 :         FOR( j = 0; j < lengthCx; ++j )
     885             :         {
     886      455280 :             tmp_e = svd_s_buffer_e[j];
     887      455280 :             move16();
     888      455280 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
     889      455280 :             Kx_fx[( i + ( j * lengthCx ) )] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
     890      455280 :             move32();
     891      455280 :             Kx_fx_e[( i + ( j * lengthCx ) )] = tmp_e;
     892      455280 :             move16();
     893      455280 :             max_e = s_max( max_e, tmp_e );
     894             :         }
     895             :     }
     896      568400 :     FOR( i = 0; i < lengthCx * lengthCx; ++i )
     897             :     {
     898      455280 :         Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
     899      455280 :         move32();
     900      455280 :         Kx_fx_e[i] = max_e;
     901      455280 :         move16();
     902             :     }
     903             : 
     904      339920 :     FOR( i = 0; i < lengthCx; ++i )
     905             :     {
     906      226800 :         tmp_e = svd_s_buffer_e[i];
     907      226800 :         move16();
     908      226800 :         svd_s_buffer_fx[i] = Sqrt32( svd_s_buffer_fx[i], &tmp_e ); // Q(31-tmp_e)
     909      226800 :         move32();
     910      226800 :         svd_s_buffer_e[i] = tmp_e;
     911      226800 :         move16();
     912             :     }
     913             : 
     914             :     /*-----------------------------------------------------------------*
     915             :      * Regularization of Sx
     916             :      *-----------------------------------------------------------------*/
     917             : 
     918      113120 :     limit_fx = svd_s_buffer_fx[0];
     919      113120 :     move32();
     920      113120 :     limit_e = svd_s_buffer_e[0];
     921      113120 :     move16();
     922      226800 :     FOR( i = 1; i < lengthCx; i++ )
     923             :     {
     924      113680 :         IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) > 0 )
     925             :         {
     926           0 :             limit_fx = svd_s_buffer_fx[i];
     927           0 :             move32();
     928           0 :             limit_e = svd_s_buffer_e[i];
     929           0 :             move16();
     930             :         }
     931             :     }
     932             : 
     933      113120 :     L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx );
     934      113120 :     limit_e = add( limit_e, reg_Sx_e );
     935      113120 :     limit_fx = L_add( L_tmp, EPSILON_FX );
     936             : 
     937      339920 :     FOR( i = 0; i < lengthCx; ++i )
     938             :     {
     939      226800 :         IF( BASOP_Util_Cmp_Mant32Exp( svd_s_buffer_fx[i], svd_s_buffer_e[i], limit_fx, limit_e ) < 0 )
     940             :         {
     941       49196 :             svd_s_buffer_fx[i] = limit_fx;
     942       49196 :             move32();
     943       49196 :             svd_s_buffer_e[i] = limit_e;
     944       49196 :             move16();
     945             :         }
     946             :     }
     947             : 
     948             : 
     949      113120 :     limit_fx = 0;
     950      113120 :     move32();
     951      113120 :     limit_e = 0;
     952      113120 :     move16();
     953             : 
     954             : 
     955             :     /*-----------------------------------------------------------------*
     956             :      * regularized Kx-1
     957             :      *-----------------------------------------------------------------*/
     958             : 
     959      339920 :     FOR( i = 0; i < lengthCx; ++i )
     960             :     {
     961             :         Word16 scale, reg_fac_fx;
     962      226800 :         reg_fac_fx = BASOP_Util_Divide3232_Scale( 1, svd_s_buffer_fx[i], &scale );
     963      226800 :         scale = add( scale, sub( Q31, svd_s_buffer_e[i] ) );
     964      682080 :         FOR( j = 0; j < lengthCx; ++j )
     965             :         {
     966      455280 :             Kx_reg_inv_fx[i + j * lengthCx] = Mpy_32_16_1( svd_u_buffer_fx[j][i], reg_fac_fx ); // Q(31-scale)
     967      455280 :             move32();
     968      455280 :             Kx_reg_inv_e[i + j * lengthCx] = scale;
     969      455280 :             move16();
     970             :         }
     971             :     }
     972             : 
     973             :     /*-----------------------------------------------------------------*
     974             :      * normalization matrix G hat
     975             :      *-----------------------------------------------------------------*/
     976             : 
     977             :     /* Computing Q*Cx*Q' */
     978             : 
     979      113120 :     matrix_product_mant_exp_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_fx, Cx_fx_e, lengthCx, lengthCx, 0, Q_Cx_fx, &Q_Cx_e );
     980             : 
     981      113120 :     matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
     982             : 
     983             : 
     984      688444 :     FOR( i = 0; i < lengthCy; ++i )
     985             :     {
     986      575324 :         IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
     987             :         {
     988      289583 :             limit_fx = Cy_hat_diag_fx[i];
     989      289583 :             move32();
     990      289583 :             limit_e = Cy_hat_diag_e;
     991      289583 :             move16();
     992             :         }
     993             :     }
     994      113120 :     L_tmp = Mpy_32_32( limit_fx, reg_ghat_fx ); // limit_e+ reg_ghat_e
     995      113120 :     limit_fx = L_add( L_tmp, EPSILON_FX );
     996      113120 :     limit_e = add( limit_e, reg_ghat_e );
     997             : 
     998      688444 :     FOR( i = 0; i < lengthCy; ++i )
     999             :     {
    1000      575324 :         Cy_hat_diag_buff_e[i] = Cy_hat_diag_e;
    1001      575324 :         move16();
    1002      575324 :         IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_buff_e[i] ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
    1003             :         {
    1004         210 :             Cy_hat_diag_fx[i] = limit_fx;
    1005         210 :             move32();
    1006         210 :             Cy_hat_diag_buff_e[i] = limit_e;
    1007         210 :             move16();
    1008             :         }
    1009             : 
    1010      575324 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[( i + ( i * lengthCy ) )], Cy_hat_diag_fx[i], &exp );
    1011      575324 :         exp = add( exp, sub( Cy_fx_e, Cy_hat_diag_buff_e[i] ) );
    1012      575324 :         L_tmp = Sqrt32( L_deposit_h( tmp ), &exp );
    1013      575324 :         G_hat_fx[i] = L_tmp;
    1014      575324 :         move32();
    1015      575324 :         G_hat_buff_e[i] = exp;
    1016      575324 :         move16();
    1017             :     }
    1018             : 
    1019             : 
    1020             :     /*-----------------------------------------------------------------*
    1021             :      * Formulate optimal P
    1022             :      *-----------------------------------------------------------------*/
    1023             : 
    1024             :     /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
    1025             : 
    1026             :     Word16 mat_mult_buffer1_fx_e;
    1027             : 
    1028             :     Word16 mat_mult_buffer2_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1029             : 
    1030      113120 :     matrix_product_mant_exp_fx( Kx_fx, Kx_fx_e[0], lengthCx, lengthCx, 1, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
    1031             : 
    1032      113120 :     matrix_diag_product_fx_2( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_buff_e, lengthCy, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
    1033             : 
    1034      113120 :     matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_fx_e[0], lengthCx, lengthCy, 0, Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
    1035             : 
    1036      113120 :     IF( LT_16( lengthCx, lengthCy ) )
    1037             :     {
    1038      113120 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 1 );
    1039      113120 :         nL = lengthCy;
    1040      113120 :         move16();
    1041      113120 :         nC = lengthCx;
    1042      113120 :         move16();
    1043      113120 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
    1044             :     }
    1045             :     ELSE
    1046             :     {
    1047           0 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
    1048           0 :         nL = lengthCx;
    1049           0 :         move16();
    1050           0 :         nC = lengthCy;
    1051           0 :         move16();
    1052           0 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
    1053             :     }
    1054             : 
    1055             :     /* Actually Processing P */
    1056             : 
    1057             :     /* can be skipped: lambda is always column-truncated identity matrix, so this operation just
    1058             :        truncates V to num_input_channel columns */
    1059             : 
    1060      113120 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
    1061      113120 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1062             : 
    1063      113120 :     mat_mult_buffer1_fx_e = 0;
    1064      113120 :     move16();
    1065      113120 :     mat_mult_buffer2_e = 0;
    1066      113120 :     move16();
    1067             : 
    1068      113120 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e, lengthCy, lengthCx, 0,
    1069             :                                 mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
    1070             :                                 mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1071             : 
    1072             :     /************************ Formulate M **********************/
    1073             : 
    1074      113120 :     matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_fx_e );
    1075             : 
    1076             :     Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * PARAM_MC_MAX_TRANSPORT_CHANS];
    1077             : 
    1078             :     Word16 mat_mult_buffer1_fx_e1[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1079      113120 :     set16_fx( mat_mult_buffer1_fx_e1, mat_mult_buffer1_fx_e, MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
    1080             : 
    1081      113120 :     matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e );
    1082             : 
    1083             :     /*-----------------------------------------------------------------*
    1084             :      * Formulate Cr
    1085             :      *-----------------------------------------------------------------*/
    1086             : 
    1087             :     /* Compute Cy_tilde = M*Cx*M' */
    1088             : 
    1089             : 
    1090             :     Word16 Cx_e_arr[PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS];
    1091      113120 :     set16_fx( Cx_e_arr, Cx_fx_e, PARAM_MC_MAX_TRANSPORT_CHANS * PARAM_MC_MAX_TRANSPORT_CHANS );
    1092      113120 :     matrix_product_mant_exp( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1 );
    1093             : 
    1094      113120 :     matrix_product_mant_exp( mat_mult_buffer1_fx, mat_mult_buffer1_fx_e1, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, mat_mult_buffer2_fx_e );
    1095             : 
    1096      113120 :     exp = mixing_matrix_fx_e[0];
    1097      113120 :     move16();
    1098     1156848 :     FOR( i = 1; i < lengthCy * lengthCx; i++ )
    1099             :     {
    1100     1043728 :         if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
    1101             :         {
    1102       81545 :             exp = mixing_matrix_fx_e[i];
    1103       81545 :             move16();
    1104             :         }
    1105             :     }
    1106             : 
    1107     1269968 :     FOR( i = 0; i < lengthCy * lengthCx; i++ )
    1108             :     {
    1109     1156848 :         mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
    1110     1156848 :         move32();
    1111             :     }
    1112             : 
    1113      113120 :     mixing_matrix_e = exp;
    1114      113120 :     move16();
    1115             : 
    1116      113120 :     Cr_p_fx = Cr_fx;
    1117      113120 :     Cy_p_fx = Cy_fx;
    1118      113120 :     Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1119             :     Word16 Cr_e_arr[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
    1120      688444 :     FOR( i = 0; i < lengthCy; ++i )
    1121             :     {
    1122     3533968 :         FOR( j = 0; j < lengthCy; ++j )
    1123             :         {
    1124             : 
    1125     2958644 :             *( Cr_p_fx ) = BASOP_Util_Add_Mant32Exp( *( Cy_p_fx ), Cy_fx_e, L_negate( *( Cy_tilde_p_fx ) ), mat_mult_buffer2_fx_e[i * lengthCy + j], &Cr_e_arr[i * lengthCy + j] );
    1126     2958644 :             move32();
    1127     2958644 :             Cr_p_fx++;
    1128     2958644 :             Cy_p_fx++;
    1129     2958644 :             Cy_tilde_p_fx++;
    1130             :         }
    1131             : 
    1132             :         /* Avoid Meaningless negative main diagonal elements */
    1133      575324 :         IF( Cr_fx[i + ( i * lengthCy )] < 0 )
    1134             :         {
    1135       62266 :             Cr_fx[i + ( i * lengthCy )] = 0;
    1136       62266 :             move32();
    1137       62266 :             Cr_e_arr[i + ( i * lengthCy )] = 0;
    1138       62266 :             move16();
    1139             :         }
    1140             :     }
    1141             : 
    1142      113120 :     exp = Cr_e_arr[0];
    1143      113120 :     move16();
    1144     2958644 :     FOR( i = 1; i < lengthCy * lengthCy; i++ )
    1145             :     {
    1146     2845524 :         if ( LT_16( exp, Cr_e_arr[i] ) )
    1147             :         {
    1148      103413 :             exp = Cr_e_arr[i];
    1149      103413 :             move16();
    1150             :         }
    1151             :     }
    1152             : 
    1153     3071764 :     FOR( i = 0; i < lengthCy * lengthCy; i++ )
    1154             :     {
    1155     2958644 :         Cr_fx[i] = L_shr( Cr_fx[i], sub( exp, Cr_e_arr[i] ) ); // Q(31-exp)
    1156     2958644 :         move32();
    1157             :     }
    1158             : 
    1159      113120 :     Cr_fx_e = exp;
    1160      113120 :     move16();
    1161             : 
    1162      113120 :     exp = mat_mult_buffer2_fx_e[0];
    1163      113120 :     move16();
    1164     2958644 :     FOR( i = 1; i < lengthCy * lengthCy; i++ )
    1165             :     {
    1166     2845524 :         if ( LT_16( exp, mat_mult_buffer2_fx_e[i] ) )
    1167             :         {
    1168      156985 :             exp = mat_mult_buffer2_fx_e[i];
    1169      156985 :             move16();
    1170             :         }
    1171             :     }
    1172             : 
    1173     3071764 :     FOR( i = 0; i < lengthCy * lengthCy; i++ )
    1174             :     {
    1175     2958644 :         mat_mult_buffer2_fx[i] = L_shr( mat_mult_buffer2_fx[i], sub( exp, mat_mult_buffer2_fx_e[i] ) ); // Q(31-exp)
    1176     2958644 :         move32();
    1177             :     }
    1178             : 
    1179      113120 :     mat_mult_buffer2_e = exp;
    1180      113120 :     move16();
    1181             : 
    1182             :     /*-----------------------------------------------------------------*
    1183             :      * Energy Compensation
    1184             :      *-----------------------------------------------------------------*/
    1185             : 
    1186      113120 :     IF( EQ_16( energy_compensation_flag, 1 ) )
    1187             :     {
    1188       22220 :         adj_fx_p = svd_s_buffer_fx;
    1189       22220 :         Cy_tilde_p_fx = mat_mult_buffer2_fx;
    1190             : 
    1191      135100 :         FOR( i = 0; i < lengthCy; ++i )
    1192             :         {
    1193             :             /* Avoid correction for very small energies,
    1194             :                main diagonal elements of Cy_tilde_p may be negative */
    1195      112880 :             IF( Cy_tilde_p_fx[i + ( i * lengthCy )] < 0 )
    1196             :             {
    1197           0 :                 adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1198           0 :                 move32();
    1199           0 :                 adj_e[i] = 1;
    1200           0 :                 move16();
    1201             :             }
    1202             :             ELSE
    1203             :             {
    1204      112880 :                 tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( i * lengthCy )], L_add( Cy_tilde_p_fx[i + ( i * lengthCy )], EPSILON_FX ), &exp );
    1205      112880 :                 exp = add( exp, sub( Cy_fx_e, mat_mult_buffer2_e ) );
    1206      112880 :                 L_tmp = L_deposit_h( tmp );
    1207      112880 :                 L_tmp = Sqrt32( L_tmp, &exp );
    1208      112880 :                 adj_fx_p[i] = L_tmp;
    1209      112880 :                 move32();
    1210      112880 :                 adj_e[i] = exp;
    1211      112880 :                 move16();
    1212             :             }
    1213             : 
    1214      112880 :             Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, adj_e[i] ) );
    1215      112880 :             IF( GT_32( adj_fx_p[i], temp ) )
    1216             :             {
    1217           4 :                 adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1218           4 :                 move32();
    1219           4 :                 adj_e[i] = 3;
    1220           4 :                 move16();
    1221             :             }
    1222             :         }
    1223             : 
    1224       22220 :         exp = adj_e[0];
    1225       22220 :         move16();
    1226      112880 :         FOR( i = 1; i < lengthCy; i++ )
    1227             :         {
    1228       90660 :             if ( LT_16( exp, adj_e[i] ) )
    1229             :             {
    1230        1050 :                 exp = adj_e[i];
    1231        1050 :                 move16();
    1232             :             }
    1233             :         }
    1234             : 
    1235      135100 :         FOR( i = 0; i < lengthCy; i++ )
    1236             :         {
    1237      112880 :             adj_fx[i] = L_shr( adj_fx_p[i], sub( exp, adj_e[i] ) ); // Q(31-exp)
    1238      112880 :             move32();
    1239             :         }
    1240       22220 :         adj_fx_e = exp;
    1241       22220 :         move16();
    1242             : 
    1243       22220 :         diag_matrix_product_fx( adj_fx, adj_fx_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1244             : 
    1245       22220 :         Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCx, lengthCy ) ); // Q(31-mat_mult_buffer3_e)
    1246       22220 :         mixing_matrix_e = mat_mult_buffer3_e;
    1247       22220 :         move16();
    1248             :     }
    1249             : 
    1250      113120 :     *mixing_matrix_out_e = mixing_matrix_e;
    1251      113120 :     move16();
    1252      113120 :     *Cr_e = Cr_fx_e;
    1253      113120 :     move16();
    1254      113120 :     pop_wmops();
    1255             : 
    1256      113120 :     return out;
    1257             : }
    1258             : 
    1259             : /*-------------------------------------------------------------------*
    1260             :  * computeMixingMatricesResidual()
    1261             :  *
    1262             :  * compute a residual mixing matrix using the covariance synthesis approach
    1263             :  *-------------------------------------------------------------------*/
    1264             : 
    1265       90900 : Word16 computeMixingMatricesResidual_fx(
    1266             :     const Word32 num_outputs, /* i  : number of output channels                                           */
    1267             :     const Word32 *Cx_fx,      /* i  : vector containing the diagonal diffuse prototype covariance Q(31-Cx_e)         */
    1268             :     const Word16 Cx_e,
    1269             :     const Word32 *Cy_fx, /* i  : matrix containing the missing cov (Cr from computeMixingMatrices()) Q(31-Cy_fx_e) */
    1270             :     const Word16 Cy_fx_e,
    1271             :     const Word32 reg_Sx_fx, /* i  : regularization factor for the input channel singular values Q(31-reg_Sx_e)         */
    1272             :     const Word16 reg_Sx_e,
    1273             :     const Word32 reg_ghat_fx, /* i  : regularization factor for the normalization matrix Q(31-reg_ghat_e)                  */
    1274             :     const Word16 reg_ghat_e,
    1275             :     Word32 *mixing_matrix_fx, /* o  : resulting residual mixing matrix Q(31-mixing_matrix_ret_e)                                    */
    1276             :     Word16 *mixing_matrix_ret_e )
    1277             : {
    1278             :     Word16 i, j;
    1279             :     Word16 out, lengthCx, lengthCy;
    1280       90900 :     out = EXIT_SUCCESS;
    1281       90900 :     move16();
    1282       90900 :     lengthCx = extract_l( num_outputs );
    1283       90900 :     lengthCy = extract_l( num_outputs );
    1284             : 
    1285             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1286             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1287             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
    1288             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS]; // Q31 out
    1289             : 
    1290             :     Word16 mat_mult_buffer1_buff_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1291             : 
    1292       90900 :     push_wmops( "dirac_cov_mix_mat_r" );
    1293             : 
    1294             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1295             :     Word32 adj_fx[MAX_OUTPUT_CHANNELS];
    1296             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1297       90900 :     Word16 mixing_matrix_e = 0, mat_mult_buffer1_e, adj_e, mat_mult_buffer3_e, mat_mult_buffer2_e;
    1298       90900 :     move16();
    1299             : 
    1300       90900 :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS] = { 0 };
    1301             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
    1302             :     Word32 L_tmp;
    1303             :     Word16 tmp_e;
    1304             :     Word16 tmp, scale;
    1305             :     Word32 div_tmp;
    1306             :     Word16 exp;
    1307             : 
    1308             :     Word32 Kx_fx[MAX_OUTPUT_CHANNELS];
    1309             :     Word16 Ky_fx_e[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1310             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1311             :     Word16 Kx_fx_e[MAX_OUTPUT_CHANNELS];
    1312             : 
    1313             :     Word32 limit_fx;
    1314             :     Word16 limit_e;
    1315             : 
    1316             :     Word32 Kx_reg_inv_fx[MAX_OUTPUT_CHANNELS];
    1317             :     Word16 Kx_reg_inv_e[MAX_OUTPUT_CHANNELS];
    1318             : 
    1319             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
    1320             :     Word16 Cy_hat_diag_e;
    1321             : 
    1322             :     Word16 Cy_hat_diag_fx_e[MAX_OUTPUT_CHANNELS];
    1323             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
    1324             :     Word16 G_hat_e[MAX_OUTPUT_CHANNELS];
    1325             : 
    1326             :     Word32 *adj_fx_p;
    1327             :     Word16 adj_buff_e[MAX_OUTPUT_CHANNELS];
    1328             : 
    1329             :     Word32 Cy_tilde_fx[MAX_OUTPUT_CHANNELS];
    1330             :     Word16 Cy_tilde_e;
    1331             : 
    1332             :     /*-----------------------------------------------------------------*
    1333             :      * Decomposition of Cy
    1334             :      *-----------------------------------------------------------------*/
    1335             : 
    1336             :     /* Processing the SVD */
    1337             : 
    1338             :     /* linear array to svd buffer */
    1339       90900 :     mat2svdMat_fx( Cy_fx, svd_in_buffer_fx, lengthCy, lengthCy, 0 );
    1340             : 
    1341       90900 :     svd_fx( svd_in_buffer_fx, Cy_fx_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCy, lengthCy );
    1342             : 
    1343             :     /* Computing Ky */
    1344       90900 :     Word16 max_e = -32;
    1345      553344 :     FOR( i = 0; i < lengthCy; ++i )
    1346             :     {
    1347     2841348 :         FOR( j = 0; j < lengthCy; ++j )
    1348             :         {
    1349     2378904 :             tmp_e = svd_s_buffer_e[j];
    1350     2378904 :             move16();
    1351     2378904 :             L_tmp = Sqrt32( svd_s_buffer_fx[j], &tmp_e );
    1352     2378904 :             Ky_fx[i + j * lengthCy] = Mpy_32_32( svd_u_buffer_fx[i][j], L_tmp ); // Q(31-tmp_e)
    1353     2378904 :             move32();
    1354     2378904 :             Ky_fx_e[i + j * lengthCy] = tmp_e;
    1355     2378904 :             move16();
    1356     2378904 :             max_e = s_max( max_e, tmp_e );
    1357             :         }
    1358             :     }
    1359             : 
    1360     2469804 :     FOR( i = 0; i < lengthCy * lengthCy; ++i )
    1361             :     {
    1362     2378904 :         Ky_fx[i] = L_shr( Ky_fx[i], sub( max_e, Ky_fx_e[i] ) );
    1363     2378904 :         move32();
    1364     2378904 :         Ky_fx_e[i] = max_e;
    1365     2378904 :         move16();
    1366             :     }
    1367             : 
    1368             :     /*-----------------------------------------------------------------*
    1369             :      * Decomposition of Cx
    1370             :      *-----------------------------------------------------------------*/
    1371             : 
    1372             :     /* Processing the SVD of Cx*/
    1373             :     /* Cx is a diagonal matrix, so SVD would lead to the sorted diagonal as S and u
    1374             :      *  would be just indicating the sorting index, so go straight to Kx as the
    1375             :      * square root of the diagonal of Cx */
    1376             : 
    1377             :     /* Computing Kx */
    1378       90900 :     max_e = -32;
    1379      553344 :     FOR( i = 0; i < lengthCx; ++i )
    1380             :     {
    1381      462444 :         exp = Cx_e;
    1382      462444 :         move16();
    1383      462444 :         Kx_fx[i] = Sqrt32( Cx_fx[i], &exp );
    1384      462444 :         move32();
    1385      462444 :         Kx_fx_e[i] = exp;
    1386      462444 :         move16();
    1387      462444 :         max_e = s_max( max_e, exp );
    1388             :     }
    1389             : 
    1390      553344 :     FOR( i = 0; i < lengthCx; ++i )
    1391             :     {
    1392      462444 :         Kx_fx[i] = L_shr( Kx_fx[i], sub( max_e, Kx_fx_e[i] ) );
    1393      462444 :         move32();
    1394      462444 :         Kx_fx_e[i] = max_e;
    1395      462444 :         move16();
    1396             :     }
    1397             : 
    1398             :     /*-----------------------------------------------------------------*
    1399             :      * Regularization of Sx
    1400             :      *-----------------------------------------------------------------*/
    1401             : 
    1402       90900 :     limit_fx = Kx_fx[0];
    1403       90900 :     move32();
    1404             : 
    1405      462444 :     FOR( i = 1; i < lengthCx; i++ )
    1406             :     {
    1407      371544 :         IF( GT_32( Kx_fx[i], limit_fx ) )
    1408             :         {
    1409      141665 :             limit_fx = Kx_fx[i];
    1410      141665 :             move32();
    1411             :         }
    1412             :     }
    1413             : 
    1414       90900 :     L_tmp = Mpy_32_32( limit_fx, reg_Sx_fx ); // limit_e + reg_Sx_e
    1415       90900 :     L_tmp = L_add( L_tmp, EPSILLON_FX );
    1416       90900 :     limit_fx = L_tmp;
    1417       90900 :     move16();
    1418       90900 :     limit_e = add( Kx_fx_e[0], reg_Sx_e );
    1419             : 
    1420      553344 :     FOR( i = 0; i < lengthCx; ++i )
    1421             :     {
    1422      462444 :         IF( BASOP_Util_Cmp_Mant32Exp( Kx_fx[i], Kx_fx_e[i], limit_fx, limit_e ) > 0 )
    1423             :         {
    1424      461375 :             div_tmp = Kx_fx[i];
    1425      461375 :             move32();
    1426      461375 :             exp = Kx_fx_e[i];
    1427      461375 :             move16();
    1428             :         }
    1429             :         ELSE
    1430             :         {
    1431        1069 :             div_tmp = limit_fx;
    1432        1069 :             move32();
    1433        1069 :             exp = limit_e;
    1434        1069 :             move16();
    1435             :         }
    1436      462444 :         tmp = BASOP_Util_Divide3232_Scale( 1073741824, div_tmp, &scale ); // 1073741824 -> 1.0f in Q30
    1437      462444 :         scale = add( scale, sub( Q1, exp ) );
    1438             : 
    1439      462444 :         Kx_reg_inv_fx[i] = L_deposit_h( tmp );
    1440      462444 :         move32();
    1441      462444 :         Kx_reg_inv_e[i] = scale;
    1442      462444 :         move16();
    1443             :     }
    1444             : 
    1445       90900 :     limit_fx = 0;
    1446       90900 :     move32();
    1447       90900 :     limit_e = 0;
    1448       90900 :     move16();
    1449             : 
    1450             :     /*-----------------------------------------------------------------*
    1451             :      * regularized Kx-1
    1452             :      *-----------------------------------------------------------------*/
    1453             : 
    1454             :     /*-----------------------------------------------------------------*
    1455             :      * normalization matrix G hat
    1456             :      *-----------------------------------------------------------------*/
    1457             : 
    1458             :     /* Computing Cy_hat_diag */
    1459       90900 :     Copy32( Cx_fx, Cy_hat_diag_fx, extract_l( num_outputs ) ); // Q(31-Cx_e)
    1460       90900 :     Cy_hat_diag_e = Cx_e;
    1461       90900 :     move16();
    1462             : 
    1463      553344 :     FOR( i = 0; i < lengthCy; ++i )
    1464             :     {
    1465      462444 :         IF( BASOP_Util_Cmp_Mant32Exp( Cy_hat_diag_fx[i], Cy_hat_diag_e, limit_fx, limit_e ) > 0 )
    1466             :         {
    1467      232531 :             limit_fx = Cy_hat_diag_fx[i];
    1468      232531 :             move32();
    1469      232531 :             limit_e = Cy_hat_diag_e;
    1470      232531 :             move16();
    1471             :         }
    1472             :     }
    1473             : 
    1474       90900 :     L_tmp = Mpy_32_32( limit_fx, reg_ghat_fx ); // Q(limit_e+reg_ghat_e)
    1475       90900 :     limit_fx = L_add( L_tmp, EPSILON_FX );
    1476       90900 :     limit_e = add( limit_e, reg_ghat_e );
    1477             : 
    1478             :     /* Computing G_hat */
    1479      553344 :     FOR( i = 0; i < lengthCy; ++i )
    1480             :     {
    1481      462444 :         Cy_hat_diag_fx_e[i] = Cy_hat_diag_e;
    1482      462444 :         move16();
    1483      462444 :         IF( BASOP_Util_Cmp_Mant32Exp( limit_fx, limit_e, Cy_hat_diag_fx[i], Cy_hat_diag_e ) > 0 ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
    1484             :         {
    1485         210 :             Cy_hat_diag_fx[i] = limit_fx;
    1486         210 :             move32();
    1487         210 :             Cy_hat_diag_fx_e[i] = limit_e;
    1488         210 :             move16();
    1489             :         }
    1490      462444 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + i * lengthCy], Cy_hat_diag_fx[i], &scale );
    1491      462444 :         scale = add( scale, sub( Cy_fx_e, Cy_hat_diag_fx_e[i] ) );
    1492      462444 :         L_tmp = Sqrt32( L_deposit_h( tmp ), &scale );
    1493             : 
    1494      462444 :         G_hat_fx[i] = L_tmp;
    1495      462444 :         move32();
    1496      462444 :         G_hat_e[i] = scale;
    1497      462444 :         move16();
    1498             :     }
    1499             : 
    1500             :     /*-----------------------------------------------------------------*
    1501             :      * Formulate optimal P
    1502             :      *-----------------------------------------------------------------*/
    1503             : 
    1504      553344 :     FOR( i = 0; i < num_outputs; i++ )
    1505             :     {
    1506      462444 :         L_tmp = Mpy_32_32( Kx_fx[i], G_hat_fx[i] ); // Q(31-(Kx_fx_e+G_hag_e))
    1507      462444 :         Kx_fx[i] = L_tmp;
    1508      462444 :         move32();
    1509      462444 :         Kx_fx_e[i] = add( Kx_fx_e[i], G_hat_e[i] );
    1510      462444 :         move16();
    1511             :     }
    1512             : 
    1513      553344 :     FOR( i = 0; i < num_outputs; i++ )
    1514             :     {
    1515             :         Word32 fac_fx;
    1516      462444 :         fac_fx = Kx_fx[i];
    1517      462444 :         move32();
    1518             : 
    1519     2841348 :         FOR( j = 0; j < num_outputs; j++ )
    1520             :         {
    1521     2378904 :             L_tmp = Mpy_32_32( Ky_fx[i + j * num_outputs], fac_fx ); // Q(31-(Ky_fx_e+Kx_fx_e))
    1522     2378904 :             mat_mult_buffer1_fx[i + j * num_outputs] = L_tmp;
    1523     2378904 :             move32();
    1524     2378904 :             mat_mult_buffer1_buff_e[i + j * num_outputs] = extract_l( L_add( Ky_fx_e[i + j * num_outputs], Kx_fx_e[i] ) );
    1525     2378904 :             move16();
    1526             :         }
    1527             :     }
    1528             : 
    1529       90900 :     mat_mult_buffer1_e = mat_mult_buffer1_buff_e[0];
    1530       90900 :     move16();
    1531     2378904 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1532             :     {
    1533     2288004 :         if ( LT_16( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) )
    1534             :         {
    1535       24305 :             mat_mult_buffer1_e = mat_mult_buffer1_buff_e[i];
    1536       24305 :             move16();
    1537             :         }
    1538             :     }
    1539             : 
    1540     2469804 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1541             :     {
    1542     2378904 :         mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( mat_mult_buffer1_e, mat_mult_buffer1_buff_e[i] ) ); // Q(31-mat_mult_buffer1_e)
    1543     2378904 :         move32();
    1544             :     }
    1545             : 
    1546       90900 :     mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, lengthCy, 0 );
    1547             : 
    1548       90900 :     svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, lengthCx, lengthCy );
    1549             : 
    1550             :     /* Actually Processing P */
    1551             : 
    1552       90900 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, lengthCy, lengthCx );
    1553       90900 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1554       90900 :     mat_mult_buffer1_e = 0;
    1555       90900 :     move16();
    1556       90900 :     mat_mult_buffer2_e = 0;
    1557       90900 :     move16();
    1558             : 
    1559       90900 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0,
    1560             :                                 mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1,
    1561             :                                 mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1562             : 
    1563             :     /*-----------------------------------------------------------------*
    1564             :      * Formulate M
    1565             :      *-----------------------------------------------------------------*/
    1566             : 
    1567             : 
    1568       90900 :     matrix_product_mant_exp_fx( Ky_fx, Ky_fx_e[0], lengthCy, lengthCy, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, lengthCy, lengthCx, 0, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
    1569       90900 :     set16_fx( mat_mult_buffer1_buff_e, mat_mult_buffer1_buff_e[0], MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS );
    1570             : 
    1571             :     Word16 mixing_matrix_fx_e[MAX_CICP_CHANNELS * MAX_CICP_CHANNELS];
    1572             : 
    1573      553344 :     FOR( i = 0; i < num_outputs; i++ )
    1574             :     {
    1575             :         Word32 fac_fx;
    1576      462444 :         fac_fx = Kx_reg_inv_fx[i];
    1577      462444 :         move32();
    1578             : 
    1579     2841348 :         FOR( j = 0; j < num_outputs; j++ )
    1580             :         {
    1581     2378904 :             L_tmp = Mpy_32_32( mat_mult_buffer1_fx[j + i * num_outputs], fac_fx ); // Q(31-mat_mult_buffer1_e+Kx_reg_inv_e)
    1582     2378904 :             mixing_matrix_fx[j + i * num_outputs] = L_tmp;
    1583     2378904 :             move32();
    1584     2378904 :             mixing_matrix_fx_e[j + i * num_outputs] = add( mat_mult_buffer1_buff_e[j + i * num_outputs], Kx_reg_inv_e[i] );
    1585     2378904 :             move16();
    1586             :         }
    1587             :     }
    1588             : 
    1589             :     /*-----------------------------------------------------------------*
    1590             :      * Formulate Cr
    1591             :      *-----------------------------------------------------------------*/
    1592             : 
    1593             :     /* Compute Cy_tilde = M*Cx*M' */
    1594             : 
    1595             :     Word16 Cx_e_arr[MAX_CICP_CHANNELS];
    1596       90900 :     set16_fx( Cx_e_arr, Cx_e, MAX_CICP_CHANNELS );
    1597             : 
    1598       90900 :     matrix_diag_product_fx_1( mixing_matrix_fx, mixing_matrix_fx_e, lengthCy, lengthCx, 0, Cx_fx, Cx_e_arr, lengthCx, mat_mult_buffer1_fx, mat_mult_buffer1_buff_e );
    1599             : 
    1600       90900 :     exp = mixing_matrix_fx_e[0];
    1601       90900 :     move16();
    1602     2378904 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1603             :     {
    1604     2288004 :         if ( LT_16( exp, mixing_matrix_fx_e[i] ) )
    1605             :         {
    1606        9848 :             exp = mixing_matrix_fx_e[i];
    1607        9848 :             move16();
    1608             :         }
    1609             :     }
    1610             : 
    1611     2469804 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1612             :     {
    1613     2378904 :         mixing_matrix_fx[i] = L_shr( mixing_matrix_fx[i], sub( exp, mixing_matrix_fx_e[i] ) ); // Q(31-exp)
    1614     2378904 :         move32();
    1615             :     }
    1616       90900 :     mixing_matrix_e = exp;
    1617       90900 :     move16();
    1618             : 
    1619       90900 :     exp = mat_mult_buffer1_buff_e[0];
    1620       90900 :     move16();
    1621     2378904 :     FOR( i = 1; i < num_outputs * num_outputs; i++ )
    1622             :     {
    1623     2288004 :         if ( LT_16( exp, mat_mult_buffer1_buff_e[i] ) )
    1624             :         {
    1625        9848 :             exp = mat_mult_buffer1_buff_e[i];
    1626        9848 :             move16();
    1627             :         }
    1628             :     }
    1629             : 
    1630     2469804 :     FOR( i = 0; i < num_outputs * num_outputs; i++ )
    1631             :     {
    1632     2378904 :         mat_mult_buffer1_fx[i] = L_shr( mat_mult_buffer1_fx[i], sub( exp, mat_mult_buffer1_buff_e[i] ) ); // Q(31-exp)
    1633     2378904 :         move32();
    1634             :     }
    1635       90900 :     mat_mult_buffer1_e = exp;
    1636       90900 :     move16();
    1637             : 
    1638       90900 :     matrix_product_diag_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 1, Cy_tilde_fx, &Cy_tilde_e );
    1639             : 
    1640             :     /*-----------------------------------------------------------------*
    1641             :      * Energy Compensation
    1642             :      *-----------------------------------------------------------------*/
    1643             : 
    1644       90900 :     adj_fx_p = svd_s_buffer_fx;
    1645             : 
    1646      553344 :     FOR( i = 0; i < lengthCy; ++i )
    1647             :     {
    1648      462444 :         tmp = BASOP_Util_Divide3232_Scale( Cy_fx[i + ( lengthCy * i )], L_add( Cy_tilde_fx[i], EPSILON_FX ), &scale );
    1649      462444 :         scale = add( scale, sub( Cy_fx_e, Cy_tilde_e ) );
    1650      462444 :         adj_fx_p[i] = Sqrt32( L_deposit_h( tmp ), &scale );
    1651      462444 :         move32();
    1652      462444 :         adj_buff_e[i] = scale;
    1653      462444 :         move16();
    1654      462444 :         Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, scale ) );
    1655      462444 :         IF( GT_32( adj_fx_p[i], temp ) ) // 1073741824 -> 1.0f in Q30
    1656             :         {
    1657           1 :             adj_fx_p[i] = 1073741824; // 1.0f in Q30
    1658           1 :             move32();
    1659           1 :             adj_buff_e[i] = 3;
    1660           1 :             move16();
    1661             :         }
    1662             :     }
    1663             : 
    1664       90900 :     adj_e = adj_buff_e[0];
    1665       90900 :     move16();
    1666             : 
    1667      462444 :     FOR( i = 1; i < lengthCy; i++ )
    1668             :     {
    1669      371544 :         if ( LT_16( adj_e, adj_buff_e[i] ) )
    1670             :         {
    1671       36546 :             adj_e = adj_buff_e[i];
    1672       36546 :             move16();
    1673             :         }
    1674             :     }
    1675             : 
    1676             : 
    1677      553344 :     FOR( i = 0; i < lengthCy; i++ )
    1678             :     {
    1679      462444 :         adj_fx[i] = L_shr( adj_fx_p[i], sub( adj_e, adj_buff_e[i] ) ); // Q(31-adj_e)
    1680      462444 :         move32();
    1681             :     }
    1682             : 
    1683       90900 :     diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1684       90900 :     Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
    1685       90900 :     *mixing_matrix_ret_e = mat_mult_buffer3_e;
    1686       90900 :     move16();
    1687             : 
    1688       90900 :     pop_wmops();
    1689             : 
    1690       90900 :     return out;
    1691             : }
    1692             : 
    1693             : 
    1694             : /*-------------------------------------------------------------------*
    1695             :  * computeMixingMatricesISM()
    1696             :  *
    1697             :  *
    1698             :  *-------------------------------------------------------------------*/
    1699      315180 : Word16 computeMixingMatricesISM_fx(
    1700             :     const Word16 num_inputs,
    1701             :     const Word16 num_responses,
    1702             :     const Word16 num_outputs,
    1703             :     const Word32 *responses_fx, /*Q(31-responses_e) */
    1704             :     const Word16 responses_e,
    1705             :     const Word32 *ener_fx, /*Q(31-ener_e) */
    1706             :     const Word16 ener_e,
    1707             :     const Word32 *Cx_diag_fx, /*Q(31-diag_e) */
    1708             :     const Word16 Cx_diag_e,
    1709             :     const Word32 *Cy_diag_fx, /*Q(31-diag_e) */
    1710             :     const Word16 Cy_diag_e,
    1711             :     const Word16 *Q_16fx, // Q15
    1712             :     const Word16 energy_compensation_flag,
    1713             :     const Word32 reg_Sx_fx,   /*Q0*/
    1714             :     const Word32 reg_ghat_fx, /*Q0*/
    1715             :     Word32 *mixing_matrix_fx, /*Q(31-mixing_matrix_e) */
    1716             :     Word16 *mixing_matrix_e )
    1717             : {
    1718             :     Word16 i, out;
    1719             :     Word16 lengthCx, lengthCy;
    1720             :     Word16 nL, nC;
    1721             : 
    1722             :     Word32 *Cy_tilde_p_fx;
    1723             :     Word32 *adj_fx;
    1724             :     Word32 limit_fx;
    1725             : 
    1726             :     Word32 Ky_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1727             :     Word32 Q_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1728             :     Word32 Q_Cx_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1729             :     Word32 mat_mult_buffer1_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1730             :     Word32 G_hat_fx[MAX_OUTPUT_CHANNELS];
    1731             :     Word32 mat_mult_buffer2_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1732             :     Word32 Kx_reg_inv_fx[MAX_TRANSPORT_CHANNELS];
    1733             :     Word32 mat_mult_buffer3_fx[MAX_OUTPUT_CHANNELS * MAX_OUTPUT_CHANNELS];
    1734             :     Word32 Kx_fx[MAX_TRANSPORT_CHANNELS];
    1735             :     Word16 Ky_e, Q_e, Q_Cx_e, mat_mult_buffer1_e, G_hat_e, mat_mult_buffer2_e, Kx_reg_inv_e, adj_e, mat_mult_buffer3_e, Kx_e;
    1736             : 
    1737             :     Word32 svd_in_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1738             :     Word32 svd_u_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1739             :     Word32 svd_s_buffer_fx[MAX_OUTPUT_CHANNELS];
    1740             :     Word16 svd_s_buffer_fx_e;
    1741             :     Word16 svd_s_buffer_e[MAX_OUTPUT_CHANNELS];
    1742             :     Word32 svd_v_buffer_fx[MAX_OUTPUT_CHANNELS][MAX_OUTPUT_CHANNELS];
    1743             : 
    1744             :     Word16 temp_e[MAX_OUTPUT_CHANNELS];
    1745      315180 :     push_wmops( "dirac_cov_mix_mat" );
    1746             : 
    1747      315180 :     out = EXIT_SUCCESS;
    1748      315180 :     move16();
    1749      315180 :     lengthCx = num_inputs;
    1750      315180 :     move16();
    1751      315180 :     lengthCy = num_outputs;
    1752      315180 :     move16();
    1753             : 
    1754     6073020 :     FOR( i = 0; i < lengthCy * lengthCx; i++ )
    1755             :     {
    1756     5757840 :         IF( EQ_16( Q_16fx[i], MAX_16 ) )
    1757             :         {
    1758     2575200 :             Q_fx[i] = MAX_32;
    1759     2575200 :             move32();
    1760             :         }
    1761             :         ELSE
    1762             :         {
    1763     3182640 :             Q_fx[i] = L_deposit_h( Q_16fx[i] );
    1764     3182640 :             move32();
    1765             :         }
    1766             :     }
    1767      315180 :     Q_e = 0;
    1768      315180 :     move16();
    1769      315180 :     set32_fx( svd_s_buffer_fx, 0, MAX_OUTPUT_CHANNELS );
    1770     5358060 :     FOR( i = 0; i < MAX_OUTPUT_CHANNELS; i++ )
    1771             :     {
    1772     5042880 :         set32_fx( svd_in_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1773     5042880 :         set32_fx( svd_u_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1774     5042880 :         set32_fx( svd_v_buffer_fx[i], 0, MAX_OUTPUT_CHANNELS );
    1775             :     }
    1776             : 
    1777             :     /* Decomposition of Cy = Ky*Ky' */
    1778             :     /* Ky = responses*diag(ener) */
    1779      315180 :     matrix_diag_product_fx( responses_fx, responses_e, lengthCy, num_responses, 0, ener_fx, ener_e, num_responses, Ky_fx, &Ky_e );
    1780             : 
    1781             :     /* Decomposition of Cx -> Computing Kx */
    1782      315180 :     set16_fx( temp_e, Cx_diag_e, lengthCx );
    1783      315180 :     v_sqrt_fx( Cx_diag_fx, temp_e, Kx_fx, lengthCx );
    1784      315180 :     Kx_e = temp_e[0];
    1785      315180 :     move16();
    1786      630360 :     FOR( i = 1; i < lengthCx; i++ )
    1787             :     {
    1788      315180 :         Kx_e = s_max( Kx_e, temp_e[i] );
    1789             :     }
    1790      945540 :     FOR( i = 0; i < lengthCx; i++ )
    1791             :     {
    1792      630360 :         Kx_fx[i] = L_shr_r( Kx_fx[i], sub( Kx_e, temp_e[i] ) ); // Q(31-Kx_e)
    1793      630360 :         move32();
    1794             :     }
    1795             : 
    1796             :     /* Regularization of Sx */
    1797      315180 :     maximum_32_fx( Kx_fx, lengthCx, &limit_fx );
    1798      315180 :     limit_fx = Mpy_32_32( limit_fx, reg_Sx_fx ); // Cy_hat_diag_e + reg_ghat_e
    1799             : 
    1800      945540 :     FOR( i = 0; i < lengthCx; ++i )
    1801             :     {
    1802      630360 :         IF( GT_32( Kx_fx[i], limit_fx ) )
    1803             :         {
    1804      595151 :             svd_s_buffer_fx[i] = Kx_fx[i];
    1805      595151 :             move32();
    1806             :         }
    1807             :         ELSE
    1808             :         {
    1809       35209 :             svd_s_buffer_fx[i] = limit_fx;
    1810       35209 :             move32();
    1811             :         }
    1812             :     }
    1813      315180 :     svd_s_buffer_fx_e = Kx_e;
    1814      315180 :     move16();
    1815             : 
    1816      315180 :     limit_fx = 0;
    1817      315180 :     move32();
    1818             : 
    1819             :     /* regularized Kx-1 */
    1820             : 
    1821      945540 :     FOR( i = 0; i < lengthCx; ++i )
    1822             :     {
    1823      630360 :         IF( svd_s_buffer_fx[i] )
    1824             :         {
    1825             :             Word32 reg_fac;
    1826      630356 :             reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, svd_s_buffer_fx[i], &temp_e[i] );
    1827      630356 :             Kx_reg_inv_fx[i] = reg_fac;
    1828      630356 :             move32();
    1829      630356 :             temp_e[i] = sub( temp_e[i], svd_s_buffer_fx_e );
    1830      630356 :             move16();
    1831             :         }
    1832             :         ELSE
    1833             :         {
    1834             :             Word32 reg_fac;
    1835           4 :             reg_fac = BASOP_Util_Divide3232_Scale_newton( MAX_32, EPSILON_FX_M, &temp_e[i] );
    1836           4 :             Kx_reg_inv_fx[i] = reg_fac;
    1837           4 :             move32();
    1838           4 :             temp_e[i] = sub( temp_e[i], EPSILON_FX_E );
    1839           4 :             move16();
    1840             :         }
    1841             :     }
    1842      315180 :     Kx_reg_inv_e = temp_e[0];
    1843      315180 :     move16();
    1844      630360 :     FOR( i = 1; i < lengthCx; i++ )
    1845             :     {
    1846      315180 :         Kx_reg_inv_e = s_max( Kx_reg_inv_e, temp_e[i] );
    1847             :     }
    1848      945540 :     FOR( i = 0; i < lengthCx; i++ )
    1849             :     {
    1850      630360 :         Kx_reg_inv_fx[i] = L_shr_r( Kx_reg_inv_fx[i], sub( Kx_reg_inv_e, temp_e[i] ) ); // Q(31- Kx_reg_inv_e)
    1851      630360 :         move32();
    1852             :     }
    1853             : 
    1854             :     /************************ normalization matrix G hat **********************/
    1855             : 
    1856             :     /* Computing Q*Cx*Q' */
    1857             :     Word32 Cy_hat_diag_fx[MAX_OUTPUT_CHANNELS];
    1858             :     Word16 Cy_hat_diag_e;
    1859             : 
    1860      315180 :     matrix_diag_product_fx( Q_fx, Q_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, Q_Cx_fx, &Q_Cx_e );
    1861             : 
    1862      315180 :     Word16 guard_bits = find_guarded_bits_fx( lengthCx + 1 );
    1863             : 
    1864     6073020 :     FOR( i = 0; i < lengthCy * lengthCx; ++i )
    1865             :     {
    1866     5757840 :         IF( GT_16( Q_Cx_e, Q_e ) )
    1867             :         {
    1868     5757796 :             Q_fx[i] = L_shr( Q_fx[i], guard_bits );
    1869     5757796 :             move32();
    1870             :         }
    1871             :         ELSE
    1872             :         {
    1873          44 :             Q_Cx_fx[i] = L_shr( Q_Cx_fx[i], guard_bits );
    1874          44 :             move32();
    1875             :         }
    1876             :     }
    1877             : 
    1878      315180 :     IF( GT_16( Q_Cx_e, Q_e ) )
    1879             :     {
    1880      315178 :         Q_e = add( Q_e, guard_bits );
    1881             :     }
    1882             :     ELSE
    1883             :     {
    1884           2 :         Q_Cx_e = add( Q_Cx_e, guard_bits );
    1885             :     }
    1886      315180 :     matrix_product_diag_fx( Q_Cx_fx, Q_Cx_e, lengthCy, lengthCx, 0, Q_fx, Q_e, lengthCy, lengthCx, 1, Cy_hat_diag_fx, &Cy_hat_diag_e );
    1887             : 
    1888             :     /* Computing Cy_hat_diag */
    1889     3194100 :     FOR( i = 0; i < lengthCy; ++i )
    1890             :     {
    1891     2878920 :         if ( GT_32( Cy_hat_diag_fx[i], limit_fx ) )
    1892             :         {
    1893      467357 :             limit_fx = Cy_hat_diag_fx[i];
    1894      467357 :             move32();
    1895             :         }
    1896             :     }
    1897             : 
    1898      315180 :     limit_fx = Mpy_32_32( limit_fx, reg_ghat_fx ); // Cy_hat_diag_e + reg_ghat_e
    1899             : 
    1900             :     /* Computing G_hat */
    1901     3194100 :     FOR( i = 0; i < lengthCy; ++i )
    1902             :     {
    1903     2878920 :         if ( GT_32( limit_fx, Cy_hat_diag_fx[i] ) ) /* Computing Cy_hat_diag = max(Cy_hat_diag,limit) */
    1904             :         {
    1905       13691 :             Cy_hat_diag_fx[i] = limit_fx;
    1906       13691 :             move32();
    1907             :         }
    1908     2878920 :         IF( Cy_diag_fx[i] )
    1909             :         {
    1910     1875173 :             IF( Cy_hat_diag_fx[i] )
    1911             :             {
    1912     1875173 :                 G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_hat_diag_fx[i], &temp_e[i] );
    1913     1875173 :                 move32();
    1914     1875173 :                 temp_e[i] = add( temp_e[i], sub( Cy_diag_e, Cy_hat_diag_e ) );
    1915     1875173 :                 move16();
    1916     1875173 :                 G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
    1917     1875173 :                 move32();
    1918             :             }
    1919             :             ELSE
    1920             :             {
    1921           0 :                 G_hat_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
    1922           0 :                 move32();
    1923           0 :                 temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
    1924           0 :                 move16();
    1925           0 :                 G_hat_fx[i] = Sqrt32( G_hat_fx[i], &temp_e[i] );
    1926           0 :                 move32();
    1927             :             }
    1928             :         }
    1929             :         ELSE
    1930             :         {
    1931     1003747 :             G_hat_fx[i] = 0;
    1932     1003747 :             move32();
    1933     1003747 :             temp_e[i] = 0;
    1934     1003747 :             move16();
    1935             :         }
    1936             :     }
    1937      315180 :     G_hat_e = temp_e[0];
    1938      315180 :     move16();
    1939     2878920 :     FOR( i = 1; i < lengthCy; i++ )
    1940             :     {
    1941     2563740 :         G_hat_e = s_max( G_hat_e, temp_e[i] );
    1942             :     }
    1943     3194100 :     FOR( i = 0; i < lengthCy; i++ )
    1944             :     {
    1945     2878920 :         G_hat_fx[i] = L_shr_r( G_hat_fx[i], sub( G_hat_e, temp_e[i] ) ); // Q(31-G_hat_e)
    1946     2878920 :         move32();
    1947             :     }
    1948             : 
    1949             :     /************************ Formulate optimal P **********************/
    1950             : 
    1951             :     /* Computing the input matrix Kx'*Q'*G_hat'*Ky */
    1952      315180 :     diag_matrix_product_fx( Kx_fx, Kx_e, lengthCx, Q_fx, Q_e, lengthCy, lengthCx, 1, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1953             : 
    1954      315180 :     matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCx, lengthCy, 0, G_hat_fx, G_hat_e, lengthCy, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
    1955             : 
    1956      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCy, 0, Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1957             : 
    1958      315180 :     IF( LT_16( lengthCx, num_responses ) )
    1959             :     {
    1960        3600 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 1 );
    1961             : 
    1962        3600 :         nL = num_responses;
    1963        3600 :         move16();
    1964        3600 :         nC = lengthCx;
    1965        3600 :         move16();
    1966        3600 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_v_buffer_fx, svd_s_buffer_fx, svd_u_buffer_fx, svd_s_buffer_e, nL, nC );
    1967             :     }
    1968             :     ELSE
    1969             :     {
    1970      311580 :         mat2svdMat_fx( mat_mult_buffer1_fx, svd_in_buffer_fx, lengthCx, num_responses, 0 );
    1971             : 
    1972      311580 :         nL = lengthCx;
    1973      311580 :         move16();
    1974      311580 :         nC = num_responses;
    1975      311580 :         move16();
    1976      311580 :         svd_fx( svd_in_buffer_fx, mat_mult_buffer1_e, svd_u_buffer_fx, svd_s_buffer_fx, svd_v_buffer_fx, svd_s_buffer_e, nL, nC );
    1977             :     }
    1978             : 
    1979             :     /* Actually Processing P */
    1980             : 
    1981             :     /* can be skipped: lambda is always column-truncated identity matrix, so this operation just truncates V to num_input_channel columns */
    1982      315180 :     svdMat2mat_fx( svd_v_buffer_fx, mat_mult_buffer1_fx, num_responses, lengthCx );
    1983      315180 :     mat_mult_buffer1_e = 0;
    1984      315180 :     move16();
    1985      315180 :     svdMat2mat_fx( svd_u_buffer_fx, mat_mult_buffer2_fx, lengthCx, lengthCx );
    1986      315180 :     mat_mult_buffer2_e = 0;
    1987      315180 :     move16();
    1988             : 
    1989      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, num_responses, lengthCx, 0, mat_mult_buffer2_fx, mat_mult_buffer2_e, lengthCx, lengthCx, 1, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    1990             : 
    1991             :     /************************ Formulate M **********************/
    1992             : 
    1993      315180 :     matrix_product_mant_exp_fx( Ky_fx, Ky_e, lengthCy, num_responses, 0, mat_mult_buffer3_fx, mat_mult_buffer3_e, num_responses, lengthCx, 0, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    1994             : 
    1995      315180 :     matrix_diag_product_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, Kx_reg_inv_fx, Kx_reg_inv_e, lengthCx, mixing_matrix_fx, mixing_matrix_e );
    1996             : 
    1997             :     /*********************** Energy Compensation ****************/
    1998             : 
    1999             :     /* Compute Cy_tilde = M*Cx*M' */
    2000      315180 :     matrix_diag_product_fx( mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, Cx_diag_fx, Cx_diag_e, lengthCx, mat_mult_buffer1_fx, &mat_mult_buffer1_e );
    2001             : 
    2002      315180 :     matrix_product_mant_exp_fx( mat_mult_buffer1_fx, mat_mult_buffer1_e, lengthCy, lengthCx, 0, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 1, mat_mult_buffer2_fx, &mat_mult_buffer2_e );
    2003             : 
    2004      315180 :     IF( EQ_16( energy_compensation_flag, 1 ) )
    2005             :     {
    2006      315180 :         adj_fx = svd_s_buffer_fx;
    2007      315180 :         Cy_tilde_p_fx = mat_mult_buffer2_fx;
    2008     3194100 :         FOR( i = 0; i < lengthCy; ++i )
    2009             :         {
    2010             :             /* Avoid correction for very small energies, main diagonal elements of Cy_tilde_p may be negative */
    2011     2878920 :             IF( Cy_tilde_p_fx[( i + ( i * lengthCy ) )] < 0 )
    2012             :             {
    2013           0 :                 adj_fx[i] = MAX_32;
    2014           0 :                 move32();
    2015           0 :                 temp_e[i] = 0;
    2016           0 :                 move16();
    2017             :             }
    2018             :             ELSE
    2019             :             {
    2020     2878920 :                 IF( Cy_diag_fx[i] )
    2021             :                 {
    2022     1875173 :                     IF( Cy_tilde_p_fx[i + ( i * lengthCy )] )
    2023             :                     {
    2024     1874497 :                         adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], Cy_tilde_p_fx[i + ( i * lengthCy )], &temp_e[i] );
    2025     1874497 :                         move32();
    2026     1874497 :                         temp_e[i] = add( temp_e[i], sub( Cy_diag_e, mat_mult_buffer2_e ) );
    2027     1874497 :                         move16();
    2028     1874497 :                         adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
    2029     1874497 :                         move32();
    2030             :                     }
    2031             :                     ELSE
    2032             :                     {
    2033         676 :                         adj_fx[i] = BASOP_Util_Divide3232_Scale_newton( Cy_diag_fx[i], EPSILON_FX_M, &temp_e[i] );
    2034         676 :                         move32();
    2035         676 :                         temp_e[i] = add( temp_e[i], sub( Cy_diag_e, EPSILON_FX_E ) );
    2036         676 :                         move16();
    2037         676 :                         adj_fx[i] = Sqrt32( adj_fx[i], &temp_e[i] );
    2038         676 :                         move32();
    2039             :                     }
    2040             :                 }
    2041             :                 ELSE
    2042             :                 {
    2043     1003747 :                     adj_fx[i] = 0;
    2044     1003747 :                     move32();
    2045     1003747 :                     temp_e[i] = 0;
    2046     1003747 :                     move16();
    2047             :                 }
    2048             :             }
    2049             : 
    2050     2878920 :             Word32 temp = W_shl_sat_l( W_deposit32_l( 4 ), sub( 31, temp_e[i] ) );
    2051     2878920 :             IF( GT_32( adj_fx[i], temp ) )
    2052             :             {
    2053        5516 :                 adj_fx[i] = MAX_32;
    2054        5516 :                 move32();
    2055        5516 :                 temp_e[i] = 2;
    2056        5516 :                 move16();
    2057             :             }
    2058             :         }
    2059      315180 :         adj_e = temp_e[0];
    2060      315180 :         move16();
    2061     2878920 :         FOR( i = 1; i < lengthCy; i++ )
    2062             :         {
    2063     2563740 :             adj_e = s_max( adj_e, temp_e[i] );
    2064             :         }
    2065     3194100 :         FOR( i = 0; i < lengthCy; i++ )
    2066             :         {
    2067     2878920 :             adj_fx[i] = L_shr_r( adj_fx[i], sub( adj_e, temp_e[i] ) ); // Q(31-adj_e)
    2068     2878920 :             move32();
    2069             :         }
    2070             : 
    2071      315180 :         diag_matrix_product_fx( adj_fx, adj_e, lengthCy, mixing_matrix_fx, *mixing_matrix_e, lengthCy, lengthCx, 0, mat_mult_buffer3_fx, &mat_mult_buffer3_e );
    2072             : 
    2073      315180 :         Copy32( mat_mult_buffer3_fx, mixing_matrix_fx, imult1616( lengthCy, lengthCx ) );
    2074      315180 :         *mixing_matrix_e = mat_mult_buffer3_e;
    2075      315180 :         move16();
    2076             :     }
    2077             : 
    2078      315180 :     pop_wmops();
    2079             : 
    2080      315180 :     return out;
    2081             : }

Generated by: LCOV version 1.14