ADMB Documentation  11.5.3197
 All Classes Files Functions Variables Typedefs Friends Defines
df1b2im3.cpp
Go to the documentation of this file.
00001 /*
00002  * $Id$
00003  *
00004  * Author: David Fournier
00005  * Copyright (c) 2008-2012 Regents of the University of California
00006  */
00011 #  include <admodel.h>
00012 #  include <df1b2fun.h>
00013 #  include <adrndeff.h>
00014 
00019 double calculate_importance_sample_block_diagonal(const dvector& x,
00020   const dvector& u0,const dmatrix& Hess,const dvector& _xadjoint,
00021   const dvector& _uadjoint,const dmatrix& _Hessadjoint,
00022   function_minimizer * pmin)
00023 {
00024   ADUNCONST(dvector,xadjoint)
00025   ADUNCONST(dvector,uadjoint)
00026   //ADUNCONST(dmatrix,Hessadjoint)
00027   const int xs=x.size();
00028   const int us=u0.size();
00029   gradient_structure::set_NO_DERIVATIVES();
00030   int nsc=pmin->lapprox->num_separable_calls;
00031   const ivector lrea = (*pmin->lapprox->num_local_re_array)(1,nsc);
00032   int hroom =  int(sum(square(lrea)));
00033   int nvar=x.size()+u0.size()+hroom;
00034   independent_variables y(1,nvar);
00035 
00036   // need to set random effects active together with whatever
00037   // init parameters should be active in this phase
00038   initial_params::set_inactive_only_random_effects();
00039   initial_params::set_active_random_effects();
00040   /*int onvar=*/initial_params::nvarcalc();
00041   initial_params::xinit(y);    // get the initial values into the
00042   // do we need this next line?
00043   y(1,xs)=x;
00044 
00045   // contribution for quadratic prior
00046   if (quadratic_prior::get_num_quadratic_prior()>0)
00047   {
00048     //Hess+=quadratic_prior::get_cHessian_contribution();
00049     int & vxs = (int&)(xs);
00050     quadratic_prior::get_cHessian_contribution(Hess,vxs);
00051   }
00052  // Here need hooks for sparse matrix structures
00053 
00054   dvar3_array & block_diagonal_vhessian=
00055     *pmin->lapprox->block_diagonal_vhessian;
00056   block_diagonal_vhessian.initialize();
00057   dvar3_array& block_diagonal_ch=
00058     *pmin->lapprox->block_diagonal_vch;
00059     //dvar3_array(*pmin->lapprox->block_diagonal_ch);
00060   int ii=xs+us+1;
00061   d3_array& bdH=(*pmin->lapprox->block_diagonal_hessian);
00062   for (int ic=1;ic<=nsc;ic++)
00063   {
00064     int lus=lrea(ic);
00065     for (int i=1;i<=lus;i++)
00066       for (int j=1;j<=lus;j++)
00067         y(ii++)=bdH(ic)(i,j);
00068   }
00069 
00070   dvector g(1,nvar);
00071   gradcalc(0,g);
00072   gradient_structure::set_YES_DERIVATIVES();
00073   dvar_vector vy=dvar_vector(y);
00074   //initial_params::stddev_vscale(d,vy);
00075   ii=xs+us+1;
00076   if (initial_df1b2params::have_bounded_random_effects)
00077   {
00078     cerr << "can't do importance sampling with bounded random effects"
00079      " at present" << endl;
00080     ad_exit(1);
00081   }
00082   else
00083   {
00084     for (int ic=1;ic<=nsc;ic++)
00085     {
00086       int lus=lrea(ic);
00087       for (int i=1;i<=lus;i++)
00088       {
00089         for (int j=1;j<=lus;j++)
00090         {
00091           block_diagonal_vhessian(ic,i,j)=vy(ii++);
00092         }
00093       }
00094       block_diagonal_ch(ic)=
00095         choleski_decomp(inv(block_diagonal_vhessian(ic)));
00096     }
00097   }
00098 
00099    int nsamp=pmin->lapprox->num_importance_samples;
00100 
00101    dvariable vf=0.0;
00102 
00103    dvar_vector sample_value(1,nsamp);
00104    sample_value.initialize();
00105 
00106    dvar_vector tau(1,us);;
00107    for (int is=1;is<=nsamp;is++)
00108    {
00109      int offset=0;
00110      pmin->lapprox->importance_sampling_counter=is;
00111      for (int ic=1;ic<=nsc;ic++)
00112      {
00113        int lus=lrea(ic);
00114        tau(offset+1,offset+lus).shift(1)=block_diagonal_ch(ic)*
00115          pmin->lapprox->epsilon(is)(offset+1,offset+lus).shift(1);
00116        offset+=lus;
00117      }
00118 
00119      // have to reorder the terms to match the block diagonal hessian
00120      imatrix & ls=*(pmin->lapprox->block_diagonal_re_list);
00121      int mmin=ls.indexmin();
00122      int mmax=ls.indexmax();
00123 
00124      ii=1;
00125      for (int i=mmin;i<=mmax;i++)
00126      {
00127        int cmin=ls(i).indexmin();
00128        int cmax=ls(i).indexmax();
00129        for (int j=cmin;j<=cmax;j++)
00130        {
00131          vy(ls(i,j))+=tau(ii++);
00132        }
00133      }
00134      if (ii-1 != us)
00135      {
00136        cerr << "error in interface" << endl;
00137        ad_exit(1);
00138      }
00139      initial_params::reset(vy);    // get the values into the model
00140      ii=1;
00141      for (int i=mmin;i<=mmax;i++)
00142      {
00143        int cmin=ls(i).indexmin();
00144        int cmax=ls(i).indexmax();
00145        for (int j=cmin;j<=cmax;j++)
00146        {
00147          vy(ls(i,j))-=tau(ii++);
00148        }
00149      }
00150 
00151      *objective_function_value::pobjfun=0.0;
00152      //int istop=0;
00153      //if (is==65) { }
00154      pmin->AD_uf_outer();
00155 
00156      if (pmin->lapprox->use_outliers==0)
00157      {
00158        double neps=0.5*norm2(pmin->lapprox->epsilon(is));
00159 
00160        (*pmin->lapprox->importance_sampling_values)(is)=
00161            neps-value(*objective_function_value::pobjfun);
00162 
00163        (*pmin->lapprox->importance_sampling_weights)(is)=neps;
00164 
00165         sample_value(is)=*objective_function_value::pobjfun
00166           -neps;
00167      }
00168      else
00169      {
00170        dvector& e=pmin->lapprox->epsilon(is);
00171        double neps=-sum(log(.95*exp(-0.5*square(e))+
00172           0.0166666667*exp(-square(e)/18.0)));
00173 
00174        (*pmin->lapprox->importance_sampling_values)(is)=
00175          neps-value(*objective_function_value::pobjfun);
00176 
00177        sample_value(is)=*objective_function_value::pobjfun
00178          -neps;
00179      }
00180    }
00181 
00182    nsc=pmin->lapprox->num_separable_calls;
00183    dmatrix weights(1,nsc,1,nsamp);
00184    for (int is=1;is<=nsamp;is++)
00185    {
00186      int offset=0;
00187      for (int ic=1;ic<=nsc;ic++)
00188      {
00189        int lus=lrea(ic);
00190        dvector e= pmin->lapprox->epsilon(is)(offset+1,offset+lus).shift(1);
00191        offset+=lus;
00192        if (pmin->lapprox->use_outliers==0)
00193        {
00194          weights(ic,is)=0.5*norm2(e);
00195        }
00196        else
00197        {
00198          weights(ic,is)=-sum(log(.95*exp(-0.5*square(e))+
00199           0.0166666667*exp(-square(e)/18.0)));
00200        }
00201      }
00202    }
00203    dvar_vector lcomp(1,nsc);
00204    for (int isc=1;isc<=nsc;isc++)
00205    {
00206      dvar_vector & comps=
00207        (*pmin->lapprox->importance_sampling_components)(isc);
00208 
00209      dvar_vector diff=comps-weights(isc);
00210      double dmin=min(value(diff));
00211      lcomp(isc)=-dmin+log(mean(exp(-diff+dmin)));
00212 
00213      for (int is=1;is<=nsamp;is++)
00214      {
00215      }
00216    }
00217 
00218    if (laplace_approximation_calculator::
00219      print_importance_sampling_weights_flag==1)
00220    {
00221      double min_vf=min(value(sample_value));
00222      dvector tmp=exp(value(sample_value)-min_vf);
00223      cout << "The unsorted normalized importance samplng weights are " << endl
00224           << tmp << endl;
00225      cout << "The sorted normalized importance samplng weights are " << endl
00226           << sort(tmp) << endl;
00227      ad_exit(1);
00228    }
00229 
00230 
00231    double min_vf=min(value(sample_value));
00232    vf=min_vf-log(mean(exp(min_vf-sample_value)));
00233    vf-=us*0.91893853320467241;
00234 
00235    int sgn=0;
00236    dvariable ld=0.0;
00237    if (ad_comm::no_ln_det_choleski_flag)
00238    {
00239      for (int ic=1;ic<=nsc;ic++)
00240      {
00241        ld+=ln_det(block_diagonal_vhessian(ic),sgn);
00242      }
00243      ld*=0.5;
00244    }
00245    else
00246    {
00247      for (int ic=1;ic<=nsc;ic++)
00248      {
00249        ld+=ln_det_choleski(block_diagonal_vhessian(ic));
00250      }
00251      ld*=0.5;
00252    }
00253 
00254    vf+=ld;
00255 
00256    double f=value(vf);
00257    gradcalc(nvar,g);
00258 
00259    // put uhat back into the model
00260    gradient_structure::set_NO_DERIVATIVES();
00261    vy(xs+1,xs+us).shift(1)=u0;
00262    initial_params::reset(vy);    // get the values into the model
00263    gradient_structure::set_YES_DERIVATIVES();
00264 
00265   ii=1;
00266   for (int i=1;i<=xs;i++)
00267     xadjoint(i)=g(ii++);
00268   for (int i=1;i<=us;i++)
00269     uadjoint(i)=g(ii++);
00270   for (int ic=1;ic<=nsc;ic++)
00271   {
00272     int lus=lrea(ic);
00273     for (int i=1;i<=lus;i++)
00274     {
00275       for (int j=1;j<=lus;j++)
00276       {
00277         (*pmin->lapprox->block_diagonal_vhessianadjoint)(ic)(i,j)=g(ii++);
00278       }
00279     }
00280   }
00281   return f;
00282 }