ADMB Documentation: test_trust.cpp (* $Id$) Source File

Go to the documentation of this file.
00001 /*
00002  * $Id$
00003  *
00004  * Author: David Fournier
00005  * Copyright (c) 2008-2012 Regents of the University of California
00006  */
00011 #  include <fvar.hpp>
00012 #  include <admodel.h>
00013 #  include <df1b2fun.h>
00014 #  include <adrndeff.h>
00015 double evaluate_function(const dvector& x,function_minimizer * pfmin);
00016 void get_second_ders(int xs,int us,const init_df1b2vector y,dmatrix& Hess,
00017   dmatrix& Dux, df1b2_gradlist * f1b2gradlist,function_minimizer * pfmin,
00018   laplace_approximation_calculator* lap);
00019 double calculate_laplace_approximation(const dvector& x,const dvector& u0,
00020   const dmatrix& Hess,const dvector& _xadjoint,const dvector& _uadjoint,
00021   const dmatrix& _Hessadjoint,function_minimizer * pmin);
00022 
00023 double calculate_importance_sample(const dvector& x,const dvector& u0,
00024   const dmatrix& Hess,const dvector& _xadjoint,const dvector& _uadjoint,
00025   const dmatrix& _Hessadjoint,function_minimizer * pmin);
00026 
00027 dmatrix choleski_decomp_positive(const dmatrix& M,double b);
00028 
00029 //dvector laplace_approximation_calculator::default_calculations
00030  // (const dvector& _x,const double& _f,function_minimizer * pfmin)
00031 
00032 #if defined(USE_ADPVM)
00033 
00037 dvector laplace_approximation_calculator::test_trust_region_method
00038   (const dvector& _x,const double& _f,function_minimizer * pfmin)
00039 {
00040   // for use when there is no separability
00041   ADUNCONST(dvector,x)
00042   //ADUNCONST(double,f)
00043   //int i,j;
00044   int i;
00045 
00046   initial_params::set_inactive_only_random_effects();
00047   gradient_structure::set_NO_DERIVATIVES();
00048   initial_params::reset(x);    // get current x values into the model
00049   gradient_structure::set_YES_DERIVATIVES();
00050 
00051  /*
00052   initial_params::set_active_only_random_effects();
00053   int lmn_flag=0;
00054   if (ad_comm::time_flag)
00055   {
00056     if (ad_comm::ptm1)
00057     {
00058       ad_comm::ptm1->get_elapsed_time_and_reset();
00059     }
00060     if (ad_comm::ptm)
00061     {
00062       ad_comm::ptm->get_elapsed_time_and_reset();
00063     }
00064   }
00065   if (ad_comm::time_flag)
00066   {
00067     if (ad_comm::ptm)
00068     {
00069       double time=ad_comm::ptm->get_elapsed_time();
00070       if (ad_comm::global_logfile)
00071       {
00072         (*ad_comm::global_logfile) << " Time pos 0 "
00073           << time << endl;
00074       }
00075     }
00076   }
00077 
00078   double maxg;
00079   double maxg_save;
00080   dvector uhat_old(1,usize);
00081   double f_from_1=0.0;
00082   if (!inner_lmnflag)
00083   {
00084     if (!ADqd_flag)
00085     {
00086       uhat=get_uhat_quasi_newton(x,pfmin);
00087       maxg=fabs(fmc1.gmax);
00088       f_from_1=fmc1.bestf;
00089     }
00090     else
00091       uhat=get_uhat_quasi_newton_qd(x,pfmin);
00092   }
00093   else
00094   {
00095     uhat=get_uhat_lm_newton(x,pfmin);
00096   }
00097 
00098   if (ad_comm::time_flag)
00099   {
00100     if (ad_comm::ptm)
00101     {
00102       double time=ad_comm::ptm->get_elapsed_time_and_reset();
00103       if (ad_comm::global_logfile)
00104       {
00105         (*ad_comm::global_logfile) << " Time in inner optimization "
00106           << time << endl;
00107       }
00108     }
00109   }
00110  */
00111   for (i=1;i<=xsize;i++)
00112   {
00113     y(i)=x(i);
00114   }
00115   for (i=1;i<=usize;i++)
00116   {
00117     y(i+xsize)=uhat(i);
00118   }
00119   int n=xsize+usize;
00120   dvector xx(1,n);
00121   for (i=1;i<=n;i++)
00122   {
00123     xx(i)=value(y(i));
00124   }
00125   double bestf=do_one_feval(xx,pfmin);
00126   double Delta=10;
00127   double lambda;
00128   cout << "input Delta" << endl;
00129   cin >> Delta;
00130   cout << "input lambda" << endl;
00131   cin >> lambda;
00132   int outer_iter=0;
00133   int max_iteration=10;
00134   do
00135   {
00136     outer_iter++;
00137     //int ierr=0;
00138     //int niters=0;
00139     dvector g(1,n);
00140     dmatrix H(1,n,1,n);
00141 
00142     // test newton raphson
00143     get_complete_hessian(H,g,pfmin);
00144 
00145     double tol=1.e-6;
00146     //int itmax=1000;
00147     //int itol=1;
00148     //int iter=0;
00149     //double err=0;
00150     //lambda=1;
00151 
00152     //cout << "input Delta" << endl;
00153     //cin >> Delta;
00154     //cout << "input lambda" << endl;
00155     //cin >> lambda;
00156 
00157     int i;
00158 
00159     for (i=1;i<=n;i++)
00160     {
00161       H(i,i)+=lambda;
00162     }
00163 
00164     cout << "initial fun value - " << double(0.5)*xx*(H*xx)+xx*g;
00165     double truef=do_one_feval(xx,pfmin);
00166     cout << "real function value = " << truef << endl;
00167     double estdiff;
00168     double truediff;
00169     int iflag=0;
00170     int inner_iter=0;
00171     //int oldbest=(int)bestf;
00172     int maxfn=15;
00173     dvector xret=lincg(xx,g,H,tol,Delta,pfmin,truef,estdiff,
00174      truediff,bestf,iflag,inner_iter,maxfn);
00175     cout << " norm(g) = " << norm(g)
00176          << "  norm(H*xret+g) = " << norm(H*xret+g)
00177          << "  norm(H*xret-g) = " << norm(H*xret-g)
00178          << " inner_iter = " << inner_iter << endl;
00179     if (iflag==1)
00180     {
00181       Delta/=5.0;
00182     }
00183     for (i=1;i<=n;i++)
00184     {
00185       y(i)+=xret(i);
00186     }
00187     for (i=1;i<=n;i++)
00188     {
00189       xx(i)=value(y(i));
00190     }
00191 
00192     f1b2gradlist->reset();
00193     f1b2gradlist->list.initialize();
00194     f1b2gradlist->list2.initialize();
00195     f1b2gradlist->list3.initialize();
00196     f1b2gradlist->nlist.initialize();
00197     f1b2gradlist->nlist2.initialize();
00198     f1b2gradlist->nlist3.initialize();
00199   }
00200   while(outer_iter<=max_iteration);
00201   return xx;
00202 }
00203 #endif
00204 
00209 void laplace_approximation_calculator::get_complete_hessian
00210   (dmatrix& H,dvector& g,function_minimizer * pfmin)
00211 {
00212   int i,j,ip;
00213 
00214   if (ad_comm::time_flag)
00215   {
00216     if (ad_comm::ptm)
00217     {
00218         (*ad_comm::global_logfile) << " Starting Newton-Raphson "
00219           <<  endl;
00220     }
00221   }
00222 
00223   for (ip=1;ip<=num_der_blocks;ip++)
00224   {
00225     df1b2variable::minder=minder(ip);
00226     df1b2variable::maxder=maxder(ip);
00227 
00228     set_u_dot(ip);
00229 
00230     // do we need to reallocate memory for df1b2variables?
00231     check_for_need_to_reallocate(ip);
00232     df1b2_gradlist::set_yes_derivatives();
00233     //cout << re_objective_function_value::pobjfun << endl;
00234     //cout << re_objective_function_value::pobjfun->ptr << endl;
00235     (*re_objective_function_value::pobjfun)=double(0.0);
00236 
00237 #if defined(USE_DDOUBLE)
00238 #undef double
00239     df1b2variable pen=double(0.0);
00240     df1b2variable zz=double(0.0);
00241 #define double dd_real
00242 #else
00243     df1b2variable pen=0.0;
00244     df1b2variable zz=0.0;
00245 #endif
00246     initial_df1b2params::reset(y,pen);
00247      //cout << setprecision(15) << y << endl;
00248     if (initial_df1b2params::separable_flag)
00249     {
00250       H.initialize();
00251       grad.initialize();
00252     }
00253 
00254     double time1 = 0;
00255     if (ad_comm::time_flag)
00256     {
00257       if (ad_comm::ptm)
00258       {
00259         time1=ad_comm::ptm->get_elapsed_time();
00260       }
00261     }
00262 
00263     pfmin->user_function();
00264 
00265     if (ad_comm::time_flag)
00266     {
00267       if (ad_comm::ptm)
00268       {
00269         if (ad_comm::global_logfile)
00270         {
00271           double time=ad_comm::ptm->get_elapsed_time();
00272           (*ad_comm::global_logfile) << "       Time in user_function() "
00273             <<  ip << "  " << time-time1 << endl;
00274         }
00275       }
00276     }
00277 
00278     re_objective_function_value::fun_without_pen
00279       =value(*re_objective_function_value::pobjfun);
00280 
00281     (*re_objective_function_value::pobjfun)+=pen;
00282     (*re_objective_function_value::pobjfun)+=zz;
00283 
00284     //cout << setprecision(15) << *re_objective_function_value::pobjfun << endl;
00285     //cout << setprecision(15) << pen << endl;
00286     if (!initial_df1b2params::separable_flag)
00287     {
00288       set_dependent_variable(*re_objective_function_value::pobjfun);
00289       df1b2_gradlist::set_no_derivatives();
00290       df1b2variable::passnumber=1;
00291       df1b2_gradcalc1();
00292       int mind=y(1).minder;
00293       int jmin=max(mind,1);
00294       int jmax=min(y(1).maxder,xsize+usize);
00295       for (i=1;i<=xsize+usize;i++)
00296         for (j=jmin;j<=jmax;j++)
00297           H(i,j)=y(i).u_bar[j-mind];
00298 
00299     // ****************************************************************
00300     // ****************************************************************
00301     // ****************************************************************
00302     // temporary shit
00303      /*
00304       for (i=1;i<=usize;i++)
00305       {
00306         for (j=jmin;j<=jmax;j++)
00307         {
00308           //Hess(i,j-xsize)=y(i+xsize).u_bar[j-mind];
00309           y(i+xsize).get_u_bar_tilde()[j-mind]=Hessadjoint(i,j-xsize);
00310         }
00311       }
00312       cout << Hess << endl;
00313       df1b2variable::passnumber=2;
00314       df1b2_gradcalc1();
00315 
00316       df1b2variable::passnumber=3;
00317       df1b2_gradcalc1();
00318      */
00319     // ****************************************************************
00320     // ****************************************************************
00321     // ****************************************************************
00322     // ****************************************************************
00323       for (j=jmin;j<=jmax;j++)
00324         g(j)= re_objective_function_value::pobjfun->u_dot[j-mind];
00325     }
00326     if (ip<num_der_blocks)
00327       f1b2gradlist->reset();
00328   }
00329 
00330 
00331   // just to match master pvm routine
00332   if (ad_comm::time_flag)
00333   {
00334     if (ad_comm::ptm)
00335     {
00336       /*double time=*/ad_comm::ptm->get_elapsed_time();
00337     }
00338   }
00339 }
00340 
00341 
00342 
00343 #define EPS 1.0e-14
00344 
00345 
00346 
00347 /*
00348 int main(int argc,char * argv[])
00349 {
00350   int n=4;
00351   dmatrix H(1,n,1,n);
00352 
00353   double lambda;
00354   lambda=1.0;
00355 
00356   dvector g(1,n);
00357   dvector x(1,n);
00358   double tol=1.e-6;
00359   int itmax=1000;
00360   int itol=1;
00361   int iter=0;
00362   double err=0;
00363   double Delta=20;
00364   H.initialize();
00365   lambda=1;
00366 
00367   g=1.0;
00368   x=0.0;
00369   H(1,1)=1;
00370   H(2,2)=2;
00371   H(3,3)=3;
00372   H(4,4)=3;
00373   //cout << "input lambda" << endl;
00374   //cin >> lambda;
00375 
00376   for (int i=1;i<=n;i++)
00377   {
00378     H(i,i)+=lambda;
00379   }
00380   cout << "initial fun value - " << 0.5*x*(H*x)+x*g;
00381   double truef=do_one_feval(x_k,pfmin);
00382   cout << "real function value = " << truef << endl;
00383   dvector xret=lincg(x,g,H,tol,Delta,truef,estdiff);
00384 
00385   cout << H*xret << "   " <<  g << endl;
00386   exit(0);
00387 
00388 }
00389 */
00390 
00391 /*
00392 dvector laplace_approximation_calculator::lincg(dvector& x,
00393   dvector& c, dmatrix& H,double tol,double Delta,function_minimizer * pfmin,
00394   double& truef,double& estdiff,double& truediff,double& bestf,
00395   int& iflag,int& inner_iter,int maxfn)
00396 {
00397   iflag=0;
00398   int n=c.indexmax();
00399   dvector g_k(1,n);
00400   dvector p_k(1,n);
00401   dvector x_k(1,n);
00402   dvector xbest_k(1,n);
00403   double alpha_k;
00404   double beta_k;
00405   double n2gk=0.0;
00406   x_k=0.0;
00407   g_k=c;
00408   //g_k=H*x+c;
00409   p_k=-g_k;
00410   n2gk=norm2(g_k);
00411   int improve_flag=0;
00412   inner_iter=0;
00413   xbest_k=0;
00414 
00415   do
00416   {
00417     if (++inner_iter > maxfn)
00418       return xbest_k;
00419 
00420     dvector v=H*p_k;
00421     double tmp2=p_k*v;
00422     cout << tmp2 << endl;
00423 
00424     if  (tmp2 <=0.0)
00425     {
00426       cout << "matrix not positive definite " << tmp2 << endl;
00427      // find point at boubndary
00428 
00429      double a=norm2(p_k);
00430      double b=2.0*x_k*p_k;
00431      double cc=norm2(x_k)-square(Delta);
00432      double d=sqrt(b*b-4*a*cc);
00433      double r1=(-b+d)/(2.0*a);
00434      double r2=(-b-d)/(2.0*a);
00435 
00436      cout << " the roots are " << r1 << "  " << r2 << endl;
00437      if (r1>0)
00438      {
00439        x_k=x_k+r1*p_k;
00440      }
00441      else
00442      {
00443        x_k=x_k+r2*p_k;
00444      }
00445      cout << "function value = "  << 0.5*(x_k*(H*x_k)) + x_k*c <<
00446        " norm(x_k) =  " << norm(x_k) << endl;
00447      break;
00448     }
00449 
00450     alpha_k=n2gk/(tmp2);
00451     x_k+=alpha_k*p_k;
00452 
00453     double nxk=norm(x_k);
00454     if (nxk>Delta)
00455     {
00456       // we have goneoutside the trust region
00457       cout << " we have gone outside the trust region " << endl;
00458       // find point at boubndary
00459 
00460       double a=norm2(p_k);
00461       double b=2.0*x_k*p_k;
00462       double cc=norm2(x_k)-square(Delta);
00463       double d=b*b-4*a*cc;
00464       if (d<0)
00465         cout <<" d = " << d << endl;
00466       d=sqrt(d);
00467       double r1=(-b+d)/(2.0*a);
00468       double r2=(-b-d)/(2.0*a);
00469 
00470       cout << " the roots are " << r1 << "  " << r2 << endl;
00471       dvector y_1=x_k+r1*p_k;
00472       dvector y_2=x_k+r2*p_k;
00473       if (fabs(r1)<fabs(r2))
00474       {
00475         x_k=x_k+r1*p_k;
00476       }
00477       else
00478       {
00479         x_k=x_k+r2*p_k;
00480       }
00481       cout << norm(y_1) << " " << norm(y_2) << endl;
00482       cout << "r1 function value = "  << 0.5*(y_1*(H*y_1)) + y_1*c <<
00483           "  r2 functiomn value = "  << 0.5*(y_2*(H*y_2)) + y_2*c <<
00484               " normp_k = " << norm(p_k)  << sqrt(n2gk) << endl;
00485       break;
00486     }
00487 
00488     g_k+= alpha_k*v;
00489     double tmp=n2gk;
00490     n2gk=norm2(g_k);
00491     beta_k=n2gk/tmp;
00492     p_k=-g_k+beta_k*p_k;
00493     cout << "estimated function value = "
00494          << truef + 0.5*(x_k*(H*x_k)) + x_k*c << endl;
00495     double tf=do_one_feval(x+x_k,pfmin);
00496     cout << "real function value = " << tf << endl;
00497     if (tf<=bestf)
00498     {
00499      improve_flag=1;
00500      xbest_k=x_k;
00501      cout << "norm(H*xbest_k+c)" << norm(H*xbest_k+c)  << endl;
00502      bestf=tf;
00503     }
00504     else
00505     {
00506       if (improve_flag==0) iflag=1;
00507       return xbest_k;
00508     }
00509   }
00510   while (n2gk>tol);
00511   estdiff=0.5*(x_k*(H*x_k)) + x_k*c;
00512   cout << "estimated function value = "
00513     << truef + estdiff << endl;
00514   double tf=do_one_feval(x+x_k,pfmin);
00515   truediff=tf-truef;
00516   truef=tf;
00517   cout << "real function value = " << tf << endl;
00518   return xbest_k;
00519 }
00520 */
00521 
00526 double laplace_approximation_calculator::do_one_feval
00527   (const dvector& x,function_minimizer * pfmin)
00528 {
00529   double f=0.0;
00530   //double fb=1.e+100;
00531   dvector g(1,usize);
00532   dvector ub(1,usize);
00533   initial_params::set_active_random_effects();
00534   int nvar=initial_params::nvarcalc();
00535   int nvar1=initial_params::nvarcalc_all();
00536   cout << nvar << " " << nvar1 << endl;
00537   gradient_structure::set_NO_DERIVATIVES();
00538   dvariable vf=0.0;
00539   dvariable pen=initial_params::reset(dvar_vector(x));
00540   *objective_function_value::pobjfun=0.0;
00541   pfmin->AD_uf_inner();
00542   vf+=*objective_function_value::pobjfun;
00543   vf+=pen;
00544   f=value(vf);
00545   return f;
00546 }
00547 
00548 /*
00549 dvector laplace_approximation_calculator::lincg(dvector& x,
00550   dvector& c, dmatrix& H,double tol,double Delta,function_minimizer * pfmin,
00551   double& truef,double& estdiff,double& truediff,double& bestf,
00552   int& iflag,int& inner_iter,int maxfn)
00553 {
00554   iflag=0;
00555   int n=c.indexmax();
00556   dvector g_k(1,n);
00557   dvector p_k(1,n);
00558   dvector x_k(1,n);
00559   dvector xbest_k(1,n);
00560   double alpha_k;
00561   double beta_k;
00562   double n2gk=0.0;
00563   x_k=0.0;
00564   g_k=c;
00565   p_k=-g_k;
00566   n2gk=norm2(g_k);
00567   int improve_flag=0;
00568   inner_iter=0;
00569   xbest_k=0;
00570   cout << norm2(H*p_k+c) << endl;
00571 
00572   do
00573   {
00574     dvector v=H*p_k;
00575     double tmp2=p_k*v;
00576     alpha_k=n2gk/(tmp2);
00577     x_k+=alpha_k*p_k;
00578     double nxk=norm(x_k);
00579     g_k+= alpha_k*v;
00580     double tmp=n2gk;
00581     n2gk=norm2(g_k);
00582     beta_k=n2gk/tmp;
00583     p_k=-g_k+beta_k*p_k;
00584     cout << norm2(H*p_k+c) << endl;
00585   }
00586   while(1);
00587 }
00588 */
00589 
00590 /*
00591 dvector laplace_approximation_calculator::lincg(dvector& xinit,
00592   dvector& c, dmatrix& H,double tol,double Delta,function_minimizer * pfmin,
00593   double& truef,double& estdiff,double& truediff,double& bestf,
00594   int& iflag,int& inner_iter,int maxfn)
00595 {
00596   iflag=0;
00597   int n=c.indexmax();
00598   dmatrix g(0,20,1,n);
00599   dmatrix q(0,20,1,n);
00600   dmatrix p(0,20,1,n);
00601   dmatrix x(0,20,1,n);
00602   dmatrix r(0,20,1,n);
00603   dvector alpha(0,20);
00604   dvector beta(0,20);
00605   int k=0;
00606   x(0)=0.0;
00607   g(0)=H*x(0)+c;
00608   p(0)=-g(0);
00609   cout << norm(H*x(k)+c) << endl;
00610 
00611   do
00612   {
00613     alpha(k)=norm2(g(k))/(p(k)*H*p(k));
00614     x(k+1)=x(k)+alpha(k)*p(k);
00615     g(k+1)=g(k)+alpha(k)*H*p(k);
00616     beta(k)=norm2(g(k+1))/norm2(g(k));
00617     p(k+1)=-g(k+1)+beta(k)*p(k);
00618     k++;
00619     cout << norm(H*x(k)+c) << endl;
00620   }
00621   while(1);
00622 }
00623 */
00624 
00625 #if defined(USE_ADPVM)
00626 
00630 dvector laplace_approximation_calculator::lincg(dvector& xinit,
00631   dvector& c, dmatrix& H1,double tol,double Delta,function_minimizer * pfmin,
00632   double& truef,double& estdiff,double& truediff,double& bestf,
00633   int& iflag,int& inner_iter,int maxfn)
00634 {
00635   iflag=0;
00636   int n=c.indexmax();
00637   dmatrix r(1,20,1,n);
00638   dmatrix p(1,20,1,n);
00639   dmatrix x(1,20,1,n);
00640   dvector alpha(1,20);
00641   dvector beta(1,20);
00642   dmatrix H=-H1;
00643   int k=1;
00644   x(1)=0.0;
00645   r(1)=-c;
00646   p(1)=r(1);
00647   cout << norm(H*x(k)+c) << endl;
00648   cout << norm(H*x(k)-c) << endl;
00649 
00650   do
00651   {
00652     dvector w=H*p(k);
00653     alpha(k)=(r(k)*H*r(k))/norm2(w);
00654     r(k+1)=r(k)-alpha(k)*(H*p(k));
00655     beta(k)=(r(k+1)*H*r(k+1))/(r(k)*H*r(k));
00656     p(k+1)=r(k)+beta(k)*p(k);
00657     x(k+1)=x(k)+alpha(k)*p(k);
00658     k++;
00659     cout << norm(H*x(k)+c) << endl;
00660     cout << norm(H*x(k)-c) << endl;
00661   }
00662   while(1);
00663   return 0;
00664 }
00665 #endif