ADMB Documentation  11.5.3197
 All Classes Files Functions Variables Typedefs Friends Defines
dmultinom.cpp
Go to the documentation of this file.
00001 #include "statsLib.h"
00002 
00015 dvariable dmultinom(const dvector& x, const dvar_vector& p)
00016 {
00017   if(x.indexmin() != p.indexmin() || x.indexmax() != p.indexmax())
00018   {
00019     cerr << "Index bounds do not macth in"
00020     " dmultinom(const dvector& x, const dvar_vector& p)\n";
00021     ad_exit(1);
00022   }
00023 
00024   double n=sum(x);
00025   return -gammln(n+1.)+sum(gammln(x+1.))-x*log(p/sum(p));
00026 }
00027 
00028 double neff(const dvector& obs, const dvar_vector& pred)
00029 {
00030   dvector resid=value(obs-pred);
00031   dvector var=value(elem_prod(1.-pred,pred));
00032   return sum(var)/norm2(resid);
00033 }
00034 
00035 // dvariable dmultinom(const dmatrix o, const dvar_matrix& p,dvar_matrix& nu,double& tau2,const double minp, double &neff)
00036 // {
00037 //  RETURN_ARRAYS_INCREMENT();
00038 //  dvariable nloglike = dmultinom(o,p,nu,tau2,minp);
00039 //  int t = o.rowmin();
00040 //  int T = o.rowmax();
00041 //  dvector n(t,T);
00042 //  for(int i = t; i <= T; i++ )
00043 //  {
00044 //    n(i) = neff(o(i),p(i));
00045 //  }
00046 //  neff = mean(n(i));
00047 //  RETURN_ARRAYS_DECREMENT();
00048 //  return(nloglike);
00049 // }
00050 dvariable dmultinom(const dmatrix o, const dvar_matrix& p,dvar_matrix& nu,double& tau2,const double minp)
00051 { // returns the negative loglikelihood
00052   /*
00053      uses Martell dmvlogistic code for grouping age classes
00054      with observed proportions < minp
00055      NB minp must be greater than 0, otherwise algorithm returns
00056      an error if one of the observed proportions is zero.
00057      tau2 returns the median absolute standardized residual
00058 
00059   FIX ME SM I'm getting an array out of Bounds error in here for gear3
00060     has to do with the if statement (if minp >1.-4) because Ncount is only
00061     1.  I've commented the if statement out for now.
00062   */
00063   RETURN_ARRAYS_INCREMENT();
00064   int i,j,k,n;
00065   int a = o.colmin();
00066   int A=o.colmax();
00067   int t=o.rowmin();
00068   int T=o.rowmax();
00069   dvector tmptau(1,A*T);  // vector of residuals
00070     int Ncount=1;
00071     dvariable Nsamp;           // multinomial sample size
00072   //FIXME NB Make proc_err into a switch in the control file
00073   //add this likelihood description to the documentation.
00074     dvariable proc_err=0.009;   // allow for process error in the pred.age freq...fixed value based on HCAM assessments
00075   nu.initialize();
00076   dvariable nloglike=0.;
00077     //ofstream ofs("check.tmp");
00078 
00079   for(i=t; i<=T; i++)
00080   {
00081     //cout<<"Ok to here 1"<<endl;
00082     Nsamp=sum(o(i))/(1.+proc_err*sum(o(i)));
00083     n=0;
00084     dvector oo = o(i)/sum(o(i));
00085     dvar_vector pp = p(i)/sum(p(i));
00086 
00087     //count # of observations greater than minp (2% is a reasonable number)
00088     for(j=a;j<=A;j++)
00089       if(oo(j) > minp)n++;
00090 
00091     ivector iiage(1,n);
00092     dvector o1(1,n); o1.initialize();
00093     dvar_vector p1(1,n); p1.initialize();
00094     k=1;
00095     for(j=a;j<=A;j++)
00096     {
00097       if(oo(j)<=minp)
00098       {
00099         o1(k)+=oo(j);
00100         p1(k)+=pp(j);
00101       }
00102       else
00103       {
00104         o1(k)+=oo(j);
00105         p1(k)+=pp(j);
00106         if(k<=n)iiage(k)=j;   //ivector for the grouped residuals
00107         if(k<n) k++;
00108       }
00109     }
00110     /*
00111     //assign residuals to nu based on iiage index
00112     dvar_vector t1 = elem_div(o1-p1,sqrt(elem_prod(p1,1.-p1)/Nsamp));
00113     for(j=1;j<=n;j++)
00114     {
00115       nu(i)(iiage(j))=t1(j);
00116       tmptau(Ncount++)=sqrt(square(value(t1(j))));
00117     }
00118     */
00119 
00120     //CHANGED Later addition from Viv to prevent crashes if
00121     //min(p1) is very small.
00122     //if(min(p1)>1e-4)
00123     {
00124       dvar_vector t1 = elem_div(o1-p1,sqrt(elem_prod(p1,1.-p1)/Nsamp));
00125       for(j=1;j<=n;j++)
00126       {
00127         nu(i)(iiage(j))=t1(j);
00128         tmptau(Ncount++)=sqrt(square(value(t1(j))));
00129       }
00130     }
00131     //end of addition.
00132     // negative log Mulitinomial with constant is:
00133     // r = -1.*(gammln(Nsamp+1)+sum(Nsamp*o1(log(p1))-gammln(Nsamp+1)));
00134     // TODO add calculation for effective sample size.
00135     /*
00136       TODO Neff = sum(elem_prod(p1,1.-p1))/sum(square(o1-p1));
00137       for each year.  Plot the Nsamp vs Neff and look for a 1:1 slope.
00138     */
00139 
00140     nloglike+=sum(-1.*elem_prod(Nsamp*o1,log(p1))+
00141           elem_prod(Nsamp*o1,log(o1)));
00142     //cout<<"Ok to here 2"<<endl;
00143   }
00144 
00145   dvector w=sort(tmptau(1,Ncount-1));
00146   //cout<<"All good "<<Ncount<<endl;
00147   tau2=w(int(Ncount/2.)); //median absolute residual (expected value of 0.67ish)
00148 
00149   RETURN_ARRAYS_DECREMENT();
00150   return(nloglike);
00151 }
00152 
00153