/** Reduction kernel function: per-element contribution to the gamma coefficient of the
* conjugate gradient (coordinate of the line minimization), for the calculated vs observed
* Poisson log-likelihood.
*
* Returns a float2 holding the numerator (.x) and denominator (.y) contributions of the coefficient.
*
* iobs: observed intensities, read at index i.
* vPO, vPdO, vdPO, vdPdO: float2 arrays, each read at i + imode*NXYZ for imode in [0, NBMODE).
*   Presumably the complex (re, im) products P*O, P*dO, dP*O and dP*dO - confirm with the caller.
* i: index of the element handled by this call.
* scale: factor applied to the float2 values, and twice to the observed intensity and to
*   both background terms. Used to avoid overflows.
* background: value added to the summed squared modulus of the vPO values (after scaling).
* dbackg: background term entering the first-order coefficient (after scaling).
* npsi: elements for which i/NXY >= npsi contribute (0,0).
*
* NXY, NXYZ and NBMODE are constants defined outside this function.
*/
float2 Gamma(__global float *iobs, __global float2 *vPO, __global float2 *vPdO, __global float2 *vdPO, __global float2 *vdPdO,
             const int i, const float scale, const float background, const float dbackg, const int npsi)
{
  // Elements beyond the first npsi blocks of NXY values do not contribute
  const int iblock = i / NXY;
  if(iblock >= npsi) return (float2)(0,0);

  // Scaling is used to avoid overflows
  const float2 vscale = (float2)(scale,scale);
  const float obs = iobs[i]*scale*scale; // Observed intensity
  const float db = dbackg * scale * scale;

  float first = 0;   // Sum over modes of PO.(dPO + PdO), component-wise products added
  float icalc = 0;   // Sum over modes of |PO|^2 (background added after the loop)
  float second = 0;  // Sum over modes of |dPO|^2 + |PdO|^2 + 2*(PO.dPdO + PdO.dPO)

  for(int imode=0;imode<NBMODE;imode++)
  {
    // The modes are stored NXYZ elements apart
    const int idx = i + imode*NXYZ;
    const float2 po   = vPO  [idx]*vscale;
    const float2 pdo  = vPdO [idx]*vscale;
    const float2 dpo  = vdPO [idx]*vscale;
    const float2 dpdo = vdPdO[idx]*vscale;

    const float2 step = dpo + pdo;
    first += po.x * step.x + po.y * step.y;
    icalc += dot(po, po);
    second += dot(dpo,dpo) + dot(pdo,pdo)
              + 2*(po.x * dpdo.x + po.y * dpdo.y + pdo.x * dpo.x + pdo.y * dpo.y);
  }

  // The floor should only be reached with null frames used for 16-padding
  icalc = fmax(icalc + background*scale*scale, 1e-20f);
  const float f = 1 - obs/icalc;

  const float numerator = -(2*first + db) * f;

  // Each ratio is divided by icalc separately to avoid overflows, rather than
  // calculating (first*first)/(icalc*icalc)
  const float denominator = 2*(second * f + obs * (4*(first/icalc) * ((db + first) / icalc) + pown(db/icalc,2)) / 2);

  return (float2)(numerator, denominator);
}

/** Reduction kernel function: per-element contribution to the gamma coefficient of the
* conjugate gradient (coordinate of the line minimization), for the calculated vs observed
* Poisson log-likelihood.
*
* Returns numerator and denominator of the coefficient in a float2 value.
* Masked pixels (mask value different from 0) are ignored: they contribute (0,0).
* The mask is read at i%NXY; unmasked pixels are forwarded to Gamma() with both
* background terms set to zero.
*/
float2 GammaMask(__global float *iobs, __global float2 *vPO, __global float2 *vPdO, __global float2 *vdPO, __global float2 *vdPdO,
                 __global char* mask, const int i, const float scale, const int npsi)
{
  // Masked pixel: nothing to add to the reduction
  if(mask[i%NXY] != 0) return (float2)(0,0);

  // No background and no background gradient in this variant
  return Gamma(iobs, vPO, vPdO, vdPO, vdPdO, i, scale, 0.0f, 0.0f, npsi);
}

/** Reduction kernel function: per-element contribution to the gamma coefficient of the
* conjugate gradient (coordinate of the line minimization), for the calculated vs observed
* Poisson log-likelihood.
*
* Returns numerator and denominator of the coefficient in a float2 value.
* Variant with a constant background: the background is read at i%NXY and passed to
* Gamma(), with the background gradient term set to zero.
*/
float2 GammaBackground(__global float *iobs, __global float2 *vPO, __global float2 *vPdO, __global float2 *vdPO, __global float2 *vdPdO,
                 __global float* background, const int i, const float scale, const int npsi)
{
  // The background array is indexed by i modulo NXY
  const int ipix = i%NXY;
  const float bg = background[ipix];

  return Gamma(iobs, vPO, vPdO, vdPO, vdPdO, i, scale, bg, 0.0f, npsi);
}

/** Reduction kernel function: per-element contribution to the gamma coefficient of the
* conjugate gradient (coordinate of the line minimization), for the calculated vs observed
* Poisson log-likelihood.
*
* Returns numerator and denominator of the coefficient in a float2 value.
* Variant with a constant background and a mask: mask and background are both read at
* i%NXY. Masked pixels (mask value different from 0) contribute (0,0); the others are
* forwarded to Gamma() with the background gradient term set to zero.
*/
float2 GammaBackgroundMask(__global float *iobs, __global float2 *vPO, __global float2 *vPdO, __global float2 *vdPO, __global float2 *vdPdO,
                 __global float* background, __global char* mask, const int i, const float scale, const int npsi)
{
  const int ipix = i%NXY;

  // Masked pixel: nothing to add to the reduction
  if(mask[ipix] != 0) return (float2)(0,0);

  // The background is only read for unmasked pixels
  const float bg = background[ipix];
  return Gamma(iobs, vPO, vPdO, vdPO, vdPdO, i, scale, bg, 0.0f, npsi);
}

/** Reduction kernel function: per-element contribution to the gamma coefficient of the
* conjugate gradient (coordinate of the line minimization), for the calculated vs observed
* Poisson log-likelihood.
*
* Returns numerator and denominator of the coefficient in a float2 value.
* Variant with a background gradient: background and dbackground are both read at
* i%NXY and passed to Gamma().
*/
float2 GammaBackgroundGrad(__global float *iobs, __global float2 *vPO, __global float2 *vPdO, __global float2 *vdPO, __global float2 *vdPdO,
                 __global float* background, __global float* dbackground, const int i, const float scale, const int npsi)
{
  // Both background arrays are indexed by i modulo NXY
  const int ipix = i%NXY;
  const float bg  = background[ipix];
  const float dbg = dbackground[ipix];

  return Gamma(iobs, vPO, vPdO, vdPO, vdPdO, i, scale, bg, dbg, npsi);
}

/** Reduction kernel function: per-element contribution to the gamma coefficient of the
* conjugate gradient (coordinate of the line minimization), for the calculated vs observed
* Poisson log-likelihood.
*
* Returns numerator and denominator of the coefficient in a float2 value.
* Variant with a background gradient and a mask: mask, background and dbackground are
* all read at i%NXY. Masked pixels (mask value different from 0) contribute (0,0); the
* others are forwarded to Gamma().
*/
float2 GammaBackgroundGradMask(__global float *iobs, __global float2 *vPO, __global float2 *vPdO, __global float2 *vdPO, __global float2 *vdPdO,
                 __global float* background, __global float* dbackground, __global char* mask, const int i, const float scale, const int npsi)
{
  const int ipix = i%NXY;

  // Masked pixel: nothing to add to the reduction
  if(mask[ipix] != 0) return (float2)(0,0);

  // The background arrays are only read for unmasked pixels
  const float bg  = background[ipix];
  const float dbg = dbackground[ipix];
  return Gamma(iobs, vPO, vPdO, vdPO, vdPdO, i, scale, bg, dbg, npsi);
}
