#define PROGNAME "stats"
#define VERSION "1.4a"
#define DATE  "10.05.2007"
#define AUTHOR "Nils Bluemer"

/* program stats (successor of statnew), computes average, error etc.
   expects input on stdin in 1st column
   compile with cc -lm -o stats stats.c
   NEW (1.3): better variance estimator (factor N/N-1)
   NEW (1.4): better covariance estimator (no bias for uncorrelated data) 
   Corrected in 1.4: factor in corrected error (for -a) */

/* TODO (1.3): set number of histogram bins */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#define BUFSIZE 100
#define NUMHIST 40
#define MAXDATA 100001
#define MAXKORR 1000
#define MIN(a,b) (a<b?a:b)

/***************************************************************/

void error(char error_text[])
/* standard error handler */
{
    fprintf(stderr,"\nstats run-time error\n");
    fprintf(stderr,"--%s--\n",error_text);
    fprintf(stderr,"for general help use option -h\n");
    fprintf(stderr,"...now exiting to system...\n");
    exit(1);
}


/***************************************************************/

void average(char form){
  double data[MAXDATA];
  double korr[MAXDATA]; 
  char buf[BUFSIZE];
  int i,n,numdata,numkorr;
  double sum, sqsum,nsum,av, var,korrtime,trans;

  numdata=0;
  sum=0.0;
  sqsum=0.0;
  nsum=0;
  do {
    if (fgets(buf,BUFSIZE,stdin)!=NULL){
      numdata++;
      sscanf(buf,"%lf",&data[numdata]);
      sum +=data[numdata];
      sqsum += data[numdata]*data[numdata];
      nsum +=numdata*data[numdata];
    }
  } while (feof(stdin)==0);
  if (numdata>MAXDATA) error ("too many data points");
  if (numdata<2) error ("not enough data");
  av=sum/numdata;
  var=(sqsum-numdata*av*av)/(numdata-1);
  /* compute transient: slope of best linear fit on data[n] */
  /* a = (<xy>-<x><y>)/(<x^2>-<x>^2), here: x=numdata, y=data[numdata] */
  trans=(nsum/numdata-0.5*(numdata+1)*av)/
    ((numdata+.5)*(numdata+1)/3-0.25*(numdata+1)*(numdata+1));
  if (form=='l')
    printf("Average: %10.8g   variance: %10.8g   error: %10.8g\n",av,var,sqrt(var/numdata));

  numkorr=numdata/3;

  korr[0]=1.0;
  if (form=='c'){
    printf("# Autocorrelation function: (i,c(i))\n");
      printf ("%4d %10lf\n",0,korr[0]); 
  }
  for (i=1;i<=numkorr;i++){
    korr[i]=0.0;
    for (n=1;n<=numdata-i;n++)
      korr[i]+=(data[n]-av)*(data[n+i]-av);
    korr[i]=(korr[i]/(var*(numdata-i))+1.0/numdata)/
      ((1-1.0/numdata)*(1-1.0/numdata));
    if (form=='c')
      printf ("%4d %10lf\n",i,korr[i]); 
  }
  korrtime=1;
  n =1;
  while ((korr[n]>0)&&(n<=numkorr)){
    korrtime += 2*korr[n];
    n++;
  }
  if (form=='l') 
    printf ("Korrelation time: %10.8g   corrected error: %10.8g   transient: %10.8g\n",
	    korrtime, sqrt(var/numdata)*sqrt(korrtime),trans);
  if (form=='s')
    printf ("%10.8G  %10.8G  %10.8G\n", av, sqrt(var/numdata)*sqrt(korrtime), trans);
}

/***************************************************************/

void iaverage(char form){ /* for unequally spaced samples */
  double datax[MAXDATA];
  double datay[MAXDATA];
  double korr[MAXKORR]; 
  double korrs[MAXKORR]; 
  double kappaloc[MAXDATA];
  int korrw[MAXKORR];
  char buf[BUFSIZE];
  int i,j,n,numdata,numkorr;
  double sum, sqsum,nsum,av, var,korrtime,trans;
  double dxmin, dxmax, dxkorr, Neff;
  int cut;

  numdata=0;
  sum=0.0;
  sqsum=0.0;
  nsum=0;
  do {
    if (fgets(buf,BUFSIZE,stdin)!=NULL){
      numdata++;
      sscanf(buf,"%lf %lf",&datax[numdata],&datay[numdata]);
/*       printf("Nr %d: %f\n",nr,datay[numdata]); */
      sum +=datay[numdata];
      sqsum += datay[numdata]*datay[numdata];
      nsum +=numdata*datay[numdata];
      if ((numdata == 2)||((numdata > 2)&&
			   (datax[numdata]-datax[numdata-1]<dxmin)))
	dxmin = datax[numdata]-datax[numdata-1];
    }
  } while (feof(stdin)==0);
  if (numdata>MAXDATA) error ("too many data points");
  if (numdata<2) error ("not enough data");
  if (dxmin<0) error ("data not monotonous");
  dxmax = datax[numdata]-datax[1];

  av=sum/numdata;
  var=sqsum/numdata-av*av;
  /* compute transient: slope of best linear fit on datay[n] */
  /* a = (<xy>-<x><y>)/(<x^2>-<x>^2), here: x=numdata, y=datay[numdata] 
  trans=(nsum/numdata-0.5*(numdata+1)*av)/((numdata+.5)*(numdata+1)/3-0.25*(numdata+1)*(numdata+1));*/
  if (form=='l')
    printf("Average: %lf, error: %lf\n",av,sqrt(var/(numdata-1)));

  numkorr= MIN(dxmax/(2*dxmin),MAXKORR);
  dxkorr = MIN(dxmin/1.5, dxmax/numkorr);
  if (form=='c')
    printf("# Autocorrelation function: (t,c(t),#points(t))\n");
  for (n=0;n<=numkorr;n++){
    korr[n]=0.0;
    korrw[n]=0;
  }
  
  for (i=1;i<=numdata-1;i++){
    j = i;
    while ((j<numdata)&&(datax[++j]-datax[i]<dxkorr*numkorr)){
      n = (int)floor((datax[j]-datax[i])/dxkorr);
      korr[n] += (datay[i]-av)*(datay[j]-av)/var;
      korrw[n]++;
    }
  }

  if (form=='c')
    for (n=0;n<=numkorr;n++)
      if (korrw[n]>0)
	printf ("%10lf %10lf %4d\n",(n+0.5)*dxkorr,korr[n]/korrw[n],korrw[n]); 

  if (form=='s') { /* 1:2:3:2:1 smoothed */
    korrs[0] = (3*numdata*1.0+3*korr[0]+2*korr[1]+korr[2])/
      (3*numdata+3*korrw[0]+2*korrw[1]+korrw[2]);
    korrs[1] = (1*numdata*1.0+2*korr[0]+3*korr[1]+2*korr[2]+korr[3])/
      (1*numdata+2*korrw[0]+3*korrw[1]+2*korrw[2]+korrw[3]);
    for (n=2;n<=numkorr-2;n++)
      korrs[n] = (korr[n-2]+2*korr[n-1]+3*korr[n]+2*korr[n+1]+korr[n+2])/
	(korrw[n-2]+2*korrw[n-1]+3*korrw[n]+2*korrw[n+1]+korrw[n+2]);
    printf ("%10lf %10lf\n",0.0,1.0);
    for (n=0;n<=numkorr-2;n++)
      printf ("%10lf %10lf \n",(n+0.5)*dxkorr,korrs[n]); 

    korrtime = dxkorr;
    cut = 1;
    while ((korrs[cut]>0)&&(cut<=numkorr)){
      korrtime += 2*korrs[n]*dxkorr;
      cut++;
    }
    cut--;

    for (i=1;i<=numdata;i++)
      kappaloc[i]=1.0;

    for (i=1;i<=numdata;i++)
      for (j=i+1;j<=numdata;j++){
	n = (int)floor((datax[j]-datax[i])/dxkorr);
	if (n<=cut){
	  kappaloc[i]+= korrs[n];
	  kappaloc[j]+= korrs[n];
	}
      }

/*     for (i=1;i<=numdata;i++) 
         printf ("%10lf\n", kappaloc[i]); */

    Neff = 0.0;
    for (i=1;i<=numdata;i++)
      Neff += 1.0/kappaloc[i];
    printf ("# N: %d, Neff: %lf, corrected error: %lf, transient: %lf\n",
	    numdata, Neff, sqrt(var/(Neff-1)),trans);

  } 
}

/***************************************************************/

void printhelp ()
{
 printf("**********************************************************\n");
 printf("%s: statistics utility (average, histogram, etc.)\n",PROGNAME);
 printf("Version: %s, %s by %s\n",VERSION,DATE,AUTHOR);
 printf("input:  standardin, 1 column\n");
 printf("options: -a average\n");
 printf("         -s average, short output\n");
 printf("         -c autocorrelation function (used for averaging)\n");
 printf("         -i histogram\n");
 printf("         -r average, uneven spacing\n");
 printf("         -w weighted histogram (input here: y, delta_y)\n");
 printf("         -h this help\n");
}

/***************************************************************/

void histogram ()
{
  double data[MAXDATA];
  double hist[NUMHIST];
  char buf[BUFSIZE];
  int i,n,numdata;
  double min, max, x;

  numdata=0;
  do {
    if (fgets(buf,BUFSIZE,stdin)!=NULL){
      numdata++;
      sscanf(buf,"%lf",&data[numdata]);
/*       printf("Nr %d: %f\n",nr,data[numdata]); */
    }
  } while (feof(stdin)==0);
/*   printf ("stat/histogram: read %d lines\n",numdata); */
  if (numdata>MAXDATA) error ("too many data points");
  if (numdata<2) error ("not enough data");
  min = data[1];
  max = data[1];
  for (i=2;i<=numdata;i++){
    if (data[i]<min) min=data[i];
    if (data[i]>max) max = data[i];
  }
/*   printf ("min: %lf, max: %lf\n",min,max); */
  for (i=0;i<NUMHIST;i++) hist[i]==0.0;
  for (i=1;i<=numdata;i++){
    x = NUMHIST/(max-min)*(data[i]-min)*0.999999;
    hist[(int)floor(x)] += 1.0*NUMHIST/((max-min)*numdata);
/*     printf("x: %lf\n",x); */
  }
  for (i=0;i<NUMHIST;i++) 
    printf ("%8.6lf   %8.6lf\n",(i+0.5)/NUMHIST*(max-min)+min, hist[i]);
  
}

/***************************************************************/

void whistogram () /*weighted histogram*/
{
  double data[MAXDATA];
  double datasig[MAXDATA];
  double hist[NUMHIST];
  char buf[BUFSIZE];
  int i,n,numdata;
  double min, max, x, signorm;

  numdata=0;
  do {
    if (fgets(buf,BUFSIZE,stdin)!=NULL){
      numdata++;
      sscanf(buf,"%lf %lf",&data[numdata],&datasig[numdata]);
      if (datasig[numdata]==0) error ("error cannot be zero for weighting!");
    }
  } while (feof(stdin)==0);
  if (numdata>MAXDATA) error ("too many data points");
  if (numdata<2) error ("not enough data");
  min = data[1];
  max = data[1];
  signorm = 1.0/(datasig[1]*datasig[1]);
  for (i=2;i<=numdata;i++){
    if (data[i]<min) min=data[i];
    if (data[i]>max) max = data[i];
    signorm += 1.0/(datasig[i]*datasig[i]);
  }
/*   printf ("min: %lf, max: %lf\n",min,max); */
  for (i=0;i<NUMHIST;i++) hist[i]==0.0;
  for (i=1;i<=numdata;i++){
    x = NUMHIST/(max-min)*(data[i]-min)*0.999999;
    hist[(int)floor(x)] += 1.0*NUMHIST/(datasig[i]*datasig[i]*signorm*
					(max-min));
/*     printf("x: %lf\n",x); */
  }
  for (i=0;i<NUMHIST;i++) 
    printf ("%8.6lf   %8.6lf\n",(i+0.5)/NUMHIST*(max-min)+min, hist[i]);
  
}

/***************************************************************/

int main (int argc, char *argv[])
{
  char c;
  while (--argc > 0 && (*++argv)[0] == '-')
    while (c= *++argv[0])
	   switch (c) {
	   case 'a':
	     average('l');
	     break;
	   case 's':
	     average('s');
	     break;
	   case 'c':
	     average('c');
	     break;
	   case 'r':
	     iaverage('s');
	     break;
	   case 'i':
	     histogram();
	     break;
	   case 'w':
	     whistogram();
	     break;
	   case 'h':
	     printhelp();
	     exit(0);
	   default:
	     error("No valid choice");
	   }
}


syntax highlighted by Code2HTML, v. 0.9.1