#define PROGNAME "stats"
#define VERSION "1.4a"
#define DATE "10.05.2007"
#define AUTHOR "Nils Bluemer"
/* program stats (successor of statnew), computes average, error etc.
expects input on stdin in 1st column
compile with cc -lm -o stats stats.c
NEW (1.3): better variance estimator (factor N/N-1)
NEW (1.4): better covariance estimator (no bias for uncorrelated data)
Corrected in 1.4: factor in corrected error (for -a) */
/* TODO (1.3): set number of histogram bins */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#define BUFSIZE 100
#define NUMHIST 40
#define MAXDATA 100001
#define MAXKORR 1000
#define MIN(a,b) (a<b?a:b)
/***************************************************************/
void error(char error_text[])
/* standard error handler */
{
fprintf(stderr,"\nstats run-time error\n");
fprintf(stderr,"--%s--\n",error_text);
fprintf(stderr,"for general help use option -h\n");
fprintf(stderr,"...now exiting to system...\n");
exit(1);
}
/***************************************************************/
void average(char form){
double data[MAXDATA];
double korr[MAXDATA];
char buf[BUFSIZE];
int i,n,numdata,numkorr;
double sum, sqsum,nsum,av, var,korrtime,trans;
numdata=0;
sum=0.0;
sqsum=0.0;
nsum=0;
do {
if (fgets(buf,BUFSIZE,stdin)!=NULL){
numdata++;
sscanf(buf,"%lf",&data[numdata]);
sum +=data[numdata];
sqsum += data[numdata]*data[numdata];
nsum +=numdata*data[numdata];
}
} while (feof(stdin)==0);
if (numdata>MAXDATA) error ("too many data points");
if (numdata<2) error ("not enough data");
av=sum/numdata;
var=(sqsum-numdata*av*av)/(numdata-1);
/* compute transient: slope of best linear fit on data[n] */
/* a = (<xy>-<x><y>)/(<x^2>-<x>^2), here: x=numdata, y=data[numdata] */
trans=(nsum/numdata-0.5*(numdata+1)*av)/
((numdata+.5)*(numdata+1)/3-0.25*(numdata+1)*(numdata+1));
if (form=='l')
printf("Average: %10.8g variance: %10.8g error: %10.8g\n",av,var,sqrt(var/numdata));
numkorr=numdata/3;
korr[0]=1.0;
if (form=='c'){
printf("# Autocorrelation function: (i,c(i))\n");
printf ("%4d %10lf\n",0,korr[0]);
}
for (i=1;i<=numkorr;i++){
korr[i]=0.0;
for (n=1;n<=numdata-i;n++)
korr[i]+=(data[n]-av)*(data[n+i]-av);
korr[i]=(korr[i]/(var*(numdata-i))+1.0/numdata)/
((1-1.0/numdata)*(1-1.0/numdata));
if (form=='c')
printf ("%4d %10lf\n",i,korr[i]);
}
korrtime=1;
n =1;
while ((korr[n]>0)&&(n<=numkorr)){
korrtime += 2*korr[n];
n++;
}
if (form=='l')
printf ("Korrelation time: %10.8g corrected error: %10.8g transient: %10.8g\n",
korrtime, sqrt(var/numdata)*sqrt(korrtime),trans);
if (form=='s')
printf ("%10.8G %10.8G %10.8G\n", av, sqrt(var/numdata)*sqrt(korrtime), trans);
}
/***************************************************************/
void iaverage(char form){ /* for unequally spaced samples */
double datax[MAXDATA];
double datay[MAXDATA];
double korr[MAXKORR];
double korrs[MAXKORR];
double kappaloc[MAXDATA];
int korrw[MAXKORR];
char buf[BUFSIZE];
int i,j,n,numdata,numkorr;
double sum, sqsum,nsum,av, var,korrtime,trans;
double dxmin, dxmax, dxkorr, Neff;
int cut;
numdata=0;
sum=0.0;
sqsum=0.0;
nsum=0;
do {
if (fgets(buf,BUFSIZE,stdin)!=NULL){
numdata++;
sscanf(buf,"%lf %lf",&datax[numdata],&datay[numdata]);
/* printf("Nr %d: %f\n",nr,datay[numdata]); */
sum +=datay[numdata];
sqsum += datay[numdata]*datay[numdata];
nsum +=numdata*datay[numdata];
if ((numdata == 2)||((numdata > 2)&&
(datax[numdata]-datax[numdata-1]<dxmin)))
dxmin = datax[numdata]-datax[numdata-1];
}
} while (feof(stdin)==0);
if (numdata>MAXDATA) error ("too many data points");
if (numdata<2) error ("not enough data");
if (dxmin<0) error ("data not monotonous");
dxmax = datax[numdata]-datax[1];
av=sum/numdata;
var=sqsum/numdata-av*av;
/* compute transient: slope of best linear fit on datay[n] */
/* a = (<xy>-<x><y>)/(<x^2>-<x>^2), here: x=numdata, y=datay[numdata]
trans=(nsum/numdata-0.5*(numdata+1)*av)/((numdata+.5)*(numdata+1)/3-0.25*(numdata+1)*(numdata+1));*/
if (form=='l')
printf("Average: %lf, error: %lf\n",av,sqrt(var/(numdata-1)));
numkorr= MIN(dxmax/(2*dxmin),MAXKORR);
dxkorr = MIN(dxmin/1.5, dxmax/numkorr);
if (form=='c')
printf("# Autocorrelation function: (t,c(t),#points(t))\n");
for (n=0;n<=numkorr;n++){
korr[n]=0.0;
korrw[n]=0;
}
for (i=1;i<=numdata-1;i++){
j = i;
while ((j<numdata)&&(datax[++j]-datax[i]<dxkorr*numkorr)){
n = (int)floor((datax[j]-datax[i])/dxkorr);
korr[n] += (datay[i]-av)*(datay[j]-av)/var;
korrw[n]++;
}
}
if (form=='c')
for (n=0;n<=numkorr;n++)
if (korrw[n]>0)
printf ("%10lf %10lf %4d\n",(n+0.5)*dxkorr,korr[n]/korrw[n],korrw[n]);
if (form=='s') { /* 1:2:3:2:1 smoothed */
korrs[0] = (3*numdata*1.0+3*korr[0]+2*korr[1]+korr[2])/
(3*numdata+3*korrw[0]+2*korrw[1]+korrw[2]);
korrs[1] = (1*numdata*1.0+2*korr[0]+3*korr[1]+2*korr[2]+korr[3])/
(1*numdata+2*korrw[0]+3*korrw[1]+2*korrw[2]+korrw[3]);
for (n=2;n<=numkorr-2;n++)
korrs[n] = (korr[n-2]+2*korr[n-1]+3*korr[n]+2*korr[n+1]+korr[n+2])/
(korrw[n-2]+2*korrw[n-1]+3*korrw[n]+2*korrw[n+1]+korrw[n+2]);
printf ("%10lf %10lf\n",0.0,1.0);
for (n=0;n<=numkorr-2;n++)
printf ("%10lf %10lf \n",(n+0.5)*dxkorr,korrs[n]);
korrtime = dxkorr;
cut = 1;
while ((korrs[cut]>0)&&(cut<=numkorr)){
korrtime += 2*korrs[n]*dxkorr;
cut++;
}
cut--;
for (i=1;i<=numdata;i++)
kappaloc[i]=1.0;
for (i=1;i<=numdata;i++)
for (j=i+1;j<=numdata;j++){
n = (int)floor((datax[j]-datax[i])/dxkorr);
if (n<=cut){
kappaloc[i]+= korrs[n];
kappaloc[j]+= korrs[n];
}
}
/* for (i=1;i<=numdata;i++)
printf ("%10lf\n", kappaloc[i]); */
Neff = 0.0;
for (i=1;i<=numdata;i++)
Neff += 1.0/kappaloc[i];
printf ("# N: %d, Neff: %lf, corrected error: %lf, transient: %lf\n",
numdata, Neff, sqrt(var/(Neff-1)),trans);
}
}
/***************************************************************/
void printhelp ()
{
printf("**********************************************************\n");
printf("%s: statistics utility (average, histogram, etc.)\n",PROGNAME);
printf("Version: %s, %s by %s\n",VERSION,DATE,AUTHOR);
printf("input: standardin, 1 column\n");
printf("options: -a average\n");
printf(" -s average, short output\n");
printf(" -c autocorrelation function (used for averaging)\n");
printf(" -i histogram\n");
printf(" -r average, uneven spacing\n");
printf(" -w weighted histogram (input here: y, delta_y)\n");
printf(" -h this help\n");
}
/***************************************************************/
void histogram ()
{
double data[MAXDATA];
double hist[NUMHIST];
char buf[BUFSIZE];
int i,n,numdata;
double min, max, x;
numdata=0;
do {
if (fgets(buf,BUFSIZE,stdin)!=NULL){
numdata++;
sscanf(buf,"%lf",&data[numdata]);
/* printf("Nr %d: %f\n",nr,data[numdata]); */
}
} while (feof(stdin)==0);
/* printf ("stat/histogram: read %d lines\n",numdata); */
if (numdata>MAXDATA) error ("too many data points");
if (numdata<2) error ("not enough data");
min = data[1];
max = data[1];
for (i=2;i<=numdata;i++){
if (data[i]<min) min=data[i];
if (data[i]>max) max = data[i];
}
/* printf ("min: %lf, max: %lf\n",min,max); */
for (i=0;i<NUMHIST;i++) hist[i]==0.0;
for (i=1;i<=numdata;i++){
x = NUMHIST/(max-min)*(data[i]-min)*0.999999;
hist[(int)floor(x)] += 1.0*NUMHIST/((max-min)*numdata);
/* printf("x: %lf\n",x); */
}
for (i=0;i<NUMHIST;i++)
printf ("%8.6lf %8.6lf\n",(i+0.5)/NUMHIST*(max-min)+min, hist[i]);
}
/***************************************************************/
void whistogram () /*weighted histogram*/
{
double data[MAXDATA];
double datasig[MAXDATA];
double hist[NUMHIST];
char buf[BUFSIZE];
int i,n,numdata;
double min, max, x, signorm;
numdata=0;
do {
if (fgets(buf,BUFSIZE,stdin)!=NULL){
numdata++;
sscanf(buf,"%lf %lf",&data[numdata],&datasig[numdata]);
if (datasig[numdata]==0) error ("error cannot be zero for weighting!");
}
} while (feof(stdin)==0);
if (numdata>MAXDATA) error ("too many data points");
if (numdata<2) error ("not enough data");
min = data[1];
max = data[1];
signorm = 1.0/(datasig[1]*datasig[1]);
for (i=2;i<=numdata;i++){
if (data[i]<min) min=data[i];
if (data[i]>max) max = data[i];
signorm += 1.0/(datasig[i]*datasig[i]);
}
/* printf ("min: %lf, max: %lf\n",min,max); */
for (i=0;i<NUMHIST;i++) hist[i]==0.0;
for (i=1;i<=numdata;i++){
x = NUMHIST/(max-min)*(data[i]-min)*0.999999;
hist[(int)floor(x)] += 1.0*NUMHIST/(datasig[i]*datasig[i]*signorm*
(max-min));
/* printf("x: %lf\n",x); */
}
for (i=0;i<NUMHIST;i++)
printf ("%8.6lf %8.6lf\n",(i+0.5)/NUMHIST*(max-min)+min, hist[i]);
}
/***************************************************************/
int main (int argc, char *argv[])
{
char c;
while (--argc > 0 && (*++argv)[0] == '-')
while (c= *++argv[0])
switch (c) {
case 'a':
average('l');
break;
case 's':
average('s');
break;
case 'c':
average('c');
break;
case 'r':
iaverage('s');
break;
case 'i':
histogram();
break;
case 'w':
whistogram();
break;
case 'h':
printhelp();
exit(0);
default:
error("No valid choice");
}
}
syntax highlighted by Code2HTML, v. 0.9.1