// ASHC expected value of a response variable conditioning on location (x,y) and another response variable #include f#include #include #include #include "avexec32.h" /* COPYRIGHT 2000,2001,2002 BY DAVID W. SCOTT, WILLIAM C. WOJCIECHOWSKI, J. BLAIR CHRISTIAN OF RICE UNIVERSITY, DEPT OF STATISTICS THIS IS PUBLIC DOMAIN SOFTWARE UNDER THE LGPL LICENSE. DISCLAIMER OF WARRANTIES: THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND. THE AUTHORS DO NOT WARRANT, GUARANTEE OR MAKE ANY REPRESENTATIONS REGARDING THE SOFTWARE OR DOCUMENTATION IN TERMS OF THEIR CORRECTNESS, RELIABILITY, CURRENTNESS, OR OTHERWISE. THE ENTIRE RISK AS TO THE RESULTS AND PERFORMANCE OF THE SOFTWARE IS ASSUMED BY YOU. IN NO CASE WILL ANY PARTY INVOLVED WITH THE CREATION OR DISTRIBUTION OF THE SOFTWARE BE LIABLE FOR ANY DAMAGE THAT MAY RESULT FROM THE USE OF THIS SOFTWARE. Sorry about that. ---------------------------------------------------------- BRIEF DESCRIPTION ---------------------------------------------------------- Below is our data structure for an "ASH". ---------------------------------------------------------- WHAT WE START WITH: (INPUT) ---------------------------------------------------------- DATA- (x,y,z,z2) (as 4 arrays). (x,y) is the location (such as centroid of county data), z is the response variable (such as cancer mortality in county data), and z2 is the conditioning variable (such as a cancer screening rate in county data) ASH INPUTS- The ash makes a grid around the data. The grid you see on the final map is of size nbinx * nbiny (for the continental US, nbinx = 200 and nbiny = 100 give you a grid of squares, since the US is about twice as wide as it is tall). Something you don't "see" (this is what the final map is) is the bin of the conditioning variable, nbinz2. The final map is actually a map of the response variable for areas of the conditioning variable in the range of the current bin (currbinz2). In our example of cancer mortality screening, the map produced is the "smooth conditional map" of cancer mortality for the range of screening values we consider (currbinz2). However, to make our map smooth, we use a kernel smoother (set in the "weight" function). We can control the area that the kernel smooths over (how "big" our smooth is). By choosing a smoothing parameter in each direction, (mx,my,mz2), we are able to choose the number of bins that we want to condition over. You really need to see an example of this to get an intuitive idea. As the m? values approach the nbin? size, the map becomes more and more smooth, with the limit (when m?=nbin?) being a map that is one value, the global average of the response variable at that level (of conditioning=currbinz2). How do you choose the smoothing parameter? I have been using about 10% of the nbiny (for both mx and my), and leaving the mz2 at 1. For the map of the US, it's nice to use the same smoothing parameter even though we use different bin sizes (we want to smooth the same amount in both directions). ---------------------------------------------------------- WHAT WE END UP WITH: (OUTPUT) ---------------------------------------------------------- At the end, we have the expected values of our response variable for each bin (r, which is nbinx * nbiny). To find the corners of the bins, or whatever output file format you want, see "ASHC_Add_Polygons" for example. ---------------------------------------------------------- NOTATION: ---------------------------------------------------------- I USE ASH AND ASHC INTERCHANGABLY HERE. STRICTLY, ASHC STANDS FOR CONDITIONAL ASH (THERE IS A VERSION OF THIS CODE, ASH, WHICH ONLY SMOOTHS). TO ONLY SMOOTH USING THIS CODE, SIMPLY MAKE NBINZ2=1, AND SET CURRBINZ2=1. THIS PRODUCES A SMOOTH MAP OF ALL OF THE RESPONSE VARIABLE DATA */ /* ---------------------------------------------------------- VARIABLE DESCRIPTION ---------------------------------------------------------- n The number of points we have (a "point" here is an (x,y) location, as well as the values of two variable of interest, z, z2 SET OUTSIDE INPUT nbinx The number of bins in the x (horizontal) direction (mesh size/grid size) SET OUTSIDE INPUT nbiny In the y (vertical) direction SET OUTSIDE INPUT nbinz2 The number of bins in the conditioning variable SET OUTSIDE INPUT currbinz2 The current bin (set of values/quantile) we're conditioning on SET OUTSIDE INPUT mx,my,mz2 The smoothing parameters SET OUTSIDE INPUT x,y,z,z2 The (x,y) values and response variable (z, ie cancer mortality) and the conditioning variable (z2, ie screening method) SET OUTSIDE INPUT ax,ay,az2,bx,by,bz2 These values, (a,b) are the min and max values of the grid in each direction (although the code takes the min/max values and makes them slightly smaller/larger so that they are inside the mesh (in ASHC_Get_HY, ay<- ay-0.0001f, by<- by+0.0001f) SET INTERNALLY cx,cy,cz2 Are the midpoints of the bins, so cx is an array of length nbinx, whose values are the center of each bin. SET INTERNALLY dx,dy,dz2 Are the unit lengths of each bin, dx=(bx-ax)/nbinx. SET INTERNALLY v,s The 3-D grid where everything is binned, the size is nbinx * nbiny * nbinz2. v is the bin count (ints), and the s is a trick used in calculating the ASH (See Scott, David "Multivariate Density Estimation", CH 5). Both are initialized to zero. SET INTERNALLY r,f These are the values on the grid, so are length nbinx * nbiny (the (x,y) grid) r is the value of the grid, and f is used for internal ASH computations. SET INTERNALLY OUTPUT */ typedef struct { long n,nbinx,nbiny,nbinz2,currbinz2; long mx,my,mz2; float dx,dy,dz2; float ax,ay,az2; float bx,by,bz2; float* x; float* y; float* z; float* z2; float* cx; float* cy; float* cz2; long*** v; float*** s; float** r; float** f; } ASHCStruct; typedef ASHCStruct ASHCdata; typedef ASHCStruct* ASHCptr; /* ---------------------------------------------------------- THESE FUNCTIONS ARE USED TO FIND THE MIN/MAX VALUES OF A LIST (USED IN CALCULATING A? AND B?-- VMAX AND VMIN TAKE IN VECTORS (ARRAYS) AND RETURN THE MIN/MAX VALUES OF THE VECTORS ---------------------------------------------------------- */ long ashmaxl(long a, long b) { return (a > b) ? a : b; } float ashmax(float a, float b) { return (a > b) ? a : b; } long ashminl(long a, long b) { return (a < b) ? a : b; } float ashmin(float a, float b) { return (a < b) ? a : b; } float vmax(float *x, long n) { long i=0; float m = x[0]; for(i=1;iv = (long***)malloc(nbinx*sizeof(long**)); ash->s = (float***)malloc(nbinx*sizeof(float**)); ash->r = (float**)malloc(nbinx*sizeof(float*)); ash->f = (float**)malloc(nbinx*sizeof(float*)); ash->cx = (float*)malloc(nbinx*sizeof(float)); ash->cy = (float*)malloc(nbiny*sizeof(float)); ash->cz2 = (float*)malloc(nbinz2*sizeof(float)); for(k=0;kv)[k]=(long**)malloc(nbiny*sizeof(long*)); (ash->s)[k]=(float**)malloc(nbiny*sizeof(float*)); (ash->r)[k]=(float*)malloc(nbiny*sizeof(float)); (ash->f)[k]=(float*)malloc(nbiny*sizeof(float)); for(j=0;jv)[k][j]=(long*)malloc(nbinz2*sizeof(long)); (ash->s)[k][j]=(float*)malloc(nbinz2*sizeof(float)); for(i=0;iv)[k][j][i]=0; (ash->s)[k][j][i]=0.0f; } (ash->r)[k][j]=0.0f; (ash->f)[k][j]=0.0f; } } ash->nbinx = nbinx; ash->nbiny = nbiny; ash->nbinz2 = nbinz2; ash->currbinz2 = currbinz2; } // // Must call this first to create a new ashptr // /* ---------------------------------------------------------- THIS IS THE CALL THAT ALLOCATES MEMORY WHENEVER AN ASH OBJECT IS CREATED ---------------------------------------------------------- */ __declspec(dllexport) ASHCdata* ASHC_Create_Data(long n, long nbinx, long nbiny, long nbinz2,long currbinz2, long mx, long my, long mz2) { long k = 0; long j = 0; ASHCdata* ash = malloc(sizeof(ASHCdata)); ash->x = (float*)malloc(n*sizeof(float)); ash->y = (float*)malloc(n*sizeof(float)); ash->z = (float*)malloc(n*sizeof(float)); ash->z2 = (float*)malloc(n*sizeof(float)); ASHC_Create_VSRF(nbinx,nbiny,nbinz2,currbinz2,ash); ash->n=n; ash->nbinx=nbinx; ash->nbiny=nbiny; ash->nbinz2=nbinz2; ash->currbinz2=currbinz2; ash->mx=mx; ash->my=my; ash->mz2=mz2; return ash; } /* ---------------------------------------------------------- FREES V,S,R,F AND C?-- CALLED BELOW IN ASHC_FREE_DATA ---------------------------------------------------------- */ void ASHC_Free_VSRF(ASHCptr ash) { long k = 0; long i = 0; long nbinx = ash->nbinx; long nbiny = ash->nbiny; for(k=0;kv)[k][i]); free((ash->s)[k][i]); } free((ash->v)[k]); free((ash->s)[k]); free((ash->r)[k]); free((ash->f)[k]); } free(ash->v); free(ash->s); free(ash->r); free(ash->f); free(ash->cx); free(ash->cy); free(ash->cz2); } // // Frees the memorey in ashptr // /* ---------------------------------------------------------- FREE ASH MEMORY FREES X,Y,Z,Z2 AND CALLS ASHC_FREE_VSRF WHICH FREES V,S,R,F AND C? ---------------------------------------------------------- */ __declspec(dllexport) void ASHC_Free_Data(ASHCdata* ash) { free(ash->x); free(ash->y); free(ash->z); free(ash->z2); ASHC_Free_VSRF(ash); free(ash); } /* ---------------------------------------------------------- SETTERS!!!!!!! SOME SETTERS REQUIRE REALLOCATING MEMORY ---------------------------------------------------------- */ __declspec(dllexport) void ASHC_Set_X(float x, long i, ASHCptr ash) { ash->x[i]=x; } __declspec(dllexport) void ASHC_Set_Y(float y, long i, ASHCptr ash) { ash->y[i]=y; } __declspec(dllexport) void ASHC_Set_Z(float z, long i, ASHCptr ash) { ash->z[i]=z; } __declspec(dllexport) void ASHC_Set_Z2(float z2, long i, ASHCptr ash) { ash->z2[i]=z2; } __declspec(dllexport) void ASHC_Set_N(long n, ASHCptr ash) { ash->n = n; } __declspec(dllexport) void ASHC_Set_NBINX(long nbinx, ASHCptr ash) { ASHC_Free_VSRF(ash); ASHC_Create_VSRF(nbinx, ash->nbiny, ash->nbinz2, ash->currbinz2, ash); } __declspec(dllexport) void ASHC_Set_NBINY(long nbiny, ASHCptr ash) { ASHC_Free_VSRF(ash); ASHC_Create_VSRF(ash->nbinx, nbiny, ash->nbinz2, ash->currbinz2, ash); } __declspec(dllexport) void ASHC_Set_NBINZ2(long nbinz2, ASHCptr ash) { ASHC_Free_VSRF(ash); ASHC_Create_VSRF(ash->nbinx, ash->nbiny, nbinz2, ash->currbinz2, ash); } __declspec(dllexport) void ASHC_Set_CURRBINZ2(long currbinz2, ASHCptr ash) { ASHC_Free_VSRF(ash); ASHC_Create_VSRF(ash->nbinx,ash->nbiny,ash->nbinz2, currbinz2, ash); } __declspec(dllexport) void ASHC_Set_MX(long mx, ASHCptr ash) { ash->mx = mx; } __declspec(dllexport) void ASHC_Set_MY(long my, ASHCptr ash) { ash->my = my; } __declspec(dllexport) void ASHC_Set_MZ2(long mz2, ASHCptr ash) { ash->mz2 = mz2; } /* ---------------------------------------------------------- GETTERS!!!! H? ARE D? * M?, NO LONGER USED BUT MAYBE USEFUL LATER H? IS THE VALUE OF THE BIG HISTOGRAM BIN (SEE SCOTT, CH5) ---------------------------------------------------------- */ __declspec(dllexport) float ASHC_Get_HX(ASHCptr ash) { long n = ash->n; long nbinx = ash->nbinx; float ax = vmin(ash->x,n)-0.0001f; float bx = vmax(ash->x,n)+0.0001f; float dx = (bx-ax)/nbinx; return (ash->mx)*dx; } __declspec(dllexport) float ASHC_Get_HY(ASHCptr ash) { long n = ash->n; long nbiny = ash->nbiny; float ay = vmin(ash->y,n)-0.0001f; float by = vmax(ash->y,n)+0.0001f; float dy = (by-ay)/nbiny; return (ash->my)*dy; } __declspec(dllexport) float ASHC_Get_HZ2(ASHCptr ash) { long n = ash->n; long nbinz2 = ash->nbinz2; float az2 = vmin(ash->z2,n)-0.0001f; float bz2 = vmax(ash->z2,n)+0.0001f; float dz2 = (bz2-az2)/nbinz2; return (ash->mz2)*dz2; } __declspec(dllexport) long ASHC_Get_NBINX(ASHCptr ash) { return ash->nbinx; } __declspec(dllexport) long ASHC_Get_NBINY(ASHCptr ash) { return ash->nbiny; } __declspec(dllexport) long ASHC_Get_NBINZ2(ASHCptr ash) { return ash->nbinz2; } __declspec(dllexport) long ASHC_Get_CURRBINZ2(ASHCptr ash) { return ash->currbinz2; } __declspec(dllexport) long ASHC_Get_MX(ASHCptr ash) { return ash->mx; } __declspec(dllexport) long ASHC_Get_MY(ASHCptr ash) { return ash->my; } __declspec(dllexport) long ASHC_Get_MZ2(ASHCptr ash) { return ash->mz2; } /* ---------------------------------------------------------- THIS IS THE WEIGHT (KERNEL) FUNCTION THAT WE USE THIS KERNEL IS THE TUKEY BIWEIGTH KERNEL. SEE SCOTT, PG 140 FOR A LIST OF SOME COMMONLY USED KERNELS. IT DETERMINES HOW MUCH WEIGTH A VALUE GETS WHEN WE ARE DOING OUR AVERAGING ---------------------------------------------------------- */ float weight(float x) { float ret=0.000f; float c = 15.000f/16.000f; if( (x >= -1) && (x <=1) ) { ret = c*(1-x*x)*(1-x*x); } return ret; } /* ---------------------------------------------------------- THIS FUNCTION BINS THE DATA CALLED BY ASHC_CALC ---------------------------------------------------------- */ void ASHC_Bin_Reg(ASHCptr ash) { long k = 0; long j = 0; long i = 0; long iz = 0; long n = ash->n; long nbinx = ash->nbinx; long nbiny = ash->nbiny; long nbinz2 = ash->nbinz2; float ax = vmin(ash->x,n)-0.0001f; float ay = vmin(ash->y,n)-0.0001f; float az2 = vmin(ash->z2,n)-0.0001f; float bx = vmax(ash->x,n)+0.0001f; float by = vmax(ash->y,n)+0.0001f; float bz2 = vmax(ash->z2,n)+0.0001f; float dx = (bx-ax)/nbinx; float dy = (by-ay)/nbiny; float dz2 = (bz2-az2)/nbinz2; // dereference the pointers here. // so you do not take extra time dereferencing // in the loops long*** v = ash->v; float*** s = ash->s; float* x = ash->x; float* y = ash->y; float* z2 = ash->z2; for(k=0;k=0) && (k=0) && (j=0) && (izz[i]; } } ash->ax=ax; ash->bx=bx; ash->ay=ay; ash->by=by; ash->dx=dx; ash->dy=dy; ash->az2=az2; ash->bz2=bz2; ash->dz2=dz2; } /* ---------------------------------------------------------- CALCULATE THE ASH CALLED BY ASHC_CALC ---------------------------------------------------------- */ void ASHC_Reg(ASHCptr ash) { float dx = ash->dx; float dy = ash->dy; float dz2 = ash->dz2; long nbinx = ash->nbinx; long nbiny = ash->nbiny; long nbinz2 = ash->nbinz2; long currbinz2 = ash->currbinz2; float ax = ash->ax; float ay = ash->ay; float az2 = ash->az2; long mx = ash->mx; long my = ash->my; long mz2 = ash->mz2; long i=0; long j=0; long k=0; long l=0; long x1=0; long x2=0; long y1=0; long y2=0; long z21=0; long z22=0; float** f = ash->f; float** r = ash->r; long*** v = ash->v; float*** s = ash->s; float* cx = ash->cx; float* cy = ash->cy; float* cz2 = ash->cz2; long count=0; // Initialize the function, return values for(k=0;k0.0f) { r[k][j] = r[k][j]/f[k][j]; } } } } /* ---------------------------------------------------------- CREATES THE OUTPUT FORMAT CALLED BY ASHC_CALC ---------------------------------------------------------- */ void ASHC_Add_Polygons(ASHCptr ashptr,char* name) { char cmdstr[256]; //char msgstr[128]; char* resstr; long i=0; long j=0; long nbinx = ashptr->nbinx; long nbiny = ashptr->nbiny; float* cx = ashptr->cx; float* cy = ashptr->cy; float dx = 0.5f*ashptr->dx; float dy = 0.5f*ashptr->dy; // long*** v = ashptr->v; float** r = ashptr->r; float** f = ashptr->f; float ax = ashptr->ax; float ay = ashptr->ay; float bx = ashptr->bx; float by = ashptr->by; sprintf(cmdstr,"%s%s%s","_newFTab=FTab.MakeNew(\"",name,"\".AsFileName,Polygon)"); resstr = AVExec(cmdstr); sprintf(cmdstr,"%s","_idField=Field.Make(\"Predicted\",#FIELD_DECIMAL,8,0)"); resstr = AVExec(cmdstr); sprintf(cmdstr,"%s","_newFTab.AddFields({_idField})"); resstr = AVExec(cmdstr); sprintf(cmdstr,"%s","_shapeField=_newFTab.FindField(\"Shape\")"); resstr = AVExec(cmdstr); for(i=0;i 0.0f) { float left = cx[i] - dx; float right = cx[i] + dx; float top = cy[j] + dy; float bottom = cy[j] - dy; //( (left >=ax) && (right <= bx) && (top <= by) && (bottom >= ay) ) // sprintf(cmdstr,"_newRecNum=_newFTab.AddRecord"); resstr = AVExec(cmdstr); sprintf(cmdstr,"_newFTab.SetValue(_shapeField,_newRecNum,Polygon.Make({{%.4f@%.4f,%.4f@%.4f,%.4f@%.4f,%.4f@%.4f}}))",left,top,right,top,right,bottom,left,bottom); resstr = AVExec(cmdstr); sprintf(cmdstr,"_newFTab.SetValue(_idField,_newRecNum,%.4f)",r[i][j]); resstr = AVExec(cmdstr); // } } } sprintf(cmdstr,"_theView=_selTheme.GetView"); resstr=AVExec(cmdstr); sprintf(cmdstr,"_newTheme=FTheme.Make(_newFTab)"); resstr=AVExec(cmdstr); sprintf(cmdstr,"_theView.AddTheme(_newTheme)"); resstr=AVExec(cmdstr); /*sprintf(cmdstr,"_theLegend=_newTheme.GetLegend"); resstr=AVExec(cmdstr); sprintf(cmdstr,"_theLegend.SetLegendType(#LEGEND_TYPE_COLOR)"); resstr=AVExec(cmdstr); sprintf(cmdstr,"_theLegend.Natural(_newTheme,\"Predicted\",5)"); resstr=AVExec(cmdstr); //sprintf(cmdstr,"_theSymbols=_theLegend.GetSymbols"); //resstr=AVExec(cmdstr); sprintf(cmdstr,"_theColorRamps=SymbolList.GetPredifined(#SYMLIST_TYPE_COLORRAMP)"); resstr=AVExec(cmdstr); sprintf(cmdstr,"for each ramp in _theColorRamps\n\tMsgBox(ramp.GetName,\"Debug\")\nend"); resstr=AVExec(cmdstr); sprintf(cmdstr,"_theLegend.GetSymbols.RampSavedColors(_theColorRamp)"); resstr=AVExec(cmdstr); sprintf(cmdstr,"_newTheme.SetLegendVisible(FALSE)"); resstr=AVExec(cmdstr); sprintf(cmdstr,"_newTheme.UpdateLegend"); resstr=AVExec(cmdstr); */ } // // Given the data in ashptr, // calculates the ASH and adds polygons to a new theme // named 'name'. // /* ---------------------------------------------------------- DOES EVERYTHING NECESSARY TO BIN DATA, CALCULATE ASH AND CREATE OUTPUT ---------------------------------------------------------- */ __declspec(dllexport) char* ASHC_Calc(ASHCptr ashptr, char* name) { char* avestr = "_gact=av.GetActiveDoc"; char* resultStr = "OK!"; ASHC_Bin_Reg(ashptr); ASHC_Reg(ashptr); ASHC_Add_Polygons(ashptr,name); return resultStr; }