/*
 * Restaurant ratings vs. neighbourhood demographics.
 *
 * Steps:
 *   1. Read the restaurant ratings CSV and the location demographics CSV.
 *   2. Flag restaurants whose cuisine style is in a list of Asian styles.
 *   3. Reshape the ratings from one row per restaurant (up to four
 *      location/phone pairs) to one row per restaurant location.
 *   4. Inner-join the reshaped ratings to the demographics on loc.
 *   5. Run a sequence of PROC GLM / PROC REG models of price.
 */

filename rawrates "N:\share\notes\ratings.csv";
filename rawdemos "N:\share\notes\demos.csv";

/* ---- 1. Read raw inputs (first line of each file is skipped via firstobs=2) ---- */

data ratings;
    infile rawrates firstobs=2 dlm="," dsd missover lrecl=1000;
    informat restname $40.
             style    $30.
             loc1 $30. phone1 $12.
             loc2 $30. phone2 $12.
             loc3 $30. phone3 $12.
             loc4 $30. phone4 $12.;
    /* ratings and price have no informat, so they are read as numeric. */
    input restname ratings price style
          loc1 phone1 loc2 phone2 loc3 phone3 loc4 phone4;
run;

data demos;
    infile rawdemos firstobs=2 dlm="," dsd missover lrecl=1000;
    informat loc $30. state $10. zipcode $30.;
    input loc state zipcode pop white black amerind asian othrace
          colplus medhinc medfinc percinc;
    /* Rows without a location key cannot be merged, so drop them. */
    if loc = "" then delete;
run;

/* ---- 2. Asian-cuisine indicator ---- */

data ratings;
    set ratings;
    /*
     * "Vietnamese" was previously misspelled as "Vietamnese", so correctly
     * spelled rows were never flagged. The misspelling is still accepted
     * in case the raw file carries it.
     * The comparison is case-sensitive, and a blank style yields 0.
     */
    asianstyle = (style in ("Asian", "Thai", "Chinese", "Japanese",
                            "Vietnamese", "Vietamnese", "Korean"));
run;

/* ---- 3. One row per restaurant location ---- */

/* The first location is kept even when blank; locations 2-4 are kept only
   when present. */
data rateloc1;
    set ratings;
    loc   = loc1;
    phone = phone1;
    drop loc1-loc4 phone1-phone4;
run;

data rateloc2;
    set ratings;
    loc   = loc2;
    phone = phone2;
    drop loc1-loc4 phone1-phone4;
    if loc = "" then delete;
run;

data rateloc3;
    set ratings;
    loc   = loc3;
    phone = phone3;
    drop loc1-loc4 phone1-phone4;
    if loc = "" then delete;
run;

data rateloc4;
    set ratings;
    loc   = loc4;
    phone = phone4;
    drop loc1-loc4 phone1-phone4;
    if loc = "" then delete;
run;

data rateloc;
    set rateloc1 rateloc2 rateloc3 rateloc4;
run;

/* ---- 4. Join to demographics ---- */

proc sort data=rateloc;
    by loc;
run;

proc sort data=demos;
    by loc;
run;

/* Keep only locations present in both inputs (inner join).
   NOTE(review): this assumes loc is unique in demos; if the same loc value
   appears on several demos rows the match-merge pairs rows positionally
   within the BY group - confirm against the data. */
data merged;
    merge rateloc (in=one) demos (in=two);
    by loc;
    if one and two;
run;

/* ---- 5. Models ---- */

* two-group comparison, example 1;
proc glm data=merged;
    class asianstyle;
    model price=asianstyle;
    means asianstyle;
run;
quit;

* two-group comparison, example 2;
proc reg data=merged;
    model price=asianstyle;
run;
quit;

* comparisons involving more than two groups;
proc glm data=merged;
    class ratings;
    model price=ratings;
    means ratings/waller;
    means ratings/lsd cldiff;
run;
quit;

* a simple regression with multiple right hand side variables;
proc reg data=merged;
    model price=ratings pop percinc;
run;
quit;

* a regression with state fixed effects;
proc glm data=merged;
    class state;
    model price=ratings pop percinc state/solution;
run;
quit;

/*
 * exercise: can we capture price changes by increasing ratings
 *   from 1 star to 2 stars,
 *   from 2 stars to 3 stars,
 *   and from 3 stars to 4 stars
 * separately?
 */

* a revision of the first regression;
proc glm data=merged;
    class ratings;
    model price=ratings pop percinc/solution;
run;
quit;

/*
 * a revision of the second regression.
 * PROC GLM does accept several CLASS variables (e.g. "class ratings state"),
 * but here the rating dummies are built by hand. Leaving rating_1star out of
 * the model makes 1 star the reference category.
 */
data merged;
    set merged;
    /*
     * A missing rating must give missing dummies. Otherwise the row would be
     * all zeros and enter the regression as if it were a 1-star restaurant,
     * while the CLASS-based model above excludes it.
     */
    if missing(ratings) then
        call missing(rating_1star, rating_2star, rating_3star, rating_4star);
    else do;
        rating_1star = (ratings = 1);
        rating_2star = (ratings = 2);
        rating_3star = (ratings = 3);
        rating_4star = (ratings = 4);
    end;
run;

proc glm data=merged;
    class state;
    model price=rating_2star rating_3star rating_4star pop percinc state / solution;
run;
quit;

* does the relationship between price and ratings vary by state or not;
data merged;
    set merged;
    state_MD = (state = "MD");
    state_VA = (state = "VA");
    state_DC = (state = "DC");
    /* State-specific rating slopes: each is ratings within that state and 0
       elsewhere. */
    ratings_MD = ratings * state_MD;
    ratings_VA = ratings * state_VA;
    ratings_DC = ratings * state_DC;
run;

/* NOTE(review): a row whose state is none of MD/VA/DC gets 0 on all three
   slope terms and still enters the fit - confirm the data contain only these
   three states. The model also has no state intercepts (state_MD, state_VA),
   so all states share one intercept - confirm that is intended. */
proc reg data=merged;
    model price=ratings_MD ratings_VA ratings_DC pop percinc;
run;
quit;