# Cancer mortality assignment: descriptive statistics and linear regression of
# cancer mortalities on population size, with diagnostic plots.
#
# NOTE(review): the original began with rm(list = ls()), which wipes the
# caller's workspace. It has been dropped; run this script in a fresh R
# session if a clean environment is required.

# Read in the data ----
data_path <- "D:\\adsm\\lehre\\PhD_Oekologie\\WS2010\\Assignments\\Assignment2\\datacancercsv.csv"
data.cancer <- read.csv(data_path, header = TRUE)
summary(data.cancer)

# Extract the two analysis columns explicitly instead of attach()-ing the
# whole data frame; the names `mortalities` and `population` stay available
# to any code that follows.
mortalities <- data.cancer$mortalities
population <- data.cancer$population

# Histogram of the population values for cancer mortality ----
hist(
  mortalities,
  xlab = "Mortalities",
  main = "Histogramm of cancer mortalities",
  col = "blue"
)

# Mean and total cancer mortality; variance and standard deviation ----
# var() and sd() use the n - 1 denominator (sample estimators).
mean(mortalities)
sum(mortalities)
var(mortalities)
sd(mortalities)
mean(population)
sd(population)

# Scatter plot of the number of cases (y) versus population (x) ----
plot(population, mortalities)

# Linear regression: y = beta_0 + beta_1 * x ----
reg1 <- lm(mortalities ~ population, data = data.cancer)
summary(reg1)

# Diagnostic plot for the model assumptions: residuals versus fitted values.
# fitted()/resid() are used instead of reg1$fitted / reg1$resid, which only
# worked through partial matching of `$` on the lm object.
plot(fitted(reg1), resid(reg1), xlab = "Fitted values", ylab = "Residuals")

# Residuals versus log population (homoscedasticity check) ----
# The original plotted the fitted values on a log axis; the task asks for
# population on the x axis. Assumes all population values are positive, as
# required by a log axis -- TODO confirm against the data.
plot(
  population,
  resid(reg1),
  log = "x",
  xlab = "Population (log scale)",
  ylab = "Residuals"
)

# Square-root model: sqrt(mortalities) ~ sqrt(population) ----
reg2 <- lm(sqrt(mortalities) ~ sqrt(population), data = data.cancer)
summary(reg2)

# Re-check homoscedasticity for the transformed model.
plot(fitted(reg2), resid(reg2), xlab = "Fitted values", ylab = "Residuals")

# Normality of the residuals: QQ plot and histogram ----
res2 <- resid(reg2)
qqnorm(res2)
qqline(res2)
hist(res2)