# Cancer mortality analysis (Assignment 2).
#
# Reads the cancer data set, summarises the `mortalities` and `population`
# columns, fits a linear regression of mortalities on population (raw and
# square-root scale) and draws the diagnostic plots for each fit.
#
# The script deliberately does not call rm(list = ls()) or attach():
# clearing the workspace is a surprising side effect for whoever sources the
# file, and attach() makes column lookups depend on the search path. Columns
# are accessed through `data =` / with() instead.

# Read in the data.
data.cancer <- read.csv(
  "D:\\adsm\\lehre\\PhD_Oekologie\\WS2010\\Assignments\\Assignment2\\datacancercsv.csv",
  header = TRUE
)
# Fail early with a clear message if the expected columns are missing.
stopifnot(all(c("mortalities", "population") %in% names(data.cancer)))
summary(data.cancer)

# Make a histogram of the population values for cancer mortality.
hist(
  data.cancer$mortalities,
  xlab = "Mortalities",
  main = "Histogramm of cancer mortalities",
  col = "blue"
)

# What are the population mean and total cancer mortality?
# What are the population variance and standard deviation?
# Note: var() and sd() use the n - 1 denominator.
mean(data.cancer$mortalities)
sum(data.cancer$mortalities)
var(data.cancer$mortalities)
sd(data.cancer$mortalities)
mean(data.cancer$population)
sd(data.cancer$population)

# Draw a scatter plot of the number of cases ($y$) versus population ($x$).
with(data.cancer, plot(population, mortalities))

# Compute the linear regression: $y=\beta_0+\beta_1 x$.
reg1 <- lm(mortalities ~ population, data = data.cancer)
summary(reg1)

# Do all necessary plots in order to investigate the necessary assumptions.
# fitted()/resid() are used instead of reg1$fitted / reg1$resid, which only
# work through partial matching of the component names.
plot(fitted(reg1), resid(reg1), xlab = "Fitted values", ylab = "Residuals")

# Plot the residuals versus log population. Do you believe that the
# assumption of homoscedasticity is given in this problem?
# The x-axis is the population itself (on a log scale), not the fitted values.
plot(
  data.cancer$population, resid(reg1),
  log = "x",
  xlab = "Population (log scale)", ylab = "Residuals"
)

# Compute the following model: lm(sqrt(deaths)$\sim$sqrt(inhabitants), ...).
reg2 <- lm(sqrt(mortalities) ~ sqrt(population), data = data.cancer)
summary(reg2)

# Investigate again the assumption of homoscedasticity. What are your
# conclusions?
plot(fitted(reg2), resid(reg2), xlab = "Fitted values", ylab = "Residuals")

# Plot a QQ-Plot of the residuals as well as a histogram. What are your
# conclusions regarding the normality of the data?
qqnorm(resid(reg2))
qqline(resid(reg2))
hist(resid(reg2))