library(lubridate)
library(dplyr)
library(tidyr)
library(ggplot2)
load("datasets/melb_sel.Rdata")
head(melb_sel)
## # A tibble: 6 × 2
## date counts
## <dttm> <dbl>
## 1 2019-06-01 00:00:00 6.74
## 2 2019-06-01 01:00:00 6.33
## 3 2019-06-01 02:00:00 6.01
## 4 2019-06-01 03:00:00 5.28
## 5 2019-06-01 04:00:00 4.75
## 6 2019-06-01 05:00:00 4.49
mel_d_avg = melb_sel |> mutate(wday = wday(date, label = TRUE), hour = hour(date)) |> group_by(hour) |>
summarise(avg_counts = mean(counts))
ggplot(mel_d_avg, aes(x = hour, y = avg_counts)) + geom_line()

melb_w_avg = melb_sel |> mutate(wday = wday(date, label = TRUE), hour = hour(date)) |>
group_by(wday, hour) |> summarise(avg_counts = mean(counts))
melb_w_avg$mdayh = melb_sel$date[1:168]
melb_w_avg |>
ggplot(aes(x = mdayh, y = avg_counts)) + geom_line() +
scale_x_datetime(date_labels = "%a %H:%M:%S", date_breaks = "1 day")

Question: What is the expected number of events?
Answer: \[\lambda \nu(D) = 100\times 4 = 400. \]
Question: What do you conclude from the above plot?
Answer: We fail to reject the null hypothesis since the estimated G function is inside the envelope.
Question: Can you test rNS data whether it follow clustered point pattern?
library(spatstat)
## Loading required package: spatstat.data
## Loading required package: spatstat.geom
## spatstat.geom 3.2-9
## Loading required package: spatstat.random
## spatstat.random 3.2-3
## Loading required package: spatstat.explore
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## spatstat.explore 3.2-6
## Loading required package: spatstat.model
## Loading required package: rpart
## spatstat.model 3.2-10
## Loading required package: spatstat.linnet
## spatstat.linnet 3.1-4
##
## spatstat 3.0-7
## For an introduction to spatstat, type 'beginner'
set.seed(2025)
nclust <- function(x0, y0, radius, n) {
return(runifdisc(n, radius, centre=c(x0, y0)))
}
rNS = rNeymanScott(kappa=10, expand=0.1,
rcluster = nclust, radius=0.1, n=10)
r=seq(0,1, by=0.005)
Gtest=envelope(rNS,fun=Gest, r=r, nrank=5, nsim=200)
plot(Gtest, xlim = c(0,0.1))

## Generating 200 simulations of CSR ...
## 1, 2, 3, 4.6.8.10.12.14.16.18.20.22.24.26.28.30.32.34
## .36.38.40.42.44.46.48.50.52.54.56.58.60.62.64.66.68.70.72.74
## .76.78.80.82.84.86.88.90.92.94.96.98.100.102.104.106.108.110.112.114
## .116.118.120.122.124.126.128.130.132.134.136.138.140.142.144.146.148.150.152.154
## .156.158.160.162.164.166.168.170.172.174.176.178.180.182.184.186.188.190.192.194
## .196.198.
## 200.
##
## Done.
quadrat.test(rNS, nx = 4, ny= 4, alternative = "clustered", method = "Chisq")
##
## Chi-squared test of CSR using quadrat counts
##
## data: rNS
## X2 = 134.67, df = 15, p-value < 2.2e-16
## alternative hypothesis: clustered
##
## Quadrats: 4 by 4 grid of tiles
Both methods conclude rNS data follow a clustered point pattern.
library(geoR)
## --------------------------------------------------------------
## Analysis of Geostatistical Data
## For an Introduction to geoR go to http://www.leg.ufpr.br/geoR
## geoR version 1.9-4 (built on 2024-02-14) is now loaded
## --------------------------------------------------------------
dfgeo = readRDS("datasets/dfgeo.rds")
dfgeo$x = dfgeo$long * 79
dfgeo$y = dfgeo$lat * 111
idx = seq(1, length(dfgeo$x), 5)
dfgeo_test = dfgeo[idx, ]
dfgeo_train = dfgeo[-idx,]
dfgeodata = as.geodata(dfgeo_train, coords.col=5:6, data.col=4, covar.col = 1:3)
plot(variog(dfgeodata, option="bin", max.dist=900),
xlab = "h", ylab = "variogram")
## variog: computing omnidirectional variogram
lines.variomodel(seq(0, 900, l = 100),
cov.pars = c(0.8, 200),
cov.model = "mat", kap = 0.6, nug = 0.1)

m4 = likfit(dfgeodata, ini = c(0.8, 200), nug = 0.1, cov.model = "matern", fix.kappa = FALSE, kappa = 0.6)
## ---------------------------------------------------------------
## likfit: likelihood maximisation using the function optim.
## likfit: Use control() to pass additional
## arguments for the maximisation function.
## For further details see documentation for optim.
## likfit: It is highly advisable to run this function several
## times with different initial values for the parameters.
## likfit: WARNING: This step can be time demanding!
## ---------------------------------------------------------------
## likfit: end of numerical maximisation.
summary(m4)
## Summary of the parameter estimation
## -----------------------------------
## Estimation method: maximum likelihood
##
## Parameters of the mean component (trend):
## beta
## -0.2354
##
## Parameters of the spatial component:
## correlation function: matern
## (estimated) variance parameter sigmasq (partial sill) = 0.7465
## (estimated) cor. fct. parameter phi (range parameter) = 200
## (estimated) extra parameter kappa = 0.3045
## anisotropy parameters:
## (fixed) anisotropy angle = 0 ( 0 degrees )
## (fixed) anisotropy ratio = 1
##
## Parameter of the error component:
## (estimated) nugget = 0.0275
##
## Transformation parameter:
## (fixed) Box-Cox parameter = 1 (no transformation)
##
## Practical Range with cor=0.05 for asymptotic range: 484.5776
##
## Maximised Likelihood:
## log.L n.params AIC BIC
## "-643.1" "5" "1296" "1319"
##
## non spatial model:
## log.L n.params AIC BIC
## "-968" "2" "1940" "1949"
##
## Call:
## likfit(geodata = dfgeodata, ini.cov.pars = c(0.8, 200), nugget = 0.1,
## fix.kappa = FALSE, kappa = 0.6, cov.model = "matern")