COVID-19 Number of Tests in Italy

Table of Contents

Menu

Introduction

This page presents some data about the number of tests and people tested for COVID-19 over time in Italy and compares them with the number of people found positive.

This page was created on <2020-08-20 Thu> and last updated on <2020-09-15 Tue>.

The source code available on the COVID-19 pages is distributed under the MIT License; the content is distributed under a Creative Commons Attribution 4.0 license.

Getting data into R

We first read the data from the Civil Protection repository, adding the ratio between positives and tests, computed both on the same day and with the test data shifted by two days (on the assumption that tests take two days to complete).

In fact, data about tests is used with different semantics by different regions. Some regions report tests with results (in which case the ratio new positives / tests makes sense). Others report the number of tests performed, in which case the correct ratio is between positives and tests performed some days earlier. We assume two days and report both ratios for all regions. See the following issue on GitHub for an explanation and some more details: https://github.com/pcm-dpc/COVID-19/issues/577 (in Italian).

# Directory holding the Civil Protection data files
PATH <- "/home/adolfo/Downloads/COVID-19/"
# Decimal places kept by round() on the positives/tests ratios
DIGITS <- 4

# National-level time series
national <- read.csv(
  file.path(PATH, "dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv")
)
national$data <- as.Date(national$data)

# Day-over-day increase of the running total of people tested
national$nuovi_casi_testati <- c(NA, diff(national$casi_testati, lag = 1))
# Same-day ratio of new positives to newly tested people, as a percentage
national$p_over_t <- with(
  national,
  100 * round(nuovi_positivi / nuovi_casi_testati, digits = DIGITS)
)
# Newly tested people moved two rows later, for the two-day-lag ratio
national$nuovi_casi_testati_2 <- c(rep(NA, 2), head(national$nuovi_casi_testati, n = -2))
national$p_over_t_2 <- with(
  national,
  100 * round(nuovi_positivi / nuovi_casi_testati_2, digits = DIGITS)
)

Concerning the regional level, computed columns, such as the number of people tested in a day, have to be computed after filtering, or the diff will work on values from different regions.

# Region-level time series; per-region columns are derived later, after
# filtering on a single region
data <- read.csv(
  file.path(PATH, "dati-regioni/dpc-covid19-ita-regioni.csv")
)
data[["data"]] <- as.Date(data[["data"]])

These are the columns we are interested in and their translation in English:

# Columns shown in the summary tables, in display order
cols <- c(
  "data", "casi_testati", "totale_positivi", "nuovi_casi_testati",
  "nuovi_positivi", "p_over_t", "p_over_t_2"
)

We now define a function to output the last N rows of the input data frame. The real “challenge”, here, is transposing the data, to get a more natural presentation (with time progressing from left to right).

# Build a transposed summary of the most recent rows of a data frame, so that
# time runs from left to right.
#
# Args:
#   df:   data frame containing a Date column named "data" plus the columns
#         listed in `cols`.
#   cols: character vector of column names to show; must include "data",
#         which supplies the column headers and is not shown as a row.
#   rows: number of trailing rows of `df` to show (default 10).
#
# Returns:
#   A data frame with one row per entry of `cols` other than "data", a first
#   column "Label" holding the original column name, and one column per
#   selected row of `df`, headed by its formatted date.
table_data <- function(df, cols, rows = 10) {
  stopifnot("data" %in% cols)

  # drop = FALSE keeps a data frame even when a single column is requested
  recent <- tail(df, rows)[, cols, drop = FALSE]

  # the labels in the transposed matrix are the column names of the original data.frame
  row_labels <- colnames(recent)
  # the columns in the transposed matrix are the dates
  col_labels <- c("Label", format(recent$data, "%a, %b %d"))

  transposed <- data.frame(row_labels, t(recent))
  colnames(transposed) <- col_labels

  # the dates are already in the header: remove that row by name, so the
  # result does not depend on where "data" appears in `cols`
  transposed[row_labels != "data", , drop = FALSE]
}

People Tested and Cases in Italy

Data of the last ten days

# National figures for the last ten rows, one column per day
table_data(national, cols, rows = 10)
Label Sun, Sep 06 Mon, Sep 07 Tue, Sep 08 Wed, Sep 09 Thu, Sep 10 Fri, Sep 11 Sat, Sep 12 Sun, Sep 13 Mon, Sep 14 Tue, Sep 15
casi_testati 5538028 5578731 5636663 5699709 5757488 5818910 5875462 5924322 5956171 6006675
totale_positivi 32078 32993 33789 34734 35708 36767 37503 38509 39187 39712
nuovi_casi_testati 53683 40703 57932 63046 57779 61422 56552 48860 31849 50504
nuovi_positivi 1297 1108 1370 1434 1597 1616 1501 1458 1008 1229
p_over_t 2.42 2.72 2.36 2.27 2.76 2.63 2.65 2.98 3.16 2.43
p_over_t_2 1.79 1.59 2.55 3.52 2.76 2.56 2.6 2.37 1.78 2.52

Number of Tests

# Daily number of people tested from August 1st onwards, with every point
# labelled by its value.
# The rows are selected once and reused, instead of repeating the filter for
# every argument.
since_august <- national[national$data >= as.Date("2020-08-01"), ]

# Explicit axis titles: without them plot() labels the axes with the deparsed
# argument expressions.
plot(x = since_august$data,
     y = since_august$nuovi_casi_testati,
     type = "l", lwd = 6, pch = 16, cex = 2.5, col = "#3B3176",
     xlab = "Day", ylab = "Number of Tests")
text(x = since_august$data,
     y = since_august$nuovi_casi_testati,
     labels = since_august$nuovi_casi_testati,
     pos = 3, cex = 1.5, col = "#3B3176")
grid(col = "black")

tests_italia.png

Number of Tests and New Cases

Plot new cases and tests together. (Solution taken from How can I plot with 2 different y-axes? on Stack Overflow.)

## Plot daily tests and daily new cases on the same graph, each series with
## its own y-axis (tests on the left, new cases on the right).

## Rows from August 1st onwards, selected once and reused below
since_august <- national[national$data >= as.Date("2020-08-01"), ]
dates <- since_august$data

## add extra space to right margin of plot within frame
par(mar = c(5, 4, 4, 6) + 0.1)

## Plot first set of data and draw its axis
tests_limits <- range(since_august$nuovi_casi_testati)
plot(x = dates,
     y = since_august$nuovi_casi_testati,
     type = "l", lwd = 6, pch = 11, cex = 1.5, col = "#3B3176",
     axes = FALSE,
     ylim = tests_limits,
     ylab = "", xlab = "")
text(x = dates,
     y = since_august$nuovi_casi_testati,
     labels = since_august$nuovi_casi_testati,
     pos = 3, cex = 1, col = "#3B3176")
mtext("Number of Tests", side = 2, col = "#3B3176", line = 4)
## axis() takes its range from the plot region set by plot(ylim = ...);
## "ylim" is not an axis() parameter and only triggers a warning
axis(2, col = "black", las = 1)
box()

## Allow a second plot on the same graph
par(new = TRUE)
new_cases_limits <- range(since_august$nuovi_positivi)

## plot() has no useful return value, so it is not assigned
plot(x = dates,
     y = since_august$nuovi_positivi,
     type = "l", lwd = 6, pch = 21, cex = 1.5, col = "#AA0000",
     axes = FALSE,
     ylim = new_cases_limits,
     ylab = "", xlab = "")
text(x = dates,
     y = since_august$nuovi_positivi,
     labels = since_august$nuovi_positivi,
     pos = 1, cex = 1, col = "#AA0000")
mtext("New Cases", side = 4, line = 4, col = "#AA0000")
axis(4, las = 1)

## grid lines at the default tick positions of the current plot
grid(col = "black", lty = "dotted")

# x-axis
axis.Date(1, at = seq(min(dates), max(dates), by = "week"), format = "%b %d", las = 2)
mtext("Day", side = 1, line = 2.5)

## Add Legend
legend("topleft", legend = c("Tests", "New Cases"),
       text.col = c("#3B3176", "#AA0000"), pch = c(15, 17), col = c("#3B3176", "#AA0000"))

tests_and_new_cases_italia.png

People Tested and Cases in Trentino

# Per-region columns are derived after filtering, so diff() never mixes
# values from different regions.
region <- subset(data, denominazione_regione == "P.A. Trento")

# Day-over-day increase of the running total of people tested
region$nuovi_casi_testati <- c(NA, diff(region$casi_testati, 1))
region$p_over_t <- round(region$nuovi_positivi / region$nuovi_casi_testati, digits = DIGITS) * 100
# Newly tested people moved two rows later, for the two-day-lag ratio.
# (A lag-2 diff() version used to be computed first, only to be overwritten
# here; that dead computation has been removed.)
region$nuovi_casi_testati_2 <- c(NA, NA, head(region$nuovi_casi_testati, -2))
region$p_over_t_2 <- round(region$nuovi_positivi / region$nuovi_casi_testati_2, digits = DIGITS) * 100

table_data(region, cols)
Label Sun, Sep 06 Mon, Sep 07 Tue, Sep 08 Wed, Sep 09 Thu, Sep 10 Fri, Sep 11 Sat, Sep 12 Sun, Sep 13 Mon, Sep 14 Tue, Sep 15
casi_testati 86002 86198 86431 86802 87467 88084 88749 89234 89459 89798
totale_positivi 298 315 317 320 398 422 437 442 459 474
nuovi_casi_testati 539 196 233 371 665 617 665 485 225 339
nuovi_positivi 33 17 3 7 82 29 22 9 20 20
p_over_t 6.12 8.67 1.29 1.89 12.33 4.7 3.31 1.86 8.89 5.9
p_over_t_2 6.4 2.92 0.56 3.57 35.19 7.82 3.31 1.46 3.01 4.12

People Tested and Cases in Liguria

# Per-region columns are derived after filtering, so diff() never mixes
# values from different regions.
region <- subset(data, denominazione_regione == "Liguria")

# Day-over-day increase of the running total of people tested
region$nuovi_casi_testati <- c(NA, diff(region$casi_testati, 1))
region$p_over_t <- round(region$nuovi_positivi / region$nuovi_casi_testati, digits = DIGITS) * 100
# Newly tested people moved two rows later, for the two-day-lag ratio.
# (A lag-2 diff() version used to be computed first, only to be overwritten
# here; that dead computation has been removed.)
region$nuovi_casi_testati_2 <- c(NA, NA, head(region$nuovi_casi_testati, -2))
region$p_over_t_2 <- round(region$nuovi_positivi / region$nuovi_casi_testati_2, digits = DIGITS) * 100

table_data(region, cols)
Label Sun, Sep 06 Mon, Sep 07 Tue, Sep 08 Wed, Sep 09 Thu, Sep 10 Fri, Sep 11 Sat, Sep 12 Sun, Sep 13 Mon, Sep 14 Tue, Sep 15
casi_testati 135222 135646 137135 138436 139851 141350 142774 143543 144077 145476
totale_positivi 768 823 860 907 941 994 1088 1143 1200 1251
nuovi_casi_testati 829 424 1489 1301 1415 1499 1424 769 534 1399
nuovi_positivi 111 59 64 51 114 82 112 78 65 141
p_over_t 13.39 13.92 4.3 3.92 8.06 5.47 7.87 10.14 12.17 10.08
p_over_t_2 10.34 3.55 7.72 12.03 7.66 6.3 7.92 5.2 4.56 18.34

People Tested and Cases in Veneto

# Per-region columns are derived after filtering, so diff() never mixes
# values from different regions.
region <- subset(data, denominazione_regione == "Veneto")

# Day-over-day increase of the running total of people tested
region$nuovi_casi_testati <- c(NA, diff(region$casi_testati, 1))
region$p_over_t <- round(region$nuovi_positivi / region$nuovi_casi_testati, digits = DIGITS) * 100
# Newly tested people moved two rows later, for the two-day-lag ratio.
# (A lag-2 diff() version used to be computed first, only to be overwritten
# here; that dead computation has been removed.)
region$nuovi_casi_testati_2 <- c(NA, NA, head(region$nuovi_casi_testati, -2))
region$p_over_t_2 <- round(region$nuovi_positivi / region$nuovi_casi_testati_2, digits = DIGITS) * 100

table_data(region, cols)
Label Sun, Sep 06 Mon, Sep 07 Tue, Sep 08 Wed, Sep 09 Thu, Sep 10 Fri, Sep 11 Sat, Sep 12 Sun, Sep 13 Mon, Sep 14 Tue, Sep 15
casi_testati 643251 645256 649669 653982 658348 663769 668332 671902 673892 677256
totale_positivi 2939 2943 2914 2937 2926 2964 2940 2967 2983 2984
nuovi_casi_testati 3310 2005 4413 4313 4366 5421 4563 3570 1990 3364
nuovi_positivi 179 69 105 91 147 173 138 142 55 115
p_over_t 5.41 3.44 2.38 2.11 3.37 3.19 3.02 3.98 2.76 3.42
p_over_t_2 2.4 1.3 3.17 4.54 3.33 4.01 3.16 2.62 1.21 3.22

People Tested and Cases in Lombardia

# Per-region columns are derived after filtering, so diff() never mixes
# values from different regions.
region <- subset(data, denominazione_regione == "Lombardia")

# Day-over-day increase of the running total of people tested
region$nuovi_casi_testati <- c(NA, diff(region$casi_testati, 1))
region$p_over_t <- round(region$nuovi_positivi / region$nuovi_casi_testati, digits = DIGITS) * 100
# Newly tested people moved two rows later, for the two-day-lag ratio.
# (A lag-2 diff() version used to be computed first, only to be overwritten
# here; that dead computation has been removed.)
region$nuovi_casi_testati_2 <- c(NA, NA, head(region$nuovi_casi_testati, -2))
region$p_over_t_2 <- round(region$nuovi_positivi / region$nuovi_casi_testati_2, digits = DIGITS) * 100

table_data(region, cols)
Label Sun, Sep 06 Mon, Sep 07 Tue, Sep 08 Wed, Sep 09 Thu, Sep 10 Fri, Sep 11 Sat, Sep 12 Sun, Sep 13 Mon, Sep 14 Tue, Sep 15
casi_testati 1053199 1059954 1073431 1087646 1098986 1110428 1120531 1128937 1134109 1148425
totale_positivi 8031 8110 8221 8324 8452 8591 8567 8789 8848 8799
nuovi_casi_testati 8254 6755 13477 14215 11340 11442 10103 8406 5172 14316
nuovi_positivi 198 109 271 218 245 257 269 265 125 176
p_over_t 2.4 1.61 2.01 1.53 2.16 2.25 2.66 3.15 2.42 1.23
p_over_t_2 0.99 0.66 3.28 3.23 1.82 1.81 2.37 2.32 1.24 2.09

Author: Adolfo Villafiorita

Last modified: 2020-09-15 Tue 17:57 (created on: 2020-08-20 Thu 00:00)

Published: 2020-09-15 Tue 18:07