diff --git a/.Rbuildignore b/.Rbuildignore
index caf548c..2c82ca8 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,3 +1,5 @@
 ^renv$
 ^renv\.lock$
 ^requirements\.txt$
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/notebooks/longitudinal-tracking/analysis.R b/notebooks/longitudinal-tracking/analysis.R
new file mode 100644
index 0000000..5826c85
--- /dev/null
+++ b/notebooks/longitudinal-tracking/analysis.R
@@ -0,0 +1,97 @@
+library(readr)
+library(lubridate)
+library(dplyr)
+library(tidyr)
+library(ggplot2)
+library(scales)
+
+df <- read_csv(file = "data/processed/questionnaires_linked.csv")
+
+# focus on:
+# - Wellbeing: Q185
+# - Anxiety: Q206
+# - Remote-learning: Q188
+
+# "Is it possible to track a micro-cohort of pupils across the whole period?"
+# "Focusing on those respondents who have multiple responses in the period
+# – there should be a good number of pupils with at least 4 or more responses over the time period"
+pupils_long <- df %>%
+  count(pupil_id) %>%
+  filter(n > 4) %>%
+  distinct(pupil_id) %>%
+  pull()
+
+# there are pupils who have returned multiple surveys within the same month
+# as we're informed that ideally, each student should be filling out a survey every month
+# then there's something possibly dodgy about multiple returns in the same month
+# so let's get these students and isolate them
+pupils_return_several_in_one_month <- df %>%
+  filter(pupil_id %in% pupils_long) %>%
+  # check they have completed multiple returns across several months
+  # as we want to avoid duplicates
+  mutate(measurement_month = month(measurement_date)) %>%
+  group_by(pupil_id, measurement_month) %>%
+  tally() %>%
+  filter(n > 1) %>%
+  distinct(pupil_id) %>%
+  pull()
+
+df_wellbeing <- df %>%
+  # filter for students with more than 4 returns (n > 4; NOTE(review): the brief asked for "at least 4" — confirm threshold)
+  # and students who did not return more than one response in same month
+  filter((pupil_id %in% pupils_long) & !(pupil_id %in% pupils_return_several_in_one_month)) %>%
+  # select Q to focus on
+  select(pupil_id, measurement_date, 
starts_with(match = "185")) %>% + # unpivot + pivot_longer( + cols = -c("pupil_id", "measurement_date"), + names_to = "question", + values_to = "response" + ) %>% + # extract month for simplification + mutate(measurement_month = factor(x = month(measurement_date)), + response = factor(x = response, + levels = c(NA, seq(from = 1, to = 5, by = 1)), + ordered = TRUE)) + + +# suggestion i: +# I guess my go-to would be % of respondents rating 'highly likely' per data point, with time as x-axis? +# So it could be a stacked area / bar chart / line chart if you decide to include the breaks as well +df_stack <- df_wellbeing %>% + group_by(measurement_month, response) %>% + tally() %>% + rename('counts' = 'n') %>% + mutate(label = paste0(round(x = 100 * counts / sum(counts), digits = 2), '%')) + +ggplot(data = df_stack, mapping = aes(x = measurement_month, + y = counts, + fill = response)) + + geom_bar(stat = 'identity') + + geom_text(mapping = aes(label = label), + position = position_stack(vjust = 0.5)) + + +# suggestion ii: +# geom point + jitter them + colour them by ordinal scale (1 - 7 or whatever) + use borders +# if you need to highlight the micro-cohorts + x-axis is time +ggplot(data = df_wellbeing, + mapping = aes(x = measurement_month, + y = as.factor(pupil_id), + colour = response)) + + geom_point() + + facet_grid(. ~ question) + +# suggestion iii: +# graph visualisation where nodes are responses and edges are % of responses following this path + +# no. of surveys they complete +n <- 4 + +# no. of levels (likert) +lvls <- list(1:5) + +l <- rep(x = lvls, n) + +# get no. 
of unique permutations +combos <- expand.grid(l) diff --git a/renv.lock b/renv.lock index eab7438..06d956c 100644 --- a/renv.lock +++ b/renv.lock @@ -1,10 +1,10 @@ { "R": { - "Version": "4.0.3", + "Version": "4.0.2", "Repositories": [ { "Name": "CRAN", - "URL": "https://cran.ma.imperial.ac.uk" + "URL": "https://cran.rstudio.com" } ] }, @@ -25,10 +25,10 @@ }, "MASS": { "Package": "MASS", - "Version": "7.3-53", + "Version": "7.3-51.6", "Source": "Repository", "Repository": "CRAN", - "Hash": "d1bc1c8e9c0ace57ec9ffea01021d45f" + "Hash": "1dad32ac9dbd8057167b2979fb932ff7" }, "Matrix": { "Package": "Matrix", @@ -37,40 +37,12 @@ "Repository": "CRAN", "Hash": "08588806cba69f04797dab50627428ed" }, - "R.cache": { - "Package": "R.cache", - "Version": "0.14.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "1ca02d43e1a4d49e616bd23bb39b17e6" - }, - "R.methodsS3": { - "Package": "R.methodsS3", - "Version": "1.8.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "4bf6453323755202d5909697b6f7c109" - }, - "R.oo": { - "Package": "R.oo", - "Version": "1.24.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "5709328352717e2f0a9c012be8a97554" - }, - "R.utils": { - "Package": "R.utils", - "Version": "2.10.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "a9e316277ff12a43997266f2f6567780" - }, "R6": { "Package": "R6", - "Version": "2.4.1", + "Version": "2.5.0", "Source": "Repository", "Repository": "CRAN", - "Hash": "292b54f8f4b94669b08f94e5acce6be2" + "Hash": "b203113193e70978a696b2809525649d" }, "RColorBrewer": { "Package": "RColorBrewer", @@ -170,13 +142,6 @@ "Repository": "CRAN", "Hash": "6b436e95723d1f0e861224dd9b094dfb" }, - "commonmark": { - "Package": "commonmark", - "Version": "1.7", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "0f22be39ec1d141fd03683c06f3a6e67" - }, "cpp11": { "Package": "cpp11", "Version": "0.2.3", @@ -198,13 +163,6 @@ "Repository": "CRAN", "Hash": "2b7d10581cc730804e9ed178c8374bd6" }, - 
"cyclocomp": { - "Package": "cyclocomp", - "Version": "1.1.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "53cbed70a2f7472d48fb6aef08442f25" - }, "dbplyr": { "Package": "dbplyr", "Version": "1.4.4", @@ -221,17 +179,10 @@ }, "digest": { "Package": "digest", - "Version": "0.6.25", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "f697db7d92b7028c4b3436e9603fb636" - }, - "docopt": { - "Package": "docopt", - "Version": "0.7.1", + "Version": "0.6.27", "Source": "Repository", "Repository": "CRAN", - "Hash": "e9eeef7931ee99ca0093f3f20b88e09b" + "Hash": "a0cbe758a531d054b537d16dff4d58a1" }, "dplyr": { "Package": "dplyr", @@ -282,6 +233,13 @@ "Repository": "CRAN", "Hash": "44594a07a42e5f91fac9f93fda6d0109" }, + "gargle": { + "Package": "gargle", + "Version": "0.5.0", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "aaacaf8b0ec3dfe45df9eb6bc040db44" + }, "generics": { "Package": "generics", "Version": "0.0.2", @@ -296,20 +254,6 @@ "Repository": "CRAN", "Hash": "4ded8b439797f7b1693bd3d238d0106b" }, - "gh": { - "Package": "gh", - "Version": "1.1.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "89ea5998938d1ad55f035c8a86f96b74" - }, - "git2r": { - "Package": "git2r", - "Version": "0.27.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "531a82d1beed1f545beb25f4f5945bf7" - }, "glue": { "Package": "glue", "Version": "1.4.2", @@ -317,6 +261,13 @@ "Repository": "CRAN", "Hash": "6efd734b14c6471cfe443345f3e35e29" }, + "googledrive": { + "Package": "googledrive", + "Version": "1.0.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "79ba5d18133290a69b7c135dc3dfef1a" + }, "gtable": { "Package": "gtable", "Version": "0.3.0", @@ -366,20 +317,6 @@ "Repository": "CRAN", "Hash": "a525aba14184fec243f9eaec62fbed43" }, - "hunspell": { - "Package": "hunspell", - "Version": "3.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "71e7853d60b6b4ba891d62ede21752e9" - }, - "ini": { - "Package": "ini", - 
"Version": "0.3.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "6154ec2223172bce8162d4153cda21f7" - }, "isoband": { "Package": "isoband", "Version": "0.2.2", @@ -415,13 +352,6 @@ "Repository": "CRAN", "Hash": "fbd9285028b0263d76d18c95ae51a53d" }, - "lazyeval": { - "Package": "lazyeval", - "Version": "0.2.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "d908914ae53b04d4c0c0fd72ecc35370" - }, "lifecycle": { "Package": "lifecycle", "Version": "0.2.0", @@ -429,13 +359,6 @@ "Repository": "CRAN", "Hash": "361811f31f71f8a617a9a68bf63f1f42" }, - "lintr": { - "Package": "lintr", - "Version": "2.0.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "023cecbdc0a32f86ad3cb1734c018d2e" - }, "lubridate": { "Package": "lubridate", "Version": "1.7.9", @@ -459,10 +382,10 @@ }, "mgcv": { "Package": "mgcv", - "Version": "1.8-33", + "Version": "1.8-31", "Source": "Repository", "Repository": "CRAN", - "Hash": "eb7b6439bc6d812eed2cddba5edc6be3" + "Hash": "4bb7e0c4f3557583e1e8d3c9ffb8ba5c" }, "mime": { "Package": "mime", @@ -487,10 +410,10 @@ }, "nlme": { "Package": "nlme", - "Version": "3.1-149", + "Version": "3.1-148", "Source": "Repository", "Repository": "CRAN", - "Hash": "7c24ab3a1e3afe50388eb2d893aab255" + "Hash": "662f52871983ff3e3ef042c62de126df" }, "openssl": { "Package": "openssl", @@ -510,7 +433,7 @@ "Package": "piton", "Version": "0.1.1", "Source": "Repository", - "Repository": null, + "Repository": "CRAN", "Hash": "c4e3ffb3a754a77c5fdf89145a2f7d1d" }, "pkgbuild": { @@ -597,20 +520,6 @@ "Repository": "CRAN", "Hash": "c66b930d20bb6d858cd18e1cebcfae5c" }, - "rematch2": { - "Package": "rematch2", - "Version": "2.1.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "76c9e04c712a05848ae7a23d2f170a40" - }, - "remotes": { - "Package": "remotes", - "Version": "2.2.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "430a0908aee75b1fcba0e62857cab0ce" - }, "renv": { "Package": "renv", "Version": "0.12.0", @@ -625,13 
+534,6 @@ "Repository": "CRAN", "Hash": "b06bfb3504cc8a4579fd5567646f745b" }, - "rex": { - "Package": "rex", - "Version": "1.2.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "093584b944440c5cd07a696b3c8e0e4c" - }, "rlang": { "Package": "rlang", "Version": "0.4.8", @@ -681,13 +583,6 @@ "Repository": "CRAN", "Hash": "3838071b66e0c566d55cc26bd6e27bf4" }, - "spelling": { - "Package": "spelling", - "Version": "2.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "b3a5ecc3351f41eb30ef87f65cbff390" - }, "stringi": { "Package": "stringi", "Version": "1.5.3", @@ -702,13 +597,6 @@ "Repository": "CRAN", "Hash": "0759e6b6c0957edb1311028a49a35e76" }, - "styler": { - "Package": "styler", - "Version": "1.3.2", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "60b23effde8d08a56a64ebeb92a32749" - }, "sys": { "Package": "sys", "Version": "3.4", @@ -772,13 +660,6 @@ "Repository": "CRAN", "Hash": "9926b1bcf0b8f907b5c1b1dd922875bd" }, - "usethis": { - "Package": "usethis", - "Version": "1.6.3", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "c541a7aed5f7fb3b487406bf92842e34" - }, "utf8": { "Package": "utf8", "Version": "1.1.4", @@ -786,6 +667,13 @@ "Repository": "CRAN", "Hash": "4a5081acfb7b81a572e4384a7aaf2af1" }, + "uuid": { + "Package": "uuid", + "Version": "0.1-4", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "e4169eb989a5d03ccb6b628cad1b1b50" + }, "vctrs": { "Package": "vctrs", "Version": "0.3.4", @@ -828,13 +716,6 @@ "Repository": "CRAN", "Hash": "d4d71a75dd3ea9eb5fa28cc21f9585e2" }, - "xmlparsedata": { - "Package": "xmlparsedata", - "Version": "1.0.4", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "373bcee7aad3980799936749cfca6f24" - }, "yaml": { "Package": "yaml", "Version": "2.2.1", diff --git a/src/make_data/link_data.R b/src/make_data/link_data.R new file mode 100644 index 0000000..b44dda1 --- /dev/null +++ b/src/make_data/link_data.R @@ -0,0 +1,106 @@ +library(googledrive) +library(readxl) 
+library(httr) +library(dplyr) +library(purrr) +library(tidyr) + +source("src/utils/read_responses.R") + + +# authorise access to gdrive +drive_auth() + +# get list of all files +file_main <- drive_get(path = as_id(x = "https://drive.google.com/file/d/1PS9xQIP_O048rGb-uvwPonyrV_3CCYc4/view?usp=sharing")) +files <- drive_ls( + path = as_id(x = "https://drive.google.com/drive/u/0/folders/1sfavbXr3UAqfd_zWAuDChvC7Hnb69gi5"), + type = "csv" +) + +# store Excel file temporarily +GET( + url = file_main$drive_resource[[1]]$webContentLink, + write_disk(tf <- tempfile()) +) + +# import Excel sheet +sheet_questions <- read_excel( + path = tf, + sheet = "List of Questions", + col_names = FALSE +) +sheet_questionnaires <- read_excel( + path = tf, + sheet = "Questionnaires", + skip = 1, + col_names = TRUE +) + +# rename columns +sheet_questions <- sheet_questions %>% + rename( + measure = `...1`, + questionnaire = `...2`, + question = `...3` + ) %>% + fill(measure, .direction = "down") +sheet_questionnaires <- select(.data = sheet_questionnaires, pupil_id:dSEND) + +# import each csv from gdrive +list_df <- list() +j <- 1 + +for (i in files$drive_resource) { + link <- i$webContentLink + df <- read_responses(file = link) + # store in list + list_df[[j]] <- df + j <- j + 1 +} + +responses <- map_dfr(.x = list_df, .f = rbind) %>% + mutate(measurement_date = parse_date(x = measurement_date, format = "%d/%m/%Y")) + +# clear environment +unlink(tf) +rm(df, file_main, files, i, j, link, list_df, tf) + + +# get questionnaire info in to use as column names later +responses <- responses %>% + left_join(y = sheet_questions, by = "question") %>% + mutate(qq = paste0(questionnaire, " - ", question)) %>% + # remove duplicates + distinct() + + +# see if have unique combo of rows so can pivot_wider safely +responses %>% + group_by(pupil_id, pupil_impacted_id, measurement_date, qq) %>% + summarise(count = n()) %>% + filter(count > 1) +# have duplicates, here's an example +responses %>% + 
filter(pupil_id == "100165", measurement_date == "2020-09-09", qq == "207_10 - I have felt like I have missed important school work")
+
+# partition these duplicate responses with a row number; allocation of this is random
+# this seems the best we can do
+responses <- responses %>%
+  select(pupil_id:measurement_date, qq, response) %>%
+  group_by(pupil_id, pupil_impacted_id, measurement_date, qq) %>%
+  mutate(rank = row_number()) %>%
+  arrange(pupil_id, pupil_impacted_id, measurement_date, qq)
+
+# isolate unique records
+responses_dedupe <- filter(.data = responses, rank == 1)
+
+# pivot wider for ImpactEd's purposes
+df_output <- responses_dedupe %>%
+  pivot_wider(
+    id_cols = c(pupil_id, pupil_impacted_id, measurement_date),
+    names_from = "qq",
+    values_from = "response"
+  ) %>%
+  arrange(pupil_id, measurement_date)
+write_csv(x = df_output, file = "data/processed/questionnaires_linked.csv")
diff --git a/src/utils/read_responses.R b/src/utils/read_responses.R
new file mode 100644
index 0000000..87fc285
--- /dev/null
+++ b/src/utils/read_responses.R
@@ -0,0 +1,27 @@
+library(readr)
+library(dplyr)
+library(tidyr)
+library(stringr)
+
+
+# from Duncan - function for importing
+read_responses <- function(file) {
+  read_csv(file, col_types = cols(.default = col_character())) %>%
+    pivot_longer(-c(pupil_id, pupil_impacted_id, measurement_date),
+      names_to = "question", values_to = "response"
+    ) %>%
+    # Deal with duplicated questions.
+    # Three pupils in questionnaire 186 have a set of columns to themselves, so
+    # we merge them back in by:
+    #
+    # 1. filtering out everyone's blank responses. That means that most pupils
+    #    only have responses to the first instance of each question, and the
+    #    three affected pupils only have responses to the second instance of
+    #    each question.
+    # 2. dropping the suffix that readr automatically adds to the second
+    #    instance of each question. Now all pupils are the same.
+    #
+    # The result is a single, consistently named instance of each column.
+ filter(!is.na(response)) %>% + mutate(question = str_remove(question, "_\\d+$")) +} diff --git a/surveyanalysis.Rproj b/surveyanalysis.Rproj new file mode 100644 index 0000000..21a4da0 --- /dev/null +++ b/surveyanalysis.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source