diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 37d60bfe9..334a65517 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -2,6 +2,7 @@
 
 ## 8.1.0-beta
 
+- Add `Http.ParseLinkHeader` utility for parsing RFC 5988 `Link` response headers (used by GitHub, GitLab, and other paginated APIs) into a `Map` from relation name to URL (closes #805)
 - Add `PreferDateTimeOffset` parameter to `CsvProvider`, `JsonProvider`, and `XmlProvider`: when true, date-time values without an explicit timezone offset are inferred as `DateTimeOffset` (using local offset) instead of `DateTime` (closes #1100, #1072)
 - Make `Http.AppendQueryToUrl` public (closes #1325)
 - Add `PreferOptionals` parameter to `JsonProvider` and `XmlProvider` (defaults to `true` to match existing behavior; set to `false` to use empty string or `NaN` for missing values, like the CsvProvider default) (closes #649)
diff --git a/docs/library/Http.fsx b/docs/library/Http.fsx
index 0bfa2c831..47c10a0ff 100644
--- a/docs/library/Http.fsx
+++ b/docs/library/Http.fsx
@@ -293,6 +293,47 @@ Http.Request(
     )
 )
 
+(**
+## Paginated APIs (RFC 5988 Link headers)
+
+Many REST APIs — including GitHub, GitLab, and others — use the `Link` response header
+(defined by [RFC 5988](https://tools.ietf.org/html/rfc5988)) to indicate pagination URLs.
+A typical `Link` header looks like this:
+
+```
+<https://api.github.com/repos/fsprojects/FSharp.Data/releases?page=2>; rel="next",
+<https://api.github.com/repos/fsprojects/FSharp.Data/releases?page=5>; rel="last"
+```
+
+The `cref:M:FSharp.Data.Http.ParseLinkHeader` utility parses such a header into a
+`Map` from relation type to URL. You can then use the result to walk through pages:
+*)
+
+(*** do-not-eval ***)
+
+type Release = JsonProvider<"https://api.github.com/repos/fsprojects/FSharp.Data/releases">
+
+let fetchAllReleases () =
+    let rec loop url acc =
+        let response =
+            Http.Request(url, headers = [ HttpRequestHeaders.UserAgent "myapp" ])
+
+        let items =
+            match response.Body with
+            | Text text -> Release.ParseList text
+            | Binary _ -> [||]
+
+        let acc' = Array.append acc items
+
+        match response.Headers |> Map.tryFind HttpResponseHeaders.Link with
+        | Some linkHeader ->
+            match Http.ParseLinkHeader(linkHeader) |> Map.tryFind "next" with
+            | Some nextUrl -> loop nextUrl acc'
+            | None -> acc'
+        | None -> acc'
+
+    loop "https://api.github.com/repos/fsprojects/FSharp.Data/releases" [||]
+
 (**
 ## Related articles
 
diff --git a/src/FSharp.Data.Http/Http.fs b/src/FSharp.Data.Http/Http.fs
index 4a1c55cb1..3b9b8ed81 100644
--- a/src/FSharp.Data.Http/Http.fs
+++ b/src/FSharp.Data.Http/Http.fs
@@ -1999,6 +1999,23 @@ type Http private () =
 
     static let charsetRegex = Regex("charset=([^;\s]*)", RegexOptions.Compiled)
 
+    // Matches one link-value of a Link header: the target between angle brackets (group 1)
+    // followed by its ';'-separated parameter list (group 2). Quoted parameter values are
+    // consumed as a unit so that a ',' or ';' inside them does not end the link early.
+    static let linkValuePattern =
+        Regex(
+            @"<([^>]+)>((?:\s*;\s*[^\s=;,]+(?:\s*=\s*(?:""(?:[^""\\]|\\.)*""|[^\s;,""]*))?)*)",
+            RegexOptions.Compiled
+        )
+
+    // Matches a single link parameter: its name (group 1) and either a quoted-string value
+    // (group 2, surrounding quotes removed) or a bare token value (group 3).
+    static let linkParamPattern =
+        Regex(
+            @";\s*([^\s=;,]+)(?:\s*=\s*(?:""((?:[^""\\]|\\.)*)""|([^\s;,""]*)))?",
+            RegexOptions.Compiled
+        )
+
     /// Correctly encodes large form data values.
     /// See https://blogs.msdn.microsoft.com/yangxind/2006/11/08/dont-use-net-system-uri-unescapedatastring-in-url-decoding/
     /// and https://msdn.microsoft.com/en-us/library/system.uri.escapedatastring(v=vs.110).aspx
@@ -2014,6 +2031,45 @@ type Http private () =
             + if url.IndexOf('?') >= 0 then "&" else "?"
             + String.concat "&" [ for k, v in query -> Uri.EscapeDataString k + "=" + Uri.EscapeDataString v ]
 
+    /// Parses an RFC 5988 / RFC 8288 Link header value (e.g. from a GitHub or other paginated API response)
+    /// and returns a map from relation type to URL.
+    ///
+    /// For example, given the header value:
+    ///   &lt;https://api.github.com/repos/.../releases?page=2&gt;; rel="next", &lt;...&gt;; rel="last"
+    /// this returns: Map [ "last", "https://..."; "next", "https://..." ]
+    ///
+    /// The rel parameter may appear anywhere in a link's parameter list, its name is matched
+    /// case-insensitively, and its value may be a quoted string or a bare token. A value that lists
+    /// several whitespace-separated relation types yields one map entry per type. Links without a
+    /// rel parameter are skipped. Relation types are returned exactly as written (no case folding),
+    /// and when the same relation type occurs on several links the last one wins.
+    /// A null, empty or whitespace-only header value yields an empty map.
+    static member ParseLinkHeader(linkHeader: string) =
+        if String.IsNullOrWhiteSpace(linkHeader) then
+            Map.empty
+        else
+            linkValuePattern.Matches(linkHeader)
+            |> Seq.cast<Match>
+            |> Seq.collect (fun link ->
+                let url = link.Groups.[1].Value
+
+                // Only the first rel parameter of a link counts; later ones are ignored (RFC 8288, section 3.3).
+                linkParamPattern.Matches(link.Groups.[2].Value)
+                |> Seq.cast<Match>
+                |> Seq.tryFind (fun p ->
+                    String.Equals(p.Groups.[1].Value, "rel", StringComparison.OrdinalIgnoreCase))
+                |> Option.map (fun p ->
+                    let relValue =
+                        if p.Groups.[2].Success then
+                            p.Groups.[2].Value
+                        else
+                            p.Groups.[3].Value
+
+                    relValue.Split([| ' '; '\t' |], StringSplitOptions.RemoveEmptyEntries)
+                    |> Seq.map (fun rel -> rel, url))
+                |> Option.defaultValue Seq.empty)
+            |> Map.ofSeq
+
     static member private InnerRequest
         (
             url: string,
diff --git a/tests/FSharp.Data.Core.Tests/Http.fs b/tests/FSharp.Data.Core.Tests/Http.fs
index 118d6eb1e..61cb29b41 100644
--- a/tests/FSharp.Data.Core.Tests/Http.fs
+++ b/tests/FSharp.Data.Core.Tests/Http.fs
@@ -93,6 +93,60 @@ let ``AppendQueryToUrl percent-encodes special characters in keys and values`` (
     Http.AppendQueryToUrl("https://example.com/search", [ "q", "hello world" ])
     |> should equal "https://example.com/search?q=hello%20world"
 
+[<Test>]
+let ``ParseLinkHeader returns empty map for empty string`` () =
+    Http.ParseLinkHeader("") |> should equal Map.empty<string, string>
+
+[<Test>]
+let ``ParseLinkHeader parses next and last relations`` () =
+    let header =
+        "<https://api.github.com/repos/octocat/hello-world/releases?page=2>; rel=\"next\", <https://api.github.com/repos/octocat/hello-world/releases?page=5>; rel=\"last\""
+    let result = Http.ParseLinkHeader(header)
+    result |> Map.find "next" |> should equal "https://api.github.com/repos/octocat/hello-world/releases?page=2"
+    result |> Map.find "last" |> should equal "https://api.github.com/repos/octocat/hello-world/releases?page=5"
+
+[<Test>]
+let ``ParseLinkHeader parses single relation`` () =
+    let header = "<https://example.com/items?page=3>; rel=\"next\""
+    let result = Http.ParseLinkHeader(header)
+    result |> Map.find "next" |> should equal "https://example.com/items?page=3"
+    result |> Map.containsKey "prev" |> should equal false
+
+[<Test>]
+let ``ParseLinkHeader handles prev, next, first, last`` () =
+    let header =
+        "<https://example.com/items?page=1>; rel=\"first\", <https://example.com/items?page=2>; rel=\"prev\", <https://example.com/items?page=4>; rel=\"next\", <https://example.com/items?page=10>; rel=\"last\""
+    let result = Http.ParseLinkHeader(header)
+    result |> Map.find "first" |> should equal "https://example.com/items?page=1"
+    result |> Map.find "prev" |> should equal "https://example.com/items?page=2"
+    result |> Map.find "next" |> should equal "https://example.com/items?page=4"
+    result |> Map.find "last" |> should equal "https://example.com/items?page=10"
+
+[<Test>]
+let ``ParseLinkHeader returns empty map for null or whitespace`` () =
+    Http.ParseLinkHeader(null) |> should equal Map.empty<string, string>
+    Http.ParseLinkHeader("   ") |> should equal Map.empty<string, string>
+
+[<Test>]
+let ``ParseLinkHeader finds rel when it is not the first parameter`` () =
+    let header = "<https://example.com/items?page=2>; title=\"Next, please; rel=x\"; rel=\"next\""
+    let result = Http.ParseLinkHeader(header)
+    result |> Map.find "next" |> should equal "https://example.com/items?page=2"
+    result |> Map.containsKey "x" |> should equal false
+
+[<Test>]
+let ``ParseLinkHeader accepts unquoted and upper-case rel parameter`` () =
+    let header = "<https://example.com/a>; rel=next, <https://example.com/b>; REL=\"prev\""
+    let result = Http.ParseLinkHeader(header)
+    result |> Map.find "next" |> should equal "https://example.com/a"
+    result |> Map.find "prev" |> should equal "https://example.com/b"
+
+[<Test>]
+let ``ParseLinkHeader splits multiple relation types in one rel value`` () =
+    let result = Http.ParseLinkHeader("<https://example.com/items?page=9>; rel=\"next last\"")
+    result |> Map.find "next" |> should equal "https://example.com/items?page=9"
+    result |> Map.find "last" |> should equal "https://example.com/items?page=9"
+
 [<Test>]
 let ``Don't throw exceptions on http error`` () =
     use localServer = startHttpLocalServer()