From ed222169269d0608d9ffb67a063547f78241996e Mon Sep 17 00:00:00 2001 From: Chet Husk Date: Sun, 13 Jan 2019 12:32:23 -0600 Subject: [PATCH 1/5] chunkify TcSymbolUseData --- src/absil/illib.fs | 41 ++++++++++++++++++++++++++++++++++- src/fsharp/NameResolution.fs | 18 +++++++++------ src/fsharp/NameResolution.fsi | 4 ++-- src/fsharp/service/service.fs | 6 +++-- 4 files changed, 57 insertions(+), 12 deletions(-) diff --git a/src/absil/illib.fs b/src/absil/illib.fs index 69b0030f910..6fa80da4e05 100644 --- a/src/absil/illib.fs +++ b/src/absil/illib.fs @@ -91,7 +91,7 @@ module Order = let toFunction (pxOrder: IComparer<'U>) x y = pxOrder.Compare(x,y) //------------------------------------------------------------------------- -// Library: arrays,lists,options +// Library: arrays,lists,options,resizearrays //------------------------------------------------------------------------- module Array = @@ -432,6 +432,45 @@ module List = let existsSquared f xss = xss |> List.exists (fun xs -> xs |> List.exists (fun x -> f x)) let mapiFoldSquared f z xss = mapFoldSquared f z (xss |> mapiSquared (fun i j x -> (i,j,x))) +module ResizeArray = + + /// Split a ResizeArray into an array of smaller chunks. + /// This requires `items/chunkSize` Array copies of length `chunkSize` if `items/chunkSize % 0 = 0`, + /// otherwise `items/chunkSize + 1` Array copies. + let chunkBySize chunkSize (items: ResizeArray<'t>) = + // we could use Seq.chunkBySize here, but that would involve many enumerator.MoveNext() calls that we can sidestep with a bit of math + let itemCount = items.Count + if itemCount = 0 + then [||] + else + let chunksCount = + match itemCount / chunkSize with + | n when itemCount % chunkSize = 0 -> n + | n -> n + 1 // any remainder means we need an additional chunk to store it + + [| for index in 0..chunksCount-1 do + let startIndex = index * chunkSize + let takeCount = min (itemCount - startIndex) chunkSize + + // pre-creating the array and doing a single copy is slightly better than + // two copies via ResizeArray<'t>.GetRange(start, count).ToArray(). + let holder = Array.zeroCreate takeCount + items.CopyTo(startIndex, holder, 0, takeCount) + yield holder |] + + /// Split a large ResizeArray into a series of array chunks that are each under the + /// Large Object Heap limit, in order to prevent the entire array from not being garbage-collected. + let mapToSmallArrayChunks f (inp: ResizeArray<'t>) = + let itemSizeBytes = sizeof<'t> + let lohSizeThresholdBytes = 85_000 + // rounding down here is good because it ensures we don't go over + let maxArrayItemCount = lohSizeThresholdBytes / itemSizeBytes + + /// chunk the provided input into arrays that are smaller than the LOH limit + /// in order to prevent long-term storage of those values + chunkBySize maxArrayItemCount inp + |> Array.map (Array.map f) + /// Because FSharp.Compiler.Service is a library that will target FSharp.Core 4.5.2 for the forseeable future, /// we need to stick these functions in this module rather than using the module functions for ValueOption /// that come after FSharp.Core 4.5.2. diff --git a/src/fsharp/NameResolution.fs b/src/fsharp/NameResolution.fs index 43c15418490..98799b9be32 100644 --- a/src/fsharp/NameResolution.fs +++ b/src/fsharp/NameResolution.fs @@ -1212,7 +1212,7 @@ let GetNestedTypesOfType (ad, ncenv:NameResolver, optFilter, staticResInfo, chec //------------------------------------------------------------------------- /// Represents the kind of the occurrence when reporting a name in name resolution -[] +[] type ItemOccurence = /// This is a binding / declaration of the item | Binding @@ -1496,17 +1496,21 @@ type TcSymbolUseData = /// This is a memory-critical data structure - allocations of this data structure and its immediate contents /// is one of the highest memory long-lived data structures in typical uses of IDEs. Not many of these objects /// are allocated (one per file), but they are large because the allUsesOfAllSymbols array is large. -type TcSymbolUses(g, capturedNameResolutions : ResizeArray, formatSpecifierLocations: (range * int)[]) = - +type TcSymbolUses(g, capturedNameResolutions : ResizeArray, formatSpecifierLocations: (range * int)[]) = + // Make sure we only capture the information we really need to report symbol uses - let allUsesOfSymbols = [| for cnr in capturedNameResolutions -> { Item=cnr.Item; ItemOccurence=cnr.ItemOccurence; DisplayEnv=cnr.DisplayEnv; Range=cnr.Range } |] + let allUsesOfSymbols = + capturedNameResolutions + |> ResizeArray.mapToSmallArrayChunks (fun cnr -> { Item=cnr.Item; ItemOccurence=cnr.ItemOccurence; DisplayEnv=cnr.DisplayEnv; Range=cnr.Range }) + let capturedNameResolutions = () do ignore capturedNameResolutions // don't capture this! member this.GetUsesOfSymbol(item) = - [| for symbolUse in allUsesOfSymbols do - if protectAssemblyExploration false (fun () -> ItemsAreEffectivelyEqual g item symbolUse.Item) then - yield symbolUse |] + [| for symbolUseChunk in allUsesOfSymbols do + for symbolUse in symbolUseChunk do + if protectAssemblyExploration false (fun () -> ItemsAreEffectivelyEqual g item symbolUse.Item) then + yield symbolUse |] member this.AllUsesOfSymbols = allUsesOfSymbols diff --git a/src/fsharp/NameResolution.fsi b/src/fsharp/NameResolution.fsi index 10fe03ff0b1..a302e22ef17 100755 --- a/src/fsharp/NameResolution.fsi +++ b/src/fsharp/NameResolution.fsi @@ -240,7 +240,7 @@ type TypeNameResolutionInfo = static member ResolveToTypeRefs : TypeNameResolutionStaticArgsInfo -> TypeNameResolutionInfo /// Represents the kind of the occurrence when reporting a name in name resolution -[] +[] type internal ItemOccurence = | Binding | Use @@ -319,7 +319,7 @@ type internal TcSymbolUses = member GetUsesOfSymbol : Item -> TcSymbolUseData[] /// All the uses of all items within the file - member AllUsesOfSymbols : TcSymbolUseData[] + member AllUsesOfSymbols : TcSymbolUseData[][] /// Get the locations of all the printf format specifiers in the file member GetFormatSpecifierLocationsAndArity : unit -> (range * int)[] diff --git a/src/fsharp/service/service.fs b/src/fsharp/service/service.fs index d8eb7442b4a..69be07463f0 100644 --- a/src/fsharp/service/service.fs +++ b/src/fsharp/service/service.fs @@ -1913,7 +1913,8 @@ type FSharpCheckProjectResults(projectFileName:string, tcConfigOption, keepAssem let cenv = SymbolEnv(tcGlobals, thisCcu, Some ccuSig, tcImports) [| for r in tcSymbolUses do - for symbolUse in r.AllUsesOfSymbols do + for symbolUseChunk in r.AllUsesOfSymbols do + for symbolUse in symbolUseChunk do if symbolUse.ItemOccurence <> ItemOccurence.RelatedText then let symbol = FSharpSymbol.Create(cenv, symbolUse.Item) yield FSharpSymbolUse(tcGlobals, symbolUse.DisplayEnv, symbol, symbolUse.ItemOccurence, symbolUse.Range) |] @@ -2107,7 +2108,8 @@ type FSharpCheckFileResults(filename: string, errors: FSharpErrorInfo[], scopeOp (fun () -> [| |]) (fun scope -> let cenv = scope.SymbolEnv - [| for symbolUse in scope.ScopeSymbolUses.AllUsesOfSymbols do + [| for symbolUseChunk in scope.ScopeSymbolUses.AllUsesOfSymbols do + for symbolUse in symbolUseChunk do if symbolUse.ItemOccurence <> ItemOccurence.RelatedText then let symbol = FSharpSymbol.Create(cenv, symbolUse.Item) yield FSharpSymbolUse(scope.TcGlobals, symbolUse.DisplayEnv, symbol, symbolUse.ItemOccurence, symbolUse.Range) |]) From ea38f3fad890a6fd56dd7b09b4e40dd78736d1a9 Mon Sep 17 00:00:00 2001 From: Chet Husk Date: Mon, 14 Jan 2019 10:39:28 -0600 Subject: [PATCH 2/5] move LOH size out to a constant --- src/absil/illib.fs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/absil/illib.fs b/src/absil/illib.fs index 6fa80da4e05..a8284977050 100644 --- a/src/absil/illib.fs +++ b/src/absil/illib.fs @@ -41,6 +41,10 @@ let inline isNonNull x = not (isNull x) let inline nonNull msg x = if isNull x then failwith ("null: " + msg) else x let inline (===) x y = LanguagePrimitives.PhysicalEquality x y +/// Per the docs the threshold for the Large Object Heap is 85000 bytes: https://docs.microsoft.com/en-us/dotnet/standard/garbage-collection/large-object-heap#how-an-object-ends-up-on-the-large-object-heap-and-how-gc-handles-them +/// We set the limit to slightly under that to allow for some 'slop' +let LOH_SIZE_THRESHOLD_BYTES = 84_900 + //--------------------------------------------------------------------- // Library: ReportTime //--------------------------------------------------------------------- @@ -462,15 +466,15 @@ module ResizeArray = /// Large Object Heap limit, in order to prevent the entire array from not being garbage-collected. let mapToSmallArrayChunks f (inp: ResizeArray<'t>) = let itemSizeBytes = sizeof<'t> - let lohSizeThresholdBytes = 85_000 // rounding down here is good because it ensures we don't go over - let maxArrayItemCount = lohSizeThresholdBytes / itemSizeBytes + let maxArrayItemCount = LOH_SIZE_THRESHOLD_BYTES / itemSizeBytes /// chunk the provided input into arrays that are smaller than the LOH limit /// in order to prevent long-term storage of those values chunkBySize maxArrayItemCount inp |> Array.map (Array.map f) + /// Because FSharp.Compiler.Service is a library that will target FSharp.Core 4.5.2 for the forseeable future, /// we need to stick these functions in this module rather than using the module functions for ValueOption /// that come after FSharp.Core 4.5.2. From cda7242ee7e3ba1f13febdc1d0cd10717620afac Mon Sep 17 00:00:00 2001 From: Chet Husk Date: Mon, 14 Jan 2019 12:12:56 -0600 Subject: [PATCH 3/5] do chunking and mapping together to reduce allocations --- src/absil/illib.fs | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/absil/illib.fs b/src/absil/illib.fs index a8284977050..ac6010717ba 100644 --- a/src/absil/illib.fs +++ b/src/absil/illib.fs @@ -441,7 +441,7 @@ module ResizeArray = /// Split a ResizeArray into an array of smaller chunks. /// This requires `items/chunkSize` Array copies of length `chunkSize` if `items/chunkSize % 0 = 0`, /// otherwise `items/chunkSize + 1` Array copies. - let chunkBySize chunkSize (items: ResizeArray<'t>) = + let chunkBySize chunkSize f (items: ResizeArray<'t>) = // we could use Seq.chunkBySize here, but that would involve many enumerator.MoveNext() calls that we can sidestep with a bit of math let itemCount = items.Count if itemCount = 0 @@ -456,10 +456,14 @@ module ResizeArray = let startIndex = index * chunkSize let takeCount = min (itemCount - startIndex) chunkSize - // pre-creating the array and doing a single copy is slightly better than - // two copies via ResizeArray<'t>.GetRange(start, count).ToArray(). let holder = Array.zeroCreate takeCount - items.CopyTo(startIndex, holder, 0, takeCount) + // we take a bounds-check hit here on each access. + // other alternatives here include + // * iterating across an IEnumerator (incurs MoveNext penalty) + // * doing a block copy using `List.CopyTo(index, array, index, count)` (requires more copies to do the mapping) + // none are significantly better. + for i in 0 .. takeCount - 1 do + holder.[i] <- f items.[i] yield holder |] /// Split a large ResizeArray into a series of array chunks that are each under the @@ -471,8 +475,7 @@ module ResizeArray = /// chunk the provided input into arrays that are smaller than the LOH limit /// in order to prevent long-term storage of those values - chunkBySize maxArrayItemCount inp - |> Array.map (Array.map f) + chunkBySize maxArrayItemCount f inp /// Because FSharp.Compiler.Service is a library that will target FSharp.Core 4.5.2 for the forseeable future, From 959ee30fbcafb2a738d9c23f04e57822a4067c99 Mon Sep 17 00:00:00 2001 From: Chet Husk Date: Tue, 15 Jan 2019 11:03:04 -0600 Subject: [PATCH 4/5] clarify comment around GC impacts --- src/absil/illib.fs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/absil/illib.fs b/src/absil/illib.fs index ac6010717ba..44261606387 100644 --- a/src/absil/illib.fs +++ b/src/absil/illib.fs @@ -466,8 +466,9 @@ module ResizeArray = holder.[i] <- f items.[i] yield holder |] - /// Split a large ResizeArray into a series of array chunks that are each under the - /// Large Object Heap limit, in order to prevent the entire array from not being garbage-collected. + /// Split a large ResizeArray into a series of array chunks that are each under the Large Object Heap limit. + /// This is done to help prevent a stop-the-world collection of the single large array, instead allowing for a greater + /// probability of smaller collections. Stop-the-world is still possible, just less likely. let mapToSmallArrayChunks f (inp: ResizeArray<'t>) = let itemSizeBytes = sizeof<'t> // rounding down here is good because it ensures we don't go over From 161e94ec49806c827a80009722451f2308705f2c Mon Sep 17 00:00:00 2001 From: Chet Husk Date: Thu, 17 Jan 2019 17:15:08 -0600 Subject: [PATCH 5/5] add comment informing others of the potential for LOH allocations --- src/fsharp/NameResolution.fs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/fsharp/NameResolution.fs b/src/fsharp/NameResolution.fs index 98799b9be32..9434740d314 100644 --- a/src/fsharp/NameResolution.fs +++ b/src/fsharp/NameResolution.fs @@ -1507,6 +1507,9 @@ type TcSymbolUses(g, capturedNameResolutions : ResizeArray ItemsAreEffectivelyEqual g item symbolUse.Item) then