diff --git a/src/Stack/Script.hs b/src/Stack/Script.hs index 95d9b0e968..e2fbafe292 100644 --- a/src/Stack/Script.hs +++ b/src/Stack/Script.hs @@ -11,7 +11,7 @@ import qualified Data.Conduit.List as CL import Data.List.Split (splitWhen) import qualified Data.Map.Strict as Map import qualified Data.Set as Set -import qualified Distribution.PackageDescription as PD +import Data.Tuple (swap) import Distribution.Types.PackageName (mkPackageName) import Path import Path.IO @@ -241,14 +241,20 @@ getModuleInfo = do installedDeps = toModuleInfo notHiddenDeps snapshotDumpPkgs dumpPkgs = Set.fromList $ map (pkgName . dpPackageIdent) snapshotDumpPkgs notInstalledDeps = Map.withoutKeys notHiddenDeps dumpPkgs - otherDeps <- liftIO $ - fmap (Map.fromListWith mappend . concat) $ - forM (Map.toList notInstalledDeps) $ \(pname, dep) -> do - gpd <- cpGPD (dpCommon dep) - let modules = maybe [] PD.exposedModules $ - maybe (PD.library $ PD.packageDescription gpd) (Just . PD.condTreeData) $ - PD.condLibrary gpd + eitherMutable (pname, dep) = case dpLocation dep of + PLMutable _ -> Right (pname, dep) + PLImmutable pli -> Left (pname, pli) + (notInstImmutable, notInstMutable) = partitionEithers $ + map eitherMutable (Map.toList notInstalledDeps) + otherMutable <- forM notInstMutable $ \(pname, dep) -> liftIO $ do + modules <- allExposedModules <$> cpGPD (dpCommon dep) return [ (m, Set.singleton pname) | m <- modules ] + immutableModules <- exposedPackageModules (map snd notInstImmutable) + let immutablePnames = Map.fromList $ map swap notInstImmutable + otherImmutable = flip mapMaybe immutableModules $ \(loc, modules) -> do + pname <- Map.lookup loc immutablePnames + return [ (m, Set.singleton pname) | m <- modules ] + otherDeps = Map.fromListWith mappend $ concat (otherMutable ++ otherImmutable) return $ globals <> installedDeps <> ModuleInfo otherDeps where notHidden = Map.filter (not . dpHidden) diff --git a/subs/pantry/package.yaml b/subs/pantry/package.yaml index a526245211..a785880e0f 100644 --- a/subs/pantry/package.yaml +++ b/subs/pantry/package.yaml @@ -62,6 +62,7 @@ dependencies: - resourcet - rio-prettyprint - mtl +- extra # FIXME remove when we drop store - integer-gmp diff --git a/subs/pantry/src/Pantry.hs b/subs/pantry/src/Pantry.hs index de1242f393..037ac9358a 100644 --- a/subs/pantry/src/Pantry.hs +++ b/subs/pantry/src/Pantry.hs @@ -160,6 +160,8 @@ module Pantry , getHackageTypoCorrections , loadGlobalHints , partitionReplacedDependencies + , exposedPackageModules + , allExposedModules ) where import RIO @@ -180,6 +182,7 @@ import Pantry.Types import Pantry.Hackage import Path (Path, Abs, File, toFilePath, Dir, (), filename, parseAbsDir, parent, parseRelFile) import Path.IO (doesFileExist, resolveDir', listDir) +import Distribution.ModuleName (ModuleName) import Distribution.PackageDescription (GenericPackageDescription, FlagName) import qualified Distribution.PackageDescription as D import Distribution.Parsec.Common (PWarning (..), showPos) @@ -384,38 +387,36 @@ loadCabalFileImmutable :: (HasPantryConfig env, HasLogFunc env, HasProcessContext env) => PackageLocationImmutable -> RIO env GenericPackageDescription -loadCabalFileImmutable loc = withCache $ do +loadCabalFileImmutable loc = cachedCabalFile rawLoc $ do logDebug $ "Parsing cabal file for " <> display loc bs <- loadCabalFileBytes loc - let foundCabalKey = BlobKey (SHA256.hashBytes bs) (FileSize (fromIntegral (B.length bs))) - (_warnings, gpd) <- rawParseGPD (Left $ toRawPLI loc) bs - let pm = + let rpm = case loc of - PLIHackage (PackageIdentifier name version) cfHash mtree -> PackageMetadata - { pmIdent = PackageIdentifier name version - , pmTreeKey = mtree - , pmCabal = cfHash + PLIHackage (PackageIdentifier name version) cfKey treeKey -> RawPackageMetadata + { rpmName = Just name + , rpmVersion = Just version + , rpmTreeKey = Just treeKey + , rpmCabal = Just cfKey } - PLIArchive _ pm' -> pm' - PLIRepo _ pm' -> pm' - let exc = MismatchedPackageMetadata (toRawPLI loc) (toRawPM pm) Nothing - foundCabalKey (gpdPackageIdentifier gpd) - PackageIdentifier name ver = pmIdent pm - maybe (throwIO exc) pure $ do - guard $ name == gpdPackageName gpd - guard $ ver == gpdVersion gpd - guard $ pmCabal pm == foundCabalKey - pure gpd + PLIArchive _ pm -> toRawPM pm + PLIRepo _ pm -> toRawPM pm + parseAndCheckGPD rawLoc rpm bs where - withCache inner = do - let rawLoc = toRawPLI loc - ref <- view $ pantryConfigL.to pcParsedCabalFilesRawImmutable - m0 <- readIORef ref - case Map.lookup rawLoc m0 of - Just x -> pure x - Nothing -> do - x <- inner - atomicModifyIORef' ref $ \m -> (Map.insert rawLoc x m, x) + rawLoc = toRawPLI loc + +cachedCabalFile :: + (HasPantryConfig env) + => RawPackageLocationImmutable + -> RIO env GenericPackageDescription + -> RIO env GenericPackageDescription +cachedCabalFile rawLoc inner = do + ref <- view $ pantryConfigL.to pcParsedCabalFilesRawImmutable + m0 <- readIORef ref + case Map.lookup rawLoc m0 of + Just x -> pure x + Nothing -> do + x <- inner + atomicModifyIORef' ref $ \m -> (Map.insert rawLoc x m, x) -- | Load the cabal file for the given 'RawPackageLocationImmutable'. -- @@ -430,11 +431,9 @@ loadCabalFileRawImmutable :: (HasPantryConfig env, HasLogFunc env, HasProcessContext env) => RawPackageLocationImmutable -> RIO env GenericPackageDescription -loadCabalFileRawImmutable loc = withCache $ do +loadCabalFileRawImmutable loc = cachedCabalFile loc $ do logDebug $ "Parsing cabal file for " <> display loc bs <- loadRawCabalFileBytes loc - let foundCabalKey = BlobKey (SHA256.hashBytes bs) (FileSize (fromIntegral (B.length bs))) - (_warnings, gpd) <- rawParseGPD (Left loc) bs let rpm = case loc of RPLIHackage (PackageIdentifierRevision name version cfi) mtree -> RawPackageMetadata @@ -448,21 +447,23 @@ loadCabalFileRawImmutable loc = withCache $ do } RPLIArchive _ rpm' -> rpm' RPLIRepo _ rpm' -> rpm' - let exc = MismatchedPackageMetadata loc rpm Nothing foundCabalKey (gpdPackageIdentifier gpd) + parseAndCheckGPD loc rpm bs + +parseAndCheckGPD :: + RawPackageLocationImmutable + -> RawPackageMetadata + -> ByteString + -> RIO env GenericPackageDescription +parseAndCheckGPD rawLoc rpm bs = do + let foundCabalKey = BlobKey (SHA256.hashBytes bs) (FileSize (fromIntegral (B.length bs))) + (_warnings, gpd) <- rawParseGPD (Left rawLoc) bs + let exc = MismatchedPackageMetadata rawLoc rpm Nothing + foundCabalKey (gpdPackageIdentifier gpd) maybe (throwIO exc) pure $ do guard $ maybe True (== gpdPackageName gpd) (rpmName rpm) guard $ maybe True (== gpdVersion gpd) (rpmVersion rpm) guard $ maybe True (== foundCabalKey) (rpmCabal rpm) pure gpd - where - withCache inner = do - ref <- view $ pantryConfigL.to pcParsedCabalFilesRawImmutable - m0 <- readIORef ref - case Map.lookup loc m0 of - Just x -> pure x - Nothing -> do - x <- inner - atomicModifyIORef' ref $ \m -> (Map.insert loc x m, x) -- | Same as 'loadCabalFileRawImmutable', but takes a -- 'RawPackageLocation'. Never prints warnings, see 'loadCabalFilePath' @@ -1501,3 +1502,35 @@ prunePackageWithDeps pkgs getName getDeps (pname, a) = do else do modify' $ first (Map.insert pname prunedDeps) return $ not (null prunedDeps) + +exposedPackageModules :: + (HasPantryConfig env, HasLogFunc env, HasProcessContext env) + => [PackageLocationImmutable] + -> RIO env [(PackageLocationImmutable, [ModuleName])] +exposedPackageModules locs = do + let eitherHackage (PLIHackage pident cfKey treeKey) = Right (pident, cfKey, treeKey) + eitherHackage otherLoc = Left otherLoc + (otherLocs, hkgLocs) = partitionEithers $ map eitherHackage locs + hkgModules <- getHackagePackagesExposedModules hkgLocs $ \bs (pident, cfKey, treeKey) -> do + let loc = PLIHackage pident cfKey treeKey + rawLoc = toRawPLI loc + PackageIdentifier name version = pident + rpm = RawPackageMetadata + { rpmName = Just name + , rpmVersion = Just version + , rpmTreeKey = Just treeKey + , rpmCabal = Just cfKey + } + gpd <- cachedCabalFile rawLoc $ parseAndCheckGPD rawLoc rpm bs + return $ allExposedModules gpd + otherModules <- forM otherLocs $ \loc -> do + gpd <- loadCabalFileImmutable loc + return (loc, allExposedModules gpd) + let hkgPLI (pident, cfKey, treeKey) = PLIHackage pident cfKey treeKey + return $ map (first hkgPLI) hkgModules ++ otherModules + +allExposedModules :: GenericPackageDescription -> [ModuleName] +allExposedModules gpd | Just lib <- D.condTreeData <$> D.condLibrary gpd = + D.exposedModules lib ++ + map D.moduleReexportName (D.reexportedModules lib) + | otherwise = mempty diff --git a/subs/pantry/src/Pantry/Hackage.hs b/subs/pantry/src/Pantry/Hackage.hs index 4286c7da75..bd93e8c5ab 100644 --- a/subs/pantry/src/Pantry/Hackage.hs +++ b/subs/pantry/src/Pantry/Hackage.hs @@ -9,6 +9,7 @@ module Pantry.Hackage , getHackageTarball , getHackageTarballKey , getHackageCabalFile + , getHackagePackagesExposedModules , getHackagePackageVersions , getHackagePackageVersionRevisions , getHackageTypoCorrections @@ -20,9 +21,12 @@ import RIO.Process import Data.Aeson import Conduit import Data.Conduit.Tar +import Data.List.Extra (groupSort) import qualified RIO.Text as T import qualified RIO.Map as Map +import qualified RIO.Set as Set import Data.Text.Unsafe (unsafeTail) +import Data.Tuple (swap) import qualified RIO.ByteString as B import qualified RIO.ByteString.Lazy as BL import Pantry.Archive @@ -38,6 +42,7 @@ import qualified Distribution.PackageDescription as Cabal import System.IO (SeekMode (..)) import qualified Data.List.NonEmpty as NE import Data.Text.Metrics (damerauLevenshtein) +import Distribution.ModuleName (ModuleName) import Distribution.Types.Version (versionNumbers) import Distribution.Types.VersionRange (withinRange) @@ -292,6 +297,42 @@ getHackageCabalFile pir@(PackageIdentifierRevision _ _ cfi) = do _ -> pure () pure bs +type HackageLocation = (PackageIdentifier, BlobKey, TreeKey) + +getHackagePackagesExposedModules + :: (HasPantryConfig env, HasLogFunc env) + => [HackageLocation] + -> (ByteString -> HackageLocation -> RIO env [ModuleName]) + -> RIO env [(HackageLocation, [ModuleName])] +getHackagePackagesExposedModules hkgLocs extractModules = do + locsWithBids <- forM hkgLocs $ \hkgLoc -> do + let (pident, BlobKey sha size, _) = hkgLoc + PackageIdentifier name ver = pident + bid <- resolveCabalFileInfo $ + PackageIdentifierRevision name ver (CFIHash sha (Just size)) + return (hkgLoc, bid) + cachedBids <- withStorage $ loadHasCachedExposedModules (map snd locsWithBids) + let cachedSet = Set.fromList cachedBids + notCached = flip mapMaybe locsWithBids $ \(hkgLoc, bid) -> do + if bid `Set.member` cachedSet + then Nothing + else Just (hkgLoc, bid) + cachedWithBids <- withStorage $ loadHackageExposedModules cachedBids + let locByBid = Map.fromList $ map swap locsWithBids + modulesByBid = groupSort cachedWithBids + cachedModules = flip mapMaybe modulesByBid $ \(bid, ms) -> do + loc <- Map.lookup bid locByBid + pure (loc, ms) + extracted <- for notCached $ \(hkgLoc, bid) -> do + bs <- withStorage $ loadBlobById bid + modules <- extractModules bs hkgLoc + return (map (\m -> (bid, m)) modules, (hkgLoc, modules)) + withStorage $ do + storeExposedModules (concatMap fst extracted) + storeHasCachedExposedModules (map snd notCached) + let extracedModules = map snd extracted + return $ cachedModules ++ extracedModules + resolveCabalFileInfo :: (HasPantryConfig env, HasLogFunc env) => PackageIdentifierRevision diff --git a/subs/pantry/src/Pantry/Storage.hs b/subs/pantry/src/Pantry/Storage.hs index 863e3301ed..95806a7046 100644 --- a/subs/pantry/src/Pantry/Storage.hs +++ b/subs/pantry/src/Pantry/Storage.hs @@ -47,10 +47,15 @@ module Pantry.Storage , loadCabalBlobKey , hpackToCabal , countHackageCabals + , storeExposedModules + , loadHasCachedExposedModules + , storeHasCachedExposedModules + , loadHackageExposedModules -- avoid warnings , BlobId , HackageCabalId + , HackageExposedModulesId , HackageTarballId , CacheUpdateId , FilePathId @@ -62,6 +67,7 @@ module Pantry.Storage , UrlBlobId ) where +import Data.List.Extra (chunksOf) import RIO hiding (FilePath) import RIO.Process import qualified RIO.ByteString as B @@ -134,8 +140,17 @@ HackageCabal -- If available: the full tree containing the HackageTarball -- contents with the cabal file modified. tree TreeId Maybe + -- If available should be equal to True meaning that + -- exposed modules were saved in HackageExposedModules + hasExposedModules Bool Maybe UniqueHackage name version revision +-- Cache of modules exposed by a Hackage package +HackageExposedModules + cabal BlobId + moduleName P.ModuleNameP + UniqueHackageExposedModules cabal moduleName + -- Any preferred-version information from Hackage PreferredVersions name PackageNameId @@ -379,6 +394,7 @@ storeHackageRevision name version key = do , hackageCabalRevision = Revision (fromIntegral rev) , hackageCabalCabal = key , hackageCabalTree = Nothing + , hackageCabalHasExposedModules = Nothing } loadHackagePackageVersions @@ -1023,3 +1039,55 @@ countHackageCabals = do [] -> pure 0 (Single n):_ -> pure n + +loadHasCachedExposedModules + :: (HasPantryConfig env, HasLogFunc env) + => [BlobId] + -> ReaderT SqlBackend (RIO env) [BlobId] +loadHasCachedExposedModules bids = + fmap concat $ forM (chunksOf sqliteListLimit bids) $ \bids' -> + mapMaybe go <$> selectList [HackageCabalCabal <-. bids'] [] + where + go e | v <- entityVal e, + Just True <- hackageCabalHasExposedModules v = + Just $ hackageCabalCabal v + | otherwise = + Nothing + +-- SQLITE_MAX_VARIABLE_NUMBER is 999 by default, +-- see https://www.sqlite.org/limits.html +sqliteListLimit :: Int +sqliteListLimit = 500 + +storeHasCachedExposedModules + :: (HasPantryConfig env, HasLogFunc env) + => [BlobId] + -> ReaderT SqlBackend (RIO env) () +storeHasCachedExposedModules bids = + forM_ (chunksOf sqliteListLimit bids) $ \bids' -> + updateWhere [HackageCabalCabal <-. bids'] + [HackageCabalHasExposedModules =. Just True] + +storeExposedModules + :: (HasPantryConfig env, HasLogFunc env) + => [(BlobId, P.ModuleName)] + -> ReaderT SqlBackend (RIO env) () +storeExposedModules ms = + forM_ ms $ \(bid, m) -> + rawExecute + "INSERT OR IGNORE INTO hackage_exposed_modules(cabal, module_name)\n\ + \VALUES (?, ?)" + [toPersistValue bid, toPersistValue (P.ModuleNameP m)] + +loadHackageExposedModules + :: (HasPantryConfig env, HasLogFunc env) + => [BlobId] + -> ReaderT SqlBackend (RIO env) [(BlobId, P.ModuleName)] +loadHackageExposedModules bids = + fmap concat $ forM (chunksOf sqliteListLimit bids) $ \bids' -> + map go <$> selectList [HackageExposedModulesCabal <-. bids'] [] + where + go e = + let v = entityVal e + P.ModuleNameP m = hackageExposedModulesModuleName v + in (hackageExposedModulesCabal v, m) diff --git a/subs/pantry/src/Pantry/Types.hs b/subs/pantry/src/Pantry/Types.hs index 67d49e7c3d..bd150fbf73 100644 --- a/subs/pantry/src/Pantry/Types.hs +++ b/subs/pantry/src/Pantry/Types.hs @@ -21,10 +21,12 @@ module Pantry.Types , Version , PackageIdentifier (..) , Revision (..) + , ModuleName , CabalFileInfo (..) , PrintWarnings (..) , PackageNameP (..) , VersionP (..) + , ModuleNameP (..) , PackageIdentifierRevision (..) , pirForHash , FileType (..) @@ -608,6 +610,18 @@ instance PersistField VersionP where instance PersistFieldSql VersionP where sqlType _ = SqlString +newtype ModuleNameP = ModuleNameP ModuleName + deriving (Show) +instance PersistField ModuleNameP where + toPersistValue (ModuleNameP mn) = PersistText $ T.pack $ moduleNameString mn + fromPersistValue v = do + str <- fromPersistValue v + case parseModuleName str of + Nothing -> Left $ "Invalid module name: " <> T.pack str + Just pn -> Right $ ModuleNameP pn +instance PersistFieldSql ModuleNameP where + sqlType _ = SqlString + -- | How to choose a cabal file for a package from Hackage. This is to -- work with Hackage cabal file revisions, which makes -- @PackageIdentifier@ insufficient for specifying a package from @@ -1225,6 +1239,12 @@ parseVersionThrowing str = parseVersionRange :: String -> Maybe VersionRange parseVersionRange = Distribution.Text.simpleParse +-- | Parse a module name from a 'String'. +-- +-- @since 0.1.0.0 +parseModuleName :: String -> Maybe ModuleName +parseModuleName = Distribution.Text.simpleParse + -- | Parse a flag name from a 'String'. -- -- @since 0.1.0.0