From e5b1ecc6540d5b098c186bca0201ffc9ea482dae Mon Sep 17 00:00:00 2001 From: Martin Odersky Date: Sun, 23 Aug 2020 11:21:29 +0200 Subject: [PATCH 1/5] Instrument collection sizes Allow to record total sizes of collections on which some selected operation is performed. # Conflicts: # compiler/src/dotty/tools/dotc/transform/Instrumentation.scala --- .../dotty/tools/dotc/core/Definitions.scala | 2 - .../dotc/transform/Instrumentation.scala | 47 ++++++++++++++----- .../src/dotty/tools/dotc/util/Stats.scala | 4 ++ 3 files changed, 40 insertions(+), 13 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/core/Definitions.scala b/compiler/src/dotty/tools/dotc/core/Definitions.scala index ff731f171dc4..7ee845525eef 100644 --- a/compiler/src/dotty/tools/dotc/core/Definitions.scala +++ b/compiler/src/dotty/tools/dotc/core/Definitions.scala @@ -845,8 +845,6 @@ class Definitions { @tu lazy val Not_value: Symbol = NotClass.companionModule.requiredMethod(nme.value) @tu lazy val ValueOfClass: ClassSymbol = requiredClass("scala.ValueOf") - @tu lazy val StatsModule: Symbol = requiredModule("dotty.tools.dotc.util.Stats") - @tu lazy val Stats_doRecord: Symbol = StatsModule.requiredMethod("doRecord") @tu lazy val FromDigitsClass: ClassSymbol = requiredClass("scala.util.FromDigits") @tu lazy val FromDigits_WithRadixClass: ClassSymbol = requiredClass("scala.util.FromDigits.WithRadix") diff --git a/compiler/src/dotty/tools/dotc/transform/Instrumentation.scala b/compiler/src/dotty/tools/dotc/transform/Instrumentation.scala index 5f8f9c91faf3..b4d2c541f2c5 100644 --- a/compiler/src/dotty/tools/dotc/transform/Instrumentation.scala +++ b/compiler/src/dotty/tools/dotc/transform/Instrumentation.scala @@ -1,4 +1,5 @@ -package dotty.tools.dotc +package dotty.tools +package dotc package transform import core._ @@ -27,25 +28,49 @@ class Instrumentation extends MiniPhase { thisPhase => override def isEnabled(using Context) = ctx.settings.Yinstrument.value - private val namesOfInterest = List( - "::", "+=", "toString", "newArray", "box", "toCharArray", - "map", "flatMap", "filter", "withFilter", "collect", "foldLeft", "foldRight", "take", - "reverse", "mapConserve", "mapconserve", "filterConserve", "zip", - "denotsNamed", "lookup", "lookupEntry", "lookupAll", "toList") - private var namesToRecord: Set[Name] = _ + private val collectionNamesOfInterest = List( + "map", "flatMap", "filter", "filterNot", "withFilter", "collect", "flatten", "foldLeft", "foldRight", "take", + "reverse", "zip", "++", ":::", ":+", "distinct", "dropRight", "takeRight", "groupBy", "groupMap", "init", "inits", + "interect", "mkString", "partition", "reverse_:::", "scanLeft", "scanRight", + "sortBy", "sortWith", "sorted", "span", "splitAt", "takeWhile", "transpose", "unzip", "unzip3", + "updated", "zipAll", "zipWithIndex", + "mapConserve", "mapconserve", "filterConserve", "zipWithConserve", "mapWithIndexConserve" + ) + + private val namesOfInterest = collectionNamesOfInterest ++ List( + "::", "+=", "toString", "newArray", "box", "toCharArray", "termName", "typeName", + "slice", "staticRef", "requiredClass") - private var consName: TermName = _ - private var consEqName: TermName = _ + private var namesToRecord: Set[Name] = _ + private var collectionNamesToRecord: Set[Name] = _ + private var Stats_doRecord: Symbol = _ + private var Stats_doRecordSize: Symbol = _ + private var CollectionIterableClass: ClassSymbol = _ override def prepareForUnit(tree: Tree)(using Context): Context = namesToRecord = namesOfInterest.map(_.toTermName).toSet + collectionNamesToRecord = collectionNamesOfInterest.map(_.toTermName).toSet + val StatsModule = requiredModule("dotty.tools.dotc.util.Stats") + Stats_doRecord = StatsModule.requiredMethod("doRecord") + Stats_doRecordSize = StatsModule.requiredMethod("doRecordSize") + CollectionIterableClass = requiredClass("scala.collection.Iterable") ctx private def record(category: String, tree: Tree)(using Context): Tree = { val key = Literal(Constant(s"$category@${tree.sourcePos.show}")) - ref(defn.Stats_doRecord).appliedTo(key, Literal(Constant(1))) + ref(Stats_doRecord).appliedTo(key, Literal(Constant(1))) } + private def recordSize(tree: Apply)(using Context): Tree = tree.fun match + case sel @ Select(qual, name) + if collectionNamesToRecord.contains(name) + && qual.tpe.widen.derivesFrom(CollectionIterableClass) => + val key = Literal(Constant(s"totalSize/${name} in ${qual.tpe.widen.classSymbol.name}@${tree.sourcePos.show}")) + val qual1 = ref(Stats_doRecordSize).appliedTo(key, qual).cast(qual.tpe.widen) + cpy.Apply(tree)(cpy.Select(sel)(qual1, name), tree.args) + case _ => + tree + private def ok(using Context) = !ctx.owner.ownersIterator.exists(_.name.toString.startsWith("Stats")) @@ -53,7 +78,7 @@ class Instrumentation extends MiniPhase { thisPhase => case Select(nu: New, _) => cpy.Block(tree)(record(i"alloc/${nu.tpe}", tree) :: Nil, tree) case ref: RefTree if namesToRecord.contains(ref.name) && ok => - cpy.Block(tree)(record(i"call/${ref.name}", tree) :: Nil, tree) + cpy.Block(tree)(record(i"call/${ref.name}", tree) :: Nil, recordSize(tree)) case _ => tree } diff --git a/compiler/src/dotty/tools/dotc/util/Stats.scala b/compiler/src/dotty/tools/dotc/util/Stats.scala index ecbf266866eb..684083fa77ba 100644 --- a/compiler/src/dotty/tools/dotc/util/Stats.scala +++ b/compiler/src/dotty/tools/dotc/util/Stats.scala @@ -28,6 +28,10 @@ import collection.mutable hits(name) += n } + def doRecordSize(fn: String, coll: scala.collection.Iterable[_]): coll.type = + doRecord(fn, coll.size) + coll + inline def trackTime[T](fn: String)(inline op: T): T = if (enabled) doTrackTime(fn)(op) else op From 1c09b538728b80a5d30c419c57745d46030eaecb Mon Sep 17 00:00:00 2001 From: Martin Odersky Date: Tue, 8 Sep 2020 11:18:33 +0200 Subject: [PATCH 2/5] Fix hashcodes of inner case classes in NameKinds See #9748 for why this is necessary --- compiler/src/dotty/tools/dotc/core/NameKinds.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler/src/dotty/tools/dotc/core/NameKinds.scala b/compiler/src/dotty/tools/dotc/core/NameKinds.scala index 6d3bd93f4a6d..28791b168aeb 100644 --- a/compiler/src/dotty/tools/dotc/core/NameKinds.scala +++ b/compiler/src/dotty/tools/dotc/core/NameKinds.scala @@ -124,6 +124,7 @@ object NameKinds { case class QualInfo(name: SimpleName) extends Info with QualifiedInfo { override def map(f: SimpleName => SimpleName): NameInfo = new QualInfo(f(name)) override def toString: String = s"$infoString $name" + override def hashCode = scala.runtime.ScalaRunTime._hashCode(this) * 31 + kind.hashCode } def apply(qual: TermName, name: SimpleName): TermName = @@ -173,6 +174,7 @@ object NameKinds { type ThisInfo = NumberedInfo case class NumberedInfo(val num: Int) extends Info with NameKinds.NumberedInfo { override def toString: String = s"$infoString $num" + override def hashCode = scala.runtime.ScalaRunTime._hashCode(this) * 31 + kind.hashCode } def apply(qual: TermName, num: Int): TermName = qual.derived(new NumberedInfo(num)) @@ -371,6 +373,7 @@ object NameKinds { case class SignedInfo(sig: Signature) extends Info { assert(sig ne Signature.NotAMethod) override def toString: String = s"$infoString $sig" + override def hashCode = scala.runtime.ScalaRunTime._hashCode(this) * 31 + kind.hashCode } type ThisInfo = SignedInfo From ef593392cf8b3e7d13808900f5468213864e2309 Mon Sep 17 00:00:00 2001 From: Martin Odersky Date: Tue, 8 Sep 2020 11:21:15 +0200 Subject: [PATCH 3/5] Fix growTable function for HashMap When tranisitioning from dense to hashing, we grow the table more than usual since otherwise we'd have to grow it again at the very next addEntry. The condition for this was wrong for HashMaps. --- compiler/src/dotty/tools/dotc/util/GenericHashMap.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/src/dotty/tools/dotc/util/GenericHashMap.scala b/compiler/src/dotty/tools/dotc/util/GenericHashMap.scala index 134a94e8b888..8a806bd42b7b 100644 --- a/compiler/src/dotty/tools/dotc/util/GenericHashMap.scala +++ b/compiler/src/dotty/tools/dotc/util/GenericHashMap.scala @@ -156,7 +156,7 @@ abstract class GenericHashMap[Key, Value] protected def growTable(): Unit = val oldTable = table val newLength = - if oldTable.length == DenseLimit then DenseLimit * 2 * roundToPower(capacityMultiple) + if table.length == DenseLimit * 2 then table.length * roundToPower(capacityMultiple) else table.length allocate(newLength) copyFrom(oldTable) From 3ad578e51185c6ae3ba1e3383a1697e485b89cfb Mon Sep 17 00:00:00 2001 From: Martin Odersky Date: Tue, 8 Sep 2020 16:08:55 +0200 Subject: [PATCH 4/5] Fix HashMap and HashSet remove criterion Make criterion when to fill a hole in a HashMap or HashSet remove more robust. The old criterion relied on fill factor always being less than 0.5. The new criterion works for arbitrary fill factors. --- .../tools/dotc/util/GenericHashMap.scala | 6 +- .../src/dotty/tools/dotc/util/HashSet.scala | 6 +- tests/run-with-compiler/settest.scala | 71 +++++++++++++++++++ 3 files changed, 79 insertions(+), 4 deletions(-) create mode 100644 tests/run-with-compiler/settest.scala diff --git a/compiler/src/dotty/tools/dotc/util/GenericHashMap.scala b/compiler/src/dotty/tools/dotc/util/GenericHashMap.scala index 8a806bd42b7b..f2a3f012f76c 100644 --- a/compiler/src/dotty/tools/dotc/util/GenericHashMap.scala +++ b/compiler/src/dotty/tools/dotc/util/GenericHashMap.scala @@ -111,9 +111,11 @@ abstract class GenericHashMap[Key, Value] k = keyAt(idx) k != null do + val eidx = index(hash(k)) if isDense - || index(hole - index(hash(k))) < limit * 2 - // hash(k) is then logically at or before hole; can be moved forward to fill hole + || index(eidx - (hole + 2)) > index(idx - (hole + 2)) + // entry `e` at `idx` can move unless `index(hash(e))` is in + // the (ring-)interval [hole + 2 .. idx] then setKey(hole, k) setValue(hole, valueAt(idx)) diff --git a/compiler/src/dotty/tools/dotc/util/HashSet.scala b/compiler/src/dotty/tools/dotc/util/HashSet.scala index 8d73eada970f..bd764c523d83 100644 --- a/compiler/src/dotty/tools/dotc/util/HashSet.scala +++ b/compiler/src/dotty/tools/dotc/util/HashSet.scala @@ -109,9 +109,11 @@ class HashSet[T](initialCapacity: Int = 8, capacityMultiple: Int = 2) extends Mu e = entryAt(idx) e != null do + val eidx = index(hash(e)) if isDense - || index(hole - index(hash(e))) < limit - // hash(k) is then logically at or before hole; can be moved forward to fill hole + || index(eidx - (hole + 1)) > index(idx - (hole + 1)) + // entry `e` at `idx` can move unless `index(hash(e))` is in + // the (ring-)interval [hole + 1 .. idx] then setEntry(hole, e) hole = idx diff --git a/tests/run-with-compiler/settest.scala b/tests/run-with-compiler/settest.scala new file mode 100644 index 000000000000..2d930ca3fdf9 --- /dev/null +++ b/tests/run-with-compiler/settest.scala @@ -0,0 +1,71 @@ +trait Generator[+T]: + self => + def generate: T + def map[S](f: T => S) = new Generator[S]: + def generate: S = f(self.generate) + def flatMap[S](f: T => Generator[S]) = new Generator[S]: + def generate: S = f(self.generate).generate + +object Generator: + val NumLimit = 300 + val Iterations = 10000 + + given integers as Generator[Int]: + val rand = new java.util.Random + def generate = rand.nextInt() + + given booleans as Generator[Boolean] = + integers.map(x => x > 0) + + def range(end: Int): Generator[Int] = + integers.map(x => (x % end).abs) + + enum Op: + case Lookup, Update, Remove + export Op._ + + given ops as Generator[Op] = + range(10).map { + case 0 | 1 | 2 | 3 => Lookup + case 4 | 5 | 6 | 7 => Update + case 8 | 9 => Remove + } + + val nums: Generator[Integer] = range(NumLimit).map(Integer(_)) + +@main def Test = + import Generator._ + + val set1 = dotty.tools.dotc.util.HashSet[Int]() + val set2 = scala.collection.mutable.HashSet[Int]() + + def checkSame() = + assert(set1.size == set2.size) + for e <- set1.iterator do + assert(set2.contains(e)) + for e <- set2.iterator do + assert(set1.contains(e)) + + def lookupTest(num: Integer) = + val res1 = set1.contains(num) + val res2 = set2.contains(num) + assert(res1 == res2) + + def updateTest(num: Integer) = + lookupTest(num) + set1 += num + set2 += num + checkSame() + + def removeTest(num: Integer) = + //println(s"test remove $num") + set1 -= num + set2 -= num + checkSame() + + for i <- 0 until Iterations do + val num = nums.generate + Generator.ops.generate match + case Lookup => lookupTest(num) + case Update => updateTest(num) + case Remove => removeTest(num) From daf6db70901aceda679db968b33169352e4f66bd Mon Sep 17 00:00:00 2001 From: Martin Odersky Date: Thu, 24 Sep 2020 11:40:16 +0200 Subject: [PATCH 5/5] Process hashcode bits in util.HashMap and HashSet The code was taken from scala.collection.mutable.AnyRefMap --- compiler/src/dotty/tools/dotc/util/HashMap.scala | 7 ++++++- compiler/src/dotty/tools/dotc/util/HashSet.scala | 9 +++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/compiler/src/dotty/tools/dotc/util/HashMap.scala b/compiler/src/dotty/tools/dotc/util/HashMap.scala index 7cf92817763a..dd13d9c9a0e5 100644 --- a/compiler/src/dotty/tools/dotc/util/HashMap.scala +++ b/compiler/src/dotty/tools/dotc/util/HashMap.scala @@ -11,7 +11,12 @@ extends GenericHashMap[Key, Value](initialCapacity, capacityMultiple): /** Hashcode is left-shifted by 1, so lowest bit is not lost * when taking the index. */ - final def hash(x: Key): Int = x.hashCode << 1 + final def hash(key: Key): Int = + val h = key.hashCode + // Part of the MurmurHash3 32 bit finalizer + val i = (h ^ (h >>> 16)) * 0x85EBCA6B + val j = (i ^ (i >>> 13)) & 0x7FFFFFFF + (if j==0 then 0x41081989 else j) << 1 final def isEqual(x: Key, y: Key): Boolean = x.equals(y) diff --git a/compiler/src/dotty/tools/dotc/util/HashSet.scala b/compiler/src/dotty/tools/dotc/util/HashSet.scala index bd764c523d83..73e4c91c2be0 100644 --- a/compiler/src/dotty/tools/dotc/util/HashSet.scala +++ b/compiler/src/dotty/tools/dotc/util/HashSet.scala @@ -48,8 +48,13 @@ class HashSet[T](initialCapacity: Int = 8, capacityMultiple: Int = 2) extends Mu protected def isDense = limit < DenseLimit - /** Hashcode, by defualt `x.hashCode`, can be overridden */ - protected def hash(x: T): Int = x.hashCode + /** Hashcode, by default a processed `x.hashCode`, can be overridden */ + protected def hash(key: T): Int = + val h = key.hashCode + // Part of the MurmurHash3 32 bit finalizer + val i = (h ^ (h >>> 16)) * 0x85EBCA6B + val j = (i ^ (i >>> 13)) & 0x7FFFFFFF + if j==0 then 0x41081989 else j /** Hashcode, by default `equals`, can be overridden */ protected def isEqual(x: T, y: T): Boolean = x.equals(y)