Skip to content
5 changes: 5 additions & 0 deletions core/LocOffsets.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ class MutableContext;

constexpr int INVALID_POS_LOC = 0xfffffff;
struct LocOffsets {
template <typename H> friend H AbslHashValue(H h, const LocOffsets &m);

uint32_t beginLoc = INVALID_POS_LOC;
uint32_t endLoc = INVALID_POS_LOC;
uint32_t beginPos() const {
Expand Down Expand Up @@ -51,6 +53,9 @@ struct LocOffsets {
};
CheckSize(LocOffsets, 8, 4);

template <typename H> H AbslHashValue(H h, const LocOffsets &m) {
return H::combine(std::move(h), m.beginLoc, m.endLoc);
}
} // namespace sorbet::core

#endif // SORBET_CORE_LOCOFFSETS_H
131 changes: 96 additions & 35 deletions scip_indexer/SCIPIndexer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "cfg/CFG.h"
#include "common/common.h"
#include "common/sort.h"
#include "core/ErrorQueue.h"
#include "core/Loc.h"
#include "core/SymbolRef.h"
#include "core/Symbols.h"
Expand Down Expand Up @@ -67,11 +68,6 @@ template <typename T, typename Fn> static string vec_to_string(const vector<T> v
return out.str();
}

static string bbvec_to_string(const vector<sorbet::cfg::BasicBlock *> &parents) {
return vec_to_string(
parents, [](const auto &ptr) -> auto { return fmt::format("(id: {}, ptr: {})", ptr->id, (void *)ptr); });
};

template <typename T> static void drain(vector<T> &input, vector<T> &output) {
output.reserve(output.size() + input.size());
for (auto &v : input) {
Expand Down Expand Up @@ -109,7 +105,7 @@ static bool isTemporary(const core::GlobalState &gs, const core::LocalVariable &
}
auto n = var._name;
return n == Names::blockPreCallTemp() || n == Names::blockTemp() || n == Names::blockPassTemp() ||
n == Names::forTemp() || n == Names::blkArg() || n == Names::blockCall() ||
n == Names::blkArg() || n == Names::blockCall() || n == Names::blockBreakAssign() || n == Names::forTemp() ||
// Insert checks because sometimes temporaries are initialized with a 0 unique value. 😬
n == Names::finalReturn() || n == NameRef::noName() || n == Names::blockCall() || n == Names::selfLocal() ||
n == Names::unconditional();
Expand Down Expand Up @@ -198,6 +194,16 @@ class SCIPState {
public:
UnorderedMap<core::FileRef, vector<scip::Occurrence>> occurrenceMap;
UnorderedMap<core::FileRef, vector<scip::SymbolInformation>> symbolMap;
// Cache of occurrences for locals that have been emitted in this function.
//
// Note that the SymbolRole is a part of the key too, because we can
// have a read-reference and write-reference at the same location
// (we don't merge those for now).
//
// The 'value' in the map is purely for sanity-checking. It's a bit
// cumbersome to conditionalize the type to be a set in non-debug and
// map in debug, so keeping it a map.
UnorderedMap<std::pair<core::LocOffsets, /*SymbolRole*/ int32_t>, uint32_t> localOccurrenceCache;
vector<scip::Document> documents;
vector<scip::SymbolInformation> externalSymbols;

Expand Down Expand Up @@ -271,14 +277,52 @@ class SCIPState {
return absl::OkStatus();
}

// Returns true if there was a cache hit.
//
// Otherwise, inserts the location into the cache and returns false.
bool cacheOccurrence(const core::GlobalState &gs, core::FileRef file, OwnedLocal occ, int32_t symbolRoles) {
// Optimization:
// Avoid emitting duplicate defs/refs for locals.
// This can happen with constructs like:
// z = if cond then expr else expr end
// When this is lowered to a CFG, we will end up with
// multiple bindings with the same LHS location.
//
// Repeated reads for the same occurrence can happen for rvalues too.
// If the compiler has proof that a scrutinee variable is not modified,
// each comparison in a case will perform a read.
// case x
// when 0 then <stuff>
// when 1 then <stuff>
// else <stuff>
// end
// The CFG for this involves two reads for x in calls to ==.
// (This wouldn't have happened if x was always stashed away into
// a temporary first, but the temporary only appears in the CFG if
// evaluating one of the cases has a chance to modify x.)
auto [it, inserted] = this->localOccurrenceCache.insert({{occ.offsets, symbolRoles}, occ.counter});
if (inserted) {
return false;
}
auto savedCounter = it->second;
ENFORCE(occ.counter == savedCounter, "cannot have distinct local variable {} at same location {}",
(symbolRoles & scip::SymbolRole::Definition) ? "definitions" : "references",
core::Loc(file, occ.offsets).showRaw(gs));
return true;
}

public:
absl::Status saveDefinition(const core::GlobalState &gs, core::FileRef file, OwnedLocal occ) {
if (this->cacheOccurrence(gs, file, occ, scip::SymbolRole::Definition)) {
return absl::OkStatus();
}
return this->saveDefinitionImpl(gs, file, occ.toString(gs, file), core::Loc(file, occ.offsets));
}

// Save definition when you have a sorbet Symbol.
// Meant for methods, fields etc., but not local variables.
absl::Status saveDefinition(const core::GlobalState &gs, core::FileRef file, core::SymbolRef symRef) {
// TODO:(varun) Should we cache here too to avoid emitting duplicate definitions?
scip::Symbol symbol;
auto status = symbolForExpr(gs, symRef, symbol);
if (!status.ok()) {
Expand All @@ -297,11 +341,15 @@ class SCIPState {
}

absl::Status saveReference(const core::GlobalState &gs, core::FileRef file, OwnedLocal occ, int32_t symbol_roles) {
if (this->cacheOccurrence(gs, file, occ, symbol_roles)) {
return absl::OkStatus();
}
return this->saveReferenceImpl(gs, file, occ.toString(gs, file), occ.offsets, symbol_roles);
}

absl::Status saveReference(const core::GlobalState &gs, core::FileRef file, core::SymbolRef symRef,
core::LocOffsets occLoc, int32_t symbol_roles) {
// TODO:(varun) Should we cache here to to avoid emitting duplicate references?
absl::StatusOr<string *> valueOrStatus(this->saveSymbolString(gs, symRef, nullptr));
if (!valueOrStatus.ok()) {
return valueOrStatus.status();
Expand Down Expand Up @@ -357,7 +405,29 @@ core::SymbolRef lookupRecursive(const core::GlobalState &gs, const core::SymbolR
}

class CFGTraversal final {
UnorderedMap<const cfg::BasicBlock *, UnorderedMap<cfg::LocalRef, core::Loc>> blockLocals;
// A map from each basic block to the locals in it.
//
// The locals may be coming from the parents, or they may be defined in the
// block. Locals coming from the parents may be in the form of basic block
// arguments or they may be "directly referenced."
//
// For example, if you have code like:
//
// def f(x):
// y = 0
// if cond:
// y = x + $z
//
// Then x and $z will be passed as arguments to the basic block for the
// true branch, whereas 'y' won't be. However, 'y' is still technically
// coming from the parent basic block, otherwise we'd end up marking the
// assignment as a definition instead of a (write) reference.
//
// At the start of the traversal of a basic block, the entry for a basic
// block is populated with the locals coming from the parents. Then,
// we traverse each instruction and populate it with the locals defined
// in the block.
UnorderedMap<const cfg::BasicBlock *, UnorderedSet<cfg::LocalRef>> blockLocals;
UnorderedMap<cfg::LocalRef, uint32_t> functionLocals;

// Local variable counter that is reset for every function.
Expand All @@ -370,8 +440,8 @@ class CFGTraversal final {
: blockLocals(), functionLocals(), scipState(scipState), ctx(ctx) {}

private:
void addLocal(const cfg::BasicBlock *bb, cfg::LocalRef localRef, core::Loc loc) {
this->blockLocals[bb][localRef] = loc;
void addLocal(const cfg::BasicBlock *bb, cfg::LocalRef localRef) {
this->blockLocals[bb].insert(localRef);
this->functionLocals[localRef] = ++this->counter;
}

Expand Down Expand Up @@ -405,7 +475,7 @@ class CFGTraversal final {
if (!this->functionLocals.contains(localRef)) {
isDefinition = true; // If we're seeing this for the first time in topological order,
// The current block must have a definition for the variable.
this->addLocal(bb, localRef, this->ctx.locAt(local.loc));
this->addLocal(bb, localRef);
}
// The variable wasn't passed in as an argument, and hasn't already been recorded
// as a local in the block. So this must be a definition line.
Expand All @@ -419,7 +489,7 @@ class CFGTraversal final {
// Ill-formed code where we're trying to access a variable
// without setting it first. Emit a local as a best-effort.
// TODO(varun): Will Sorbet error out before we get here?
this->addLocal(bb, localRef, this->ctx.locAt(local.loc));
this->addLocal(bb, localRef);
}
// TODO(varun): Will Sorbet error out before we get here?
// It's possible that we have ill-formed code where the variable
Expand Down Expand Up @@ -448,42 +518,34 @@ class CFGTraversal final {
return true;
}

void addArgLocals(cfg::BasicBlock *bb, const cfg::CFG &cfg) {
this->blockLocals[bb] = {};
for (auto &bbArgs : bb->args) {
bool found = false;
for (auto parentBB : bb->backEdges) {
if (this->blockLocals[parentBB].contains(bbArgs.variable)) {
this->blockLocals[bb][bbArgs.variable] = this->blockLocals[parentBB][bbArgs.variable];
found = true;
break;
}
void copyLocalsFromParents(cfg::BasicBlock *bb, const cfg::CFG &cfg) {
UnorderedSet<cfg::LocalRef> bbLocals{};
for (auto parentBB : bb->backEdges) {
if (!this->blockLocals.contains(parentBB)) { // e.g. loops
continue;
}
if (!found) {
print_dbg("# basic block argument {} did not come from parents {}\n",
bbArgs.toString(this->ctx.state, cfg), bbvec_to_string(bb->backEdges));
auto &parentLocals = this->blockLocals[parentBB];
if (bbLocals.size() + parentLocals.size() > bbLocals.capacity()) {
bbLocals.reserve(bbLocals.size() + parentLocals.size());
}
for (auto local : parentLocals) {
bbLocals.insert(local);
}
}
ENFORCE(!this->blockLocals.contains(bb));
this->blockLocals[bb] = std::move(bbLocals);
}

public:
void traverse(const cfg::CFG &cfg) {
auto &gs = this->ctx.state;
auto method = this->ctx.owner;

auto print_map = [&cfg, &gs](const UnorderedMap<cfg::LocalRef, core::Loc> &map, cfg::BasicBlock *ptr) {
print_dbg("# blockLocals (id: {}, ptr: {}) = {}\n", ptr->id, (void *)ptr,
map_to_string(
map, [&](const auto &localref, const auto &loc) -> auto {
return fmt::format("{} {}", localref.data(cfg).toString(gs), loc.showRaw(gs));
}));
};

// I don't fully understand the doc comment for forwardsTopoSort; it seems backwards in practice.
for (auto it = cfg.forwardsTopoSort.rbegin(); it != cfg.forwardsTopoSort.rend(); ++it) {
cfg::BasicBlock *bb = *it;
print_dbg("# Looking at block id: {} ptr: {}\n", bb->id, (void *)bb);
this->addArgLocals(bb, cfg);
this->copyLocalsFromParents(bb, cfg);
for (auto &binding : bb->exprs) {
if (auto *aliasInstr = cfg::cast_instruction<cfg::Alias>(binding.value)) {
auto aliasName = aliasInstr->name;
Expand All @@ -497,7 +559,7 @@ class CFGTraversal final {
method.toString(gs));
continue;
}
this->addLocal(bb, binding.bind.variable, aliasedSym.loc(gs));
this->addLocal(bb, binding.bind.variable);
continue;
}
if (!binding.loc.exists() || binding.loc.empty()) { // TODO(varun): When can each case happen?
Expand Down Expand Up @@ -599,7 +661,6 @@ class CFGTraversal final {
}
}
}
print_map(this->blockLocals.at(bb), bb);
}
}
};
Expand Down
120 changes: 120 additions & 0 deletions test/scip/testdata/loops_and_conditionals.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# typed: true

def if_elsif_else()
x = 0
y = 0
# Basic stuff
if x == 1
y = 2
elsif x == 2
y = 3
else
y = x
end

# More complex expressiosn
z =
if if x == 0 then x+1 else x+2 end == 1
x
else
x+1
end
z = z if z != 10

return
end

def unless()
z = 0
x = 1
unless z == 9
z = 9
end

unless x == 10
x = 3
else
x = 2
end
return
end

def case(x, y)
case x
when 0
x = 3
when y
x = 2
when (3 == (x = 1))
x = 0
else
x = 1
end
return
end

def for(xs)
for e in xs
puts e
end

for f in xs
g = f+1
next if g == 0
next g+1 if g == 1
break if g == 2
break g+1 if g == 3
# NOTE: redo is unsupported (https://srb.help/3003)
# but emitting a reference here does work
redo if g == 4
end
end

def while(xs)
i = 0
while i < 10
puts xs[i]
end

j = 0
while j < 10
g = xs[j]
next if g == 0
next g+1 if g == 1
break if g == 2
break g+1 if g == 3
# NOTE: redo is unsupported (https://srb.help/3003)
# but emitting a reference here does work
redo if g == 4
end
end

def until(xs)
i = 0
until i > 10
puts xs[i]
end

j = 0
until j > 10
g = xs[j]
next if g == 0
next g+1 if g == 1
break if g == 2
break g+1 if g == 3
# NOTE: redo is unsupported (https://srb.help/3003)
# but emitting a reference here does work
redo if g == 4
end
end

def flip_flop(xs)
# NOTE: flip-flops are unsupported (https://srb.help/3003)
# Unlike redo, which somehow works, we fail to emit references
# for the conditions.
# Keep this test anyways to check that we don't crash/mess something up
for x in xs
puts x if x==2..x==8
puts x+1 if x==4...x==6
end
end
Loading