Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions apps/random_pipeline/build_pipelines.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ HL_BEAM_SIZE=${5:-1}
NUM_CORES=${6:-8}
EPOCHS=${7:-4}
LEARNING_RATE=${8:-0.001}
ADAMS2019_DIR=@adams2019_BINARY_DIR@
BINARY_DIR=@random_pipeline_BINARY_DIR@
ADAMS2019_DIR=/usr/share/inteon/build/halide_cpu/src/autoschedulers/adams2019
BINARY_DIR=/usr/share/inteon/build/halide_cpu/apps/random_pipeline
INITIAL_WEIGHTS=${9:-$ADAMS2019_DIR/baseline.weights}
PROGRAM_NAME=`basename $0 .sh`
LOGFILEBASE=${10:-${PROGRAM_NAME}.log}
Expand Down Expand Up @@ -91,7 +91,7 @@ for ((p=$((FIRST+1));p<$((FIRST+PIPELINES+1));p++)); do
-e c_header,object,schedule,python_schedule,static_library,registration,featurization \
-o $PIPELINE_DIR -p $ADAMS2019_DIR/libautoschedule_adams2019.so \
target=${HL_TARGET}-no_runtime auto_schedule=true seed=$P max_stages=$STAGES && \
@CMAKE_CXX_COMPILER@ -std=c++17 -O3 -DNDEBUG -I@Halide_BINARY_DIR@/include @Halide_SOURCE_DIR@/tools/RunGenMain.cpp \
/usr/bin/c++ -std=c++17 -O3 -DNDEBUG -I/usr/share/inteon/include /usr/share/inteon/build/halide_cpu/tools/RunGenMain.cpp \
$PIPELINE_DIR/random_pipeline.registration.cpp \
$PIPELINE_DIR/random_pipeline.a $BINARY_DIR/librandom_pipeline.runtime.a \
-o $PIPELINE_DIR/bench -DHALIDE_NO_PNG -DHALIDE_NO_JPEG -pthread -ldl" | tee -a $LOGFILE
Expand Down
6 changes: 6 additions & 0 deletions src/autoschedulers/adams2019/DefaultCostModel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,11 @@ float DefaultCostModel::backprop(const Runtime::Buffer<const float> &true_runtim
conv1_filter_update, conv1_bias_update,
dst,
loss);

std::cout << "backprop: after train_cost_model, loss() = " << loss() << "\n";

(void)result;
std::cout << "backprop: after train_cost_model, result = " << result << "\n";
internal_assert(result == 0);

bool any_nans = false;
Expand All @@ -248,6 +252,7 @@ float DefaultCostModel::backprop(const Runtime::Buffer<const float> &true_runtim
internal_assert(true_runtimes(i) > 0);
}
if (any_nans) {
std::cout << "backprop: abort() due to any_nans\n";
abort();
}

Expand All @@ -264,6 +269,7 @@ float DefaultCostModel::backprop(const Runtime::Buffer<const float> &true_runtim

internal_assert(cursor != 0);

std::cout << "backprop: Returning loss = " << loss() << "\n";
return loss();
}

Expand Down
4 changes: 3 additions & 1 deletion src/autoschedulers/adams2019/cost_model_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,8 @@ class CostModel : public Generator<CostModel<training>> {
Expr r1 = true_runtime(n) * scale;

// Invert them to get relative throughput, and compute L2 loss.
// Expr delta = pow(1.0f / max(p1, 1e-10f) - 1.0f / r1, 2);
// Testing with original cost model for autotraining weights for 48 cores
//Expr delta = pow(1.0f / max(p1, 1e-10f) - 1.0f / r1, 2);
// Instead of the term above, we will divide the delta by the 1/r1,
// emphasizing that getting smaller runtime predictions wrong would
// contribute more to the error term than getting larger predictions wrong.
Expand All @@ -457,6 +458,7 @@ class CostModel : public Generator<CostModel<training>> {
//Expr delta = exp(-0.22f*(1.0f/r1 - 1.0f/max(p1, 1e-10f))) + 0.22f*(1.0f/r1 - 1.0f/max(p1, 1e-10f)) - 1.0f;
// Expr delta = 17.0f*(exp(-0.22f*(0.5f*r1-p1)) + 0.22f*(0.5f*r1-p1) - 1.0f); // Batch 20 is very interesting with 6 points below 1.6 at 16 sample run
//Expr delta = 17.0f*(exp(-0.22f*(0.3f*r1-p1)) + 0.22f*(0.3f*r1-p1) - 1.0f) + r1*r1; // Interesting!
// Testing with original cost model for autotraining weights for 48 cores
Expr delta = 17.0f*(exp(-0.22f*(0.25f*r1-p1)) + 0.22f*(0.25f*r1-p1) - 1.0f) + r1*r1; //

// Add the regularization with a small weight.
Expand Down
98 changes: 73 additions & 25 deletions src/autoschedulers/adams2019/retrain_cost_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
// autoscheduling procedure crashes and want to filter them
// out with a warning.

std::cout << "floats_read: " << floats_read << "\n";
std::cout << "num_features: " << num_features << "\n";
std::cout << "features_per_stage: " << features_per_stage << "\n";

if (floats_read == scratch.size()) {
std::cout << "Too-large sample: " << s << " " << floats_read << "\n";
continue;
Expand All @@ -204,12 +208,15 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
}
const size_t num_stages = num_features / features_per_stage;

std::cout << "num_stages: " << num_stages << "\n";

const float runtime = scratch[num_features];
if (runtime > 100000) { // Don't try to predict runtime over 100s
std::cout << "Implausible runtime in ms: " << runtime << "\n";
continue;
}
// std::cout << "Runtime: " << runtime << "\n";
std::cout << "Runtime: " << runtime << " for sample file " << s << "\n";

int pipeline_id = *((int32_t *)(&scratch[num_features + 1]));
const int schedule_id = *((int32_t *)(&scratch[num_features + 2]));
Expand All @@ -220,6 +227,9 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
best_path = s;
}

std::cout << "pipeline_id = " << pipeline_id << "\n";
std::cout << "schedule_id = " << schedule_id << "\n";

PipelineSample &ps = result[pipeline_id];

if (ps.pipeline_features.data() == nullptr) {
Expand All @@ -243,13 +253,18 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
}

uint64_t schedule_hash = 0;

std::cout << "num_stages = " << num_stages << "\n";

for (size_t i = 0; i < num_stages; i++) {
schedule_hash =
hash_floats(schedule_hash,
&scratch[i * features_per_stage],
&scratch[i * features_per_stage + head2_w]);
}

std::cout << "schedule_hash = " << schedule_hash << "\n";

auto it = ps.schedules.find(schedule_hash);
if (it != ps.schedules.end()) {
// Keep the smallest runtime at the front
Expand All @@ -275,6 +290,8 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
sample.schedule_id = schedule_id;
sample.schedule_features = Buffer<float>(head2_w, num_stages);

std::cout << "sample.schedule_id = " << sample.schedule_id << "\n";

bool ok = true;
for (size_t i = 0; i < num_stages; i++) {
for (int x = 0; x < head2_w; x++) {
Expand All @@ -300,15 +317,19 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
}
ps.schedules.emplace(schedule_hash, std::move(sample));
num_unique++;
std::cout << "num_unique = " << num_unique << "\n";
}
}
num_read++;
std::cout << "num_read = " << num_read << "\n";

if (num_read % 10000 == 0) {
std::cout << "Samples loaded: " << num_read << " (" << num_unique << " unique)\n";
}
}

std::cout << "result.size() " << result.size() << "\n";

// Check the noise level
for (const auto &pipe : result) {
double variance_sum = 0;
Expand All @@ -320,6 +341,9 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
abort();
}
std::cout << "Unique sample: " << leaf(p.second.filename) << " : " << p.second.runtimes[0] << "\n";

std::cout << "p.second.runtimes.size() = " << p.second.runtimes.size() << "\n";

if (p.second.runtimes.size() > 1) {
// Compute variance from samples
double mean = 0;
Expand All @@ -336,6 +360,9 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
count += p.second.runtimes.size() - 1;
}
}

std::cout << "count = " << count << "\n";

if (count > 0) {
double stddev = std::sqrt(variance_sum / count);
std::cout << "Noise level: " << stddev << "\n";
Expand Down Expand Up @@ -371,6 +398,8 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
copy_best_schedule(flags.best_schedule_path, ".schedule.h");
copy_best_schedule(flags.best_python_schedule_path, "_schedule.py");

std::cout << "load_samples before return: result.size() " << result.size() << "\n";

return result;
}

Expand All @@ -396,22 +425,23 @@ int main(int argc, char **argv) {
std::cout << "Iterating over " << samples.size() << " samples using seed = " << seed << "\n";
decltype(samples) validation_set;
uint64_t unique_schedules = 0;
if (samples.size() > 16) {
for (const auto &p : samples) {
unique_schedules += p.second.schedules.size();
// Whether or not a pipeline is part of the validation set
// can't be a call to rand. It must be a fixed property of a
// hash of some aspect of it. This way you don't accidentally
// do a training run where a validation set member was in the
// training set of a previous run. The id of the fastest
// schedule will do as a hash.

for (const auto &p : samples) {
unique_schedules += p.second.schedules.size();
// Whether or not a pipeline is part of the validation set
// can't be a call to rand. It must be a fixed property of a
// hash of some aspect of it. This way you don't accidentally
// do a training run where a validation set member was in the
// training set of a previous run. The id of the fastest
// schedule will do as a hash.
if (samples.size() > 16) {
if ((p.second.pipeline_hash & 7) == 0) {
validation_set.insert(p);
}
}

for (const auto &p : validation_set) {
samples.erase(p.first);
for (const auto &p : validation_set) {
samples.erase(p.first);
}
}
}

Expand Down Expand Up @@ -483,26 +513,41 @@ int main(int argc, char **argv) {
float loss = 0.0f;
if (train) {
loss = tp->backprop(runtimes, learning_rate);

// where do we go if we assert here?
assert(!std::isnan(loss));
loss_sum[model] += loss;
loss_sum_counter[model]++;

auto it = p.second.schedules.begin();
std::advance(it, first);
for (size_t j = 0; j < batch_size; j++) {
auto &sched = it->second;
float m = sched.runtimes[0] / (sched.prediction[model] + 1e-10f);
if (m > worst_miss) {
worst_miss = m;
worst_miss_pipeline_id = p.first;
worst_miss_schedule_id = it->first;
assert(!std::isinf(loss));

if (!std::isinf(loss)) {
std::cout << "after backprop, loss = " << loss << "\n";

loss_sum[model] += loss;
loss_sum_counter[model]++;

std::cout << "after backprop, loss_sum[model] = " << loss_sum[model] << "\n";

auto it = p.second.schedules.begin();
std::advance(it, first);
for (size_t j = 0; j < batch_size; j++) {
auto &sched = it->second;
float m = sched.runtimes[0] / (sched.prediction[model] + 1e-10f);
if (m > worst_miss) {
worst_miss = m;
worst_miss_pipeline_id = p.first;
worst_miss_schedule_id = it->first;
}
it++;
}
it++;
} else {
std::cout << "Inf loss, returning early\n";
return 0;
}
} else {
tp->evaluate_costs();
}

std::cout << "returned after if(train) block: loss_sum[model] = " << loss_sum[model] << "\n";

if (true) {
int good = 0, bad = 0;
for (auto &sched : p.second.schedules) {
Expand All @@ -521,6 +566,7 @@ int main(int argc, char **argv) {
if (train) {
float badness = (sched.second.runtimes[0] - ref.runtimes[0]) * (ref.prediction[model] - sched.second.prediction[model]);
badness /= (ref.runtimes[0] * ref.runtimes[0]);

if (badness > worst_inversion.badness) {
worst_inversion.pipeline_id = p.first;
worst_inversion.badness = badness;
Expand Down Expand Up @@ -551,6 +597,8 @@ int main(int argc, char **argv) {

std::cout << "Loss: ";
for (int model = 0; model < kModels; model++) {
std::cout << "\n" << model << " loss_sum[model]= " << loss_sum[model] << "\n";
std::cout << model << " loss_sum_counter[model]= " << loss_sum_counter[model] << "\n";
std::cout << loss_sum[model] / loss_sum_counter[model] << " ";
loss_sum[model] *= 0.9f;
loss_sum_counter[model] *= 0.9f;
Expand Down