diff --git a/apps/random_pipeline/build_pipelines.sh b/apps/random_pipeline/build_pipelines.sh index 6173c4dfb5b3..e513044aeff0 100755 --- a/apps/random_pipeline/build_pipelines.sh +++ b/apps/random_pipeline/build_pipelines.sh @@ -8,8 +8,8 @@ HL_BEAM_SIZE=${5:-1} NUM_CORES=${6:-8} EPOCHS=${7:-4} LEARNING_RATE=${8:-0.001} -ADAMS2019_DIR=@adams2019_BINARY_DIR@ -BINARY_DIR=@random_pipeline_BINARY_DIR@ +ADAMS2019_DIR=/usr/share/inteon/build/halide_cpu/src/autoschedulers/adams2019 +BINARY_DIR=/usr/share/inteon/build/halide_cpu/apps/random_pipeline INITIAL_WEIGHTS=${9:-$ADAMS2019_DIR/baseline.weights} PROGRAM_NAME=`basename $0 .sh` LOGFILEBASE=${10:-${PROGRAM_NAME}.log} @@ -91,7 +91,7 @@ for ((p=$((FIRST+1));p<$((FIRST+PIPELINES+1));p++)); do -e c_header,object,schedule,python_schedule,static_library,registration,featurization \ -o $PIPELINE_DIR -p $ADAMS2019_DIR/libautoschedule_adams2019.so \ target=${HL_TARGET}-no_runtime auto_schedule=true seed=$P max_stages=$STAGES && \ - @CMAKE_CXX_COMPILER@ -std=c++17 -O3 -DNDEBUG -I@Halide_BINARY_DIR@/include @Halide_SOURCE_DIR@/tools/RunGenMain.cpp \ + /usr/bin/c++ -std=c++17 -O3 -DNDEBUG -I/usr/share/inteon/include /usr/share/inteon/build/halide_cpu/tools/RunGenMain.cpp \ $PIPELINE_DIR/random_pipeline.registration.cpp \ $PIPELINE_DIR/random_pipeline.a $BINARY_DIR/librandom_pipeline.runtime.a \ -o $PIPELINE_DIR/bench -DHALIDE_NO_PNG -DHALIDE_NO_JPEG -pthread -ldl" | tee -a $LOGFILE diff --git a/src/autoschedulers/adams2019/DefaultCostModel.cpp b/src/autoschedulers/adams2019/DefaultCostModel.cpp index 7d036f8a5888..d4e102b17861 100644 --- a/src/autoschedulers/adams2019/DefaultCostModel.cpp +++ b/src/autoschedulers/adams2019/DefaultCostModel.cpp @@ -223,7 +223,11 @@ float DefaultCostModel::backprop(const Runtime::Buffer &true_runtim conv1_filter_update, conv1_bias_update, dst, loss); + + std::cout << "backprop: after train_cost_model, loss() = " << loss() << "\n"; + (void)result; + std::cout << "backprop: after train_cost_model, 
result = " << result << "\n"; internal_assert(result == 0); bool any_nans = false; @@ -248,6 +252,7 @@ float DefaultCostModel::backprop(const Runtime::Buffer &true_runtim internal_assert(true_runtimes(i) > 0); } if (any_nans) { + std::cout << "backprop: abort() due to any_nans\n"; abort(); } @@ -264,6 +269,7 @@ float DefaultCostModel::backprop(const Runtime::Buffer &true_runtim internal_assert(cursor != 0); + std::cout << "backprop: Returning loss = " << loss() << "\n"; return loss(); } diff --git a/src/autoschedulers/adams2019/cost_model_generator.cpp b/src/autoschedulers/adams2019/cost_model_generator.cpp index 65d9dff386ec..55e54a459054 100644 --- a/src/autoschedulers/adams2019/cost_model_generator.cpp +++ b/src/autoschedulers/adams2019/cost_model_generator.cpp @@ -434,7 +434,8 @@ class CostModel : public Generator> { Expr r1 = true_runtime(n) * scale; // Invert them to get relative throughput, and compute L2 loss. - // Expr delta = pow(1.0f / max(p1, 1e-10f) - 1.0f / r1, 2); + // Testing with original cost model for autotraining weights for 48 cores + //Expr delta = pow(1.0f / max(p1, 1e-10f) - 1.0f / r1, 2); // Instead of the term above, we will divide the delta by the 1/r1, // emphasizing that getting smaller runtime predictions wrong would // contribute more to the error term than getting larger predictions wrong. @@ -457,6 +458,7 @@ class CostModel : public Generator> { //Expr delta = exp(-0.22f*(1.0f/r1 - 1.0f/max(p1, 1e-10f))) + 0.22f*(1.0f/r1 - 1.0f/max(p1, 1e-10f)) - 1.0f; // Expr delta = 17.0f*(exp(-0.22f*(0.5f*r1-p1)) + 0.22f*(0.5f*r1-p1) - 1.0f); // Batch 20 is very interesting with 6 points below 1.6 at 16 sample run //Expr delta = 17.0f*(exp(-0.22f*(0.3f*r1-p1)) + 0.22f*(0.3f*r1-p1) - 1.0f) + r1*r1; // Interesting! + // Testing with original cost model for autotraining weights for 48 cores Expr delta = 17.0f*(exp(-0.22f*(0.25f*r1-p1)) + 0.22f*(0.25f*r1-p1) - 1.0f) + r1*r1; // // Add the regulization with a small weight. 
diff --git a/src/autoschedulers/adams2019/retrain_cost_model.cpp b/src/autoschedulers/adams2019/retrain_cost_model.cpp index d62bab16f1ae..9dea11f48417 100644 --- a/src/autoschedulers/adams2019/retrain_cost_model.cpp +++ b/src/autoschedulers/adams2019/retrain_cost_model.cpp @@ -194,6 +194,10 @@ map load_samples(const Flags &flags) { // autoscheduling procedure crashes and want to filter them // out with a warning. + std::cout << "floats_read: " << floats_read << "\n"; + std::cout << "num_features: " << num_features << "\n"; + std::cout << "features_per_stage: " << features_per_stage << "\n"; + if (floats_read == scratch.size()) { std::cout << "Too-large sample: " << s << " " << floats_read << "\n"; continue; @@ -204,12 +208,15 @@ map load_samples(const Flags &flags) { } const size_t num_stages = num_features / features_per_stage; + std::cout << "num_stages: " << num_stages << "\n"; + const float runtime = scratch[num_features]; if (runtime > 100000) { // Don't try to predict runtime over 100s std::cout << "Implausible runtime in ms: " << runtime << "\n"; continue; } // std::cout << "Runtime: " << runtime << "\n"; + std::cout << "Runtime: " << runtime << " for sample file " << s << "\n"; int pipeline_id = *((int32_t *)(&scratch[num_features + 1])); const int schedule_id = *((int32_t *)(&scratch[num_features + 2])); @@ -220,6 +227,9 @@ map load_samples(const Flags &flags) { best_path = s; } + std::cout << "pipeline_id = " << pipeline_id << "\n"; + std::cout << "schedule_id = " << schedule_id << "\n"; + PipelineSample &ps = result[pipeline_id]; if (ps.pipeline_features.data() == nullptr) { @@ -243,6 +253,9 @@ map load_samples(const Flags &flags) { } uint64_t schedule_hash = 0; + + std::cout << "num_stages = " << num_stages << "\n"; + for (size_t i = 0; i < num_stages; i++) { schedule_hash = hash_floats(schedule_hash, @@ -250,6 +263,8 @@ map load_samples(const Flags &flags) { &scratch[i * features_per_stage + head2_w]); } + std::cout << "schedule_hash = " << 
schedule_hash << "\n"; + auto it = ps.schedules.find(schedule_hash); if (it != ps.schedules.end()) { // Keep the smallest runtime at the front @@ -275,6 +290,8 @@ map load_samples(const Flags &flags) { sample.schedule_id = schedule_id; sample.schedule_features = Buffer(head2_w, num_stages); + std::cout << "sample.schedule_id = " << sample.schedule_id << "\n"; + bool ok = true; for (size_t i = 0; i < num_stages; i++) { for (int x = 0; x < head2_w; x++) { @@ -300,15 +317,19 @@ map load_samples(const Flags &flags) { } ps.schedules.emplace(schedule_hash, std::move(sample)); num_unique++; + std::cout << "num_unique = " << num_unique << "\n"; } } num_read++; + std::cout << "num_read = " << num_read << "\n"; if (num_read % 10000 == 0) { std::cout << "Samples loaded: " << num_read << " (" << num_unique << " unique)\n"; } } + std::cout << "result.size() " << result.size() << "\n"; + // Check the noise level for (const auto &pipe : result) { double variance_sum = 0; @@ -320,6 +341,9 @@ map load_samples(const Flags &flags) { abort(); } std::cout << "Unique sample: " << leaf(p.second.filename) << " : " << p.second.runtimes[0] << "\n"; + + std::cout << "p.second.runtimes.size() = " << p.second.runtimes.size() << "\n"; + if (p.second.runtimes.size() > 1) { // Compute variance from samples double mean = 0; @@ -336,6 +360,9 @@ map load_samples(const Flags &flags) { count += p.second.runtimes.size() - 1; } } + + std::cout << "count = " << count << "\n"; + if (count > 0) { double stddev = std::sqrt(variance_sum / count); std::cout << "Noise level: " << stddev << "\n"; @@ -371,6 +398,8 @@ map load_samples(const Flags &flags) { copy_best_schedule(flags.best_schedule_path, ".schedule.h"); copy_best_schedule(flags.best_python_schedule_path, "_schedule.py"); + std::cout << "load_samples before return: result.size() " << result.size() << "\n"; + return result; } @@ -396,22 +425,23 @@ int main(int argc, char **argv) { std::cout << "Iterating over " << samples.size() << " samples using seed 
= " << seed << "\n"; decltype(samples) validation_set; uint64_t unique_schedules = 0; - if (samples.size() > 16) { - for (const auto &p : samples) { - unique_schedules += p.second.schedules.size(); - // Whether or not a pipeline is part of the validation set - // can't be a call to rand. It must be a fixed property of a - // hash of some aspect of it. This way you don't accidentally - // do a training run where a validation set member was in the - // training set of a previous run. The id of the fastest - // schedule will do as a hash. + // Select validation pipelines in this scan and erase them from samples only afterwards: erasing inside the loop would invalidate the iterator being walked. + for (const auto &p : samples) { + unique_schedules += p.second.schedules.size(); + // Whether or not a pipeline is part of the validation set + // can't be a call to rand. It must be a fixed property of a + // hash of some aspect of it. This way you don't accidentally + // do a training run where a validation set member was in the + // training set of a previous run. The id of the fastest + // schedule will do as a hash. + if (samples.size() > 16) { if ((p.second.pipeline_hash & 7) == 0) { validation_set.insert(p); } - } + } + } - for (const auto &p : validation_set) { - samples.erase(p.first); + for (const auto &p : validation_set) { + samples.erase(p.first); } - } @@ -483,26 +513,41 @@ int main(int argc, char **argv) { float loss = 0.0f; if (train) { loss = tp->backprop(runtimes, learning_rate); + + // where do we go if we assert here? 
assert(!std::isnan(loss)); - loss_sum[model] += loss; - loss_sum_counter[model]++; - - auto it = p.second.schedules.begin(); - std::advance(it, first); - for (size_t j = 0; j < batch_size; j++) { - auto &sched = it->second; - float m = sched.runtimes[0] / (sched.prediction[model] + 1e-10f); - if (m > worst_miss) { - worst_miss = m; - worst_miss_pipeline_id = p.first; - worst_miss_schedule_id = it->first; + assert(!std::isinf(loss)); + + if (!std::isinf(loss)) { + std::cout << "after backprop, loss = " << loss << "\n"; + + loss_sum[model] += loss; + loss_sum_counter[model]++; + + std::cout << "after backprop, loss_sum[model] = " << loss_sum[model] << "\n"; + + auto it = p.second.schedules.begin(); + std::advance(it, first); + for (size_t j = 0; j < batch_size; j++) { + auto &sched = it->second; + float m = sched.runtimes[0] / (sched.prediction[model] + 1e-10f); + if (m > worst_miss) { + worst_miss = m; + worst_miss_pipeline_id = p.first; + worst_miss_schedule_id = it->first; + } + it++; } - it++; + } else { + std::cout << "Inf loss, returning early\n"; + return 0; } } else { tp->evaluate_costs(); } + std::cout << "returned after if(train) block: loss_sum[model] = " << loss_sum[model] << "\n"; + if (true) { int good = 0, bad = 0; for (auto &sched : p.second.schedules) { @@ -521,6 +566,7 @@ int main(int argc, char **argv) { if (train) { float badness = (sched.second.runtimes[0] - ref.runtimes[0]) * (ref.prediction[model] - sched.second.prediction[model]); badness /= (ref.runtimes[0] * ref.runtimes[0]); + if (badness > worst_inversion.badness) { worst_inversion.pipeline_id = p.first; worst_inversion.badness = badness; @@ -551,6 +597,8 @@ int main(int argc, char **argv) { std::cout << "Loss: "; for (int model = 0; model < kModels; model++) { + std::cout << "\n" << model << " loss_sum[model]= " << loss_sum[model] << "\n"; + std::cout << model << " loss_sum_counter[model]= " << loss_sum_counter[model] << "\n"; std::cout << loss_sum[model] / loss_sum_counter[model] << " "; 
loss_sum[model] *= 0.9f; loss_sum_counter[model] *= 0.9f;