Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions apps/random_pipeline/build_pipelines.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ HL_BEAM_SIZE=${5:-1}
NUM_CORES=${6:-8}
EPOCHS=${7:-4}
LEARNING_RATE=${8:-0.001}
ADAMS2019_DIR=@adams2019_BINARY_DIR@
BINARY_DIR=@random_pipeline_BINARY_DIR@
ADAMS2019_DIR=/usr/share/inteon/build/halide_cpu/src/autoschedulers/adams2019
BINARY_DIR=/usr/share/inteon/build/halide_cpu/apps/random_pipeline
INITIAL_WEIGHTS=${9:-$ADAMS2019_DIR/baseline.weights}
PROGRAM_NAME=`basename $0 .sh`
LOGFILEBASE=${10:-${PROGRAM_NAME}.log}
Expand Down Expand Up @@ -91,7 +91,7 @@ for ((p=$((FIRST+1));p<$((FIRST+PIPELINES+1));p++)); do
-e c_header,object,schedule,python_schedule,static_library,registration,featurization \
-o $PIPELINE_DIR -p $ADAMS2019_DIR/libautoschedule_adams2019.so \
target=${HL_TARGET}-no_runtime auto_schedule=true seed=$P max_stages=$STAGES && \
@CMAKE_CXX_COMPILER@ -std=c++17 -O3 -DNDEBUG -I@Halide_BINARY_DIR@/include @Halide_SOURCE_DIR@/tools/RunGenMain.cpp \
/usr/bin/c++ -std=c++17 -O3 -DNDEBUG -I/usr/share/inteon/include /usr/share/inteon/build/halide_cpu/tools/RunGenMain.cpp \
$PIPELINE_DIR/random_pipeline.registration.cpp \
$PIPELINE_DIR/random_pipeline.a $BINARY_DIR/librandom_pipeline.runtime.a \
-o $PIPELINE_DIR/bench -DHALIDE_NO_PNG -DHALIDE_NO_JPEG -pthread -ldl" | tee -a $LOGFILE
Expand Down
6 changes: 6 additions & 0 deletions src/autoschedulers/adams2019/DefaultCostModel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,11 @@ float DefaultCostModel::backprop(const Runtime::Buffer<const float> &true_runtim
conv1_filter_update, conv1_bias_update,
dst,
loss);

std::cout << "backprop: after train_cost_model, loss() = " << loss() << "\n";

(void)result;
std::cout << "backprop: after train_cost_model, result = " << result << "\n";
internal_assert(result == 0);

bool any_nans = false;
Expand All @@ -248,6 +252,7 @@ float DefaultCostModel::backprop(const Runtime::Buffer<const float> &true_runtim
internal_assert(true_runtimes(i) > 0);
}
if (any_nans) {
std::cout << "backprop: abort() due to any_nans\n";
abort();
}

Expand All @@ -264,6 +269,7 @@ float DefaultCostModel::backprop(const Runtime::Buffer<const float> &true_runtim

internal_assert(cursor != 0);

std::cout << "backprop: Returning loss = " << loss() << "\n";
return loss();
}

Expand Down
4 changes: 3 additions & 1 deletion src/autoschedulers/adams2019/cost_model_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,8 @@ class CostModel : public Generator<CostModel<training>> {
Expr r1 = true_runtime(n) * scale;

// Invert them to get relative throughput, and compute L2 loss.
// Expr delta = pow(1.0f / max(p1, 1e-10f) - 1.0f / r1, 2);
// Testing with original cost model for autotraining weights for 48 cores
//Expr delta = pow(1.0f / max(p1, 1e-10f) - 1.0f / r1, 2);
// Instead of the term above, we will divide the delta by the 1/r1,
// emphasizing that getting smaller runtime predictions wrong would
// contribute more to the error term than getting larger predictions wrong.
Expand All @@ -457,6 +458,7 @@ class CostModel : public Generator<CostModel<training>> {
//Expr delta = exp(-0.22f*(1.0f/r1 - 1.0f/max(p1, 1e-10f))) + 0.22f*(1.0f/r1 - 1.0f/max(p1, 1e-10f)) - 1.0f;
// Expr delta = 17.0f*(exp(-0.22f*(0.5f*r1-p1)) + 0.22f*(0.5f*r1-p1) - 1.0f); // Batch 20 is very interesting with 6 points below 1.6 at 16 sample run
//Expr delta = 17.0f*(exp(-0.22f*(0.3f*r1-p1)) + 0.22f*(0.3f*r1-p1) - 1.0f) + r1*r1; // Interesting!
// Testing with original cost model for autotraining weights for 48 cores
Expr delta = 17.0f*(exp(-0.22f*(0.25f*r1-p1)) + 0.22f*(0.25f*r1-p1) - 1.0f) + r1*r1; //

// Add the regularization with a small weight.
Expand Down
98 changes: 73 additions & 25 deletions src/autoschedulers/adams2019/retrain_cost_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
// autoscheduling procedure crashes and want to filter them
// out with a warning.

std::cout << "floats_read: " << floats_read << "\n";
std::cout << "num_features: " << num_features << "\n";
std::cout << "features_per_stage: " << features_per_stage << "\n";

if (floats_read == scratch.size()) {
std::cout << "Too-large sample: " << s << " " << floats_read << "\n";
continue;
Expand All @@ -204,12 +208,15 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
}
const size_t num_stages = num_features / features_per_stage;

std::cout << "num_stages: " << num_stages << "\n";

const float runtime = scratch[num_features];
if (runtime > 100000) { // Don't try to predict runtime over 100s
std::cout << "Implausible runtime in ms: " << runtime << "\n";
continue;
}
// std::cout << "Runtime: " << runtime << "\n";
std::cout << "Runtime: " << runtime << " for sample file " << s << "\n";

int pipeline_id = *((int32_t *)(&scratch[num_features + 1]));
const int schedule_id = *((int32_t *)(&scratch[num_features + 2]));
Expand All @@ -220,6 +227,9 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
best_path = s;
}

std::cout << "pipeline_id = " << pipeline_id << "\n";
std::cout << "schedule_id = " << schedule_id << "\n";

PipelineSample &ps = result[pipeline_id];

if (ps.pipeline_features.data() == nullptr) {
Expand All @@ -243,13 +253,18 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
}

uint64_t schedule_hash = 0;

std::cout << "num_stages = " << num_stages << "\n";

for (size_t i = 0; i < num_stages; i++) {
schedule_hash =
hash_floats(schedule_hash,
&scratch[i * features_per_stage],
&scratch[i * features_per_stage + head2_w]);
}

std::cout << "schedule_hash = " << schedule_hash << "\n";

auto it = ps.schedules.find(schedule_hash);
if (it != ps.schedules.end()) {
// Keep the smallest runtime at the front
Expand All @@ -275,6 +290,8 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
sample.schedule_id = schedule_id;
sample.schedule_features = Buffer<float>(head2_w, num_stages);

std::cout << "sample.schedule_id = " << sample.schedule_id << "\n";

bool ok = true;
for (size_t i = 0; i < num_stages; i++) {
for (int x = 0; x < head2_w; x++) {
Expand All @@ -300,15 +317,19 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
}
ps.schedules.emplace(schedule_hash, std::move(sample));
num_unique++;
std::cout << "num_unique = " << num_unique << "\n";
}
}
num_read++;
std::cout << "num_read = " << num_read << "\n";

if (num_read % 10000 == 0) {
std::cout << "Samples loaded: " << num_read << " (" << num_unique << " unique)\n";
}
}

std::cout << "result.size() " << result.size() << "\n";

// Check the noise level
for (const auto &pipe : result) {
double variance_sum = 0;
Expand All @@ -320,6 +341,9 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
abort();
}
std::cout << "Unique sample: " << leaf(p.second.filename) << " : " << p.second.runtimes[0] << "\n";

std::cout << "p.second.runtimes.size() = " << p.second.runtimes.size() << "\n";

if (p.second.runtimes.size() > 1) {
// Compute variance from samples
double mean = 0;
Expand All @@ -336,6 +360,9 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
count += p.second.runtimes.size() - 1;
}
}

std::cout << "count = " << count << "\n";

if (count > 0) {
double stddev = std::sqrt(variance_sum / count);
std::cout << "Noise level: " << stddev << "\n";
Expand Down Expand Up @@ -371,6 +398,8 @@ map<int, PipelineSample> load_samples(const Flags &flags) {
copy_best_schedule(flags.best_schedule_path, ".schedule.h");
copy_best_schedule(flags.best_python_schedule_path, "_schedule.py");

std::cout << "load_samples before return: result.size() " << result.size() << "\n";

return result;
}

Expand All @@ -396,22 +425,23 @@ int main(int argc, char **argv) {
std::cout << "Iterating over " << samples.size() << " samples using seed = " << seed << "\n";
decltype(samples) validation_set;
uint64_t unique_schedules = 0;
if (samples.size() > 16) {
for (const auto &p : samples) {
unique_schedules += p.second.schedules.size();
// Whether or not a pipeline is part of the validation set
// can't be a call to rand. It must be a fixed property of a
// hash of some aspect of it. This way you don't accidentally
// do a training run where a validation set member was in the
// training set of a previous run. The id of the fastest
// schedule will do as a hash.

for (const auto &p : samples) {
unique_schedules += p.second.schedules.size();
// Whether or not a pipeline is part of the validation set
// can't be a call to rand. It must be a fixed property of a
// hash of some aspect of it. This way you don't accidentally
// do a training run where a validation set member was in the
// training set of a previous run. The id of the fastest
// schedule will do as a hash.
if (samples.size() > 16) {
if ((p.second.pipeline_hash & 7) == 0) {
validation_set.insert(p);
}
}

for (const auto &p : validation_set) {
samples.erase(p.first);
for (const auto &p : validation_set) {
samples.erase(p.first);
}
}
}

Expand Down Expand Up @@ -483,26 +513,41 @@ int main(int argc, char **argv) {
float loss = 0.0f;
if (train) {
loss = tp->backprop(runtimes, learning_rate);

// where do we go if we assert here?
assert(!std::isnan(loss));
loss_sum[model] += loss;
loss_sum_counter[model]++;

auto it = p.second.schedules.begin();
std::advance(it, first);
for (size_t j = 0; j < batch_size; j++) {
auto &sched = it->second;
float m = sched.runtimes[0] / (sched.prediction[model] + 1e-10f);
if (m > worst_miss) {
worst_miss = m;
worst_miss_pipeline_id = p.first;
worst_miss_schedule_id = it->first;
assert(!std::isinf(loss));

if (!std::isinf(loss)) {
std::cout << "after backprop, loss = " << loss << "\n";

loss_sum[model] += loss;
loss_sum_counter[model]++;

std::cout << "after backprop, loss_sum[model] = " << loss_sum[model] << "\n";

auto it = p.second.schedules.begin();
std::advance(it, first);
for (size_t j = 0; j < batch_size; j++) {
auto &sched = it->second;
float m = sched.runtimes[0] / (sched.prediction[model] + 1e-10f);
if (m > worst_miss) {
worst_miss = m;
worst_miss_pipeline_id = p.first;
worst_miss_schedule_id = it->first;
}
it++;
}
it++;
} else {
std::cout << "Inf loss, returning early\n";
return 0;
}
} else {
tp->evaluate_costs();
}

std::cout << "returned after if(train) block: loss_sum[model] = " << loss_sum[model] << "\n";

if (true) {
int good = 0, bad = 0;
for (auto &sched : p.second.schedules) {
Expand All @@ -521,6 +566,7 @@ int main(int argc, char **argv) {
if (train) {
float badness = (sched.second.runtimes[0] - ref.runtimes[0]) * (ref.prediction[model] - sched.second.prediction[model]);
badness /= (ref.runtimes[0] * ref.runtimes[0]);

if (badness > worst_inversion.badness) {
worst_inversion.pipeline_id = p.first;
worst_inversion.badness = badness;
Expand Down Expand Up @@ -551,6 +597,8 @@ int main(int argc, char **argv) {

std::cout << "Loss: ";
for (int model = 0; model < kModels; model++) {
std::cout << "\n" << model << " loss_sum[model]= " << loss_sum[model] << "\n";
std::cout << model << " loss_sum_counter[model]= " << loss_sum_counter[model] << "\n";
std::cout << loss_sum[model] / loss_sum_counter[model] << " ";
loss_sum[model] *= 0.9f;
loss_sum_counter[model] *= 0.9f;
Expand Down