diff --git a/.github/workflows/label-validation.yml b/.github/workflows/label-validation.yml
new file mode 100644
index 000000000..d4f3c7194
--- /dev/null
+++ b/.github/workflows/label-validation.yml
@@ -0,0 +1,160 @@
+name: PR Label Validation
+run-name: "Validate PR #${{ github.event.pull_request.number }}"
+
+# Starting a new run for a PR cancels any run still in progress in this group.
+concurrency:
+  group: "PR#${{ github.event.pull_request.number }}"
+  cancel-in-progress: true
+
+on:
+  pull_request:
+    types: [labeled, unlabeled, synchronize]
+
+jobs:
+  # Builds the validation matrix from PR labels named
+  # <runner-type>_<model-prefix> (exactly one underscore).
+  get-jobs:
+    runs-on: ubuntu-latest
+    outputs:
+      search-space-config: ${{ steps.get-jobs.outputs.search-space-config }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - id: get-jobs
+        shell: python
+        env:
+          # Handed over through the environment instead of being expanded
+          # into the script text, so label content cannot change the Python
+          # source that gets executed.
+          PR_LABELS: ${{ toJson(github.event.pull_request.labels) }}
+        run: |
+          import json
+          import os
+          import re
+          import subprocess
+          import sys
+
+          workspace = os.environ['GITHUB_WORKSPACE']
+
+          def write_output(configs):
+              """Append search-space-config=<JSON> to the step output file."""
+              with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
+                  f.write(f'search-space-config={json.dumps(configs)}\n')
+
+          # Get matching labels
+          labels = json.loads(os.environ['PR_LABELS'])
+          pattern = r'^([^_]+)_([^_]+)$'
+
+          matching = []
+          for label in labels:
+              match = re.match(pattern, label['name'])
+              if match:
+                  runner_type, model_prefix = match.groups()
+                  matching.append({'runner-type': runner_type, 'model-prefix': model_prefix})
+                  print(f"Matched label: {label['name']}")
+
+          if not matching:
+              # An empty matrix makes the validate job's `if` skip it.
+              print("No matching labels found")
+              write_output([])
+              sys.exit(0)
+
+          # Generate configs for standard labels
+          subprocess.run(['pip', 'install', 'pydantic'], check=True)
+
+          all_configs = []
+          for label in matching:
+              result = subprocess.run([
+                  'python3', f"{workspace}/utils/matrix-logic/generate_sweep_configs.py",
+                  'full-sweep',
+                  '--runner-type', label['runner-type'],
+                  '--model-prefix', label['model-prefix'],
+                  '--seq-lens', '1k1k',
+                  '--test-mode',
+                  '--config-files',
+                  f"{workspace}/.github/configs/nvidia-master.yaml",
+                  f"{workspace}/.github/configs/amd-master.yaml",
+                  '--runner-config',
+                  f"{workspace}/.github/configs/runners.yaml"
+              ], capture_output=True, text=True)
+
+              if result.returncode != 0:
+                  print("Error generating configs:")
+                  print(f"STDOUT: {result.stdout}")
+                  print(f"STDERR: {result.stderr}")
+                  sys.exit(1)
+
+              # stdout is parsed as JSON and its items are added to the matrix.
+              all_configs.extend(json.loads(result.stdout))
+
+          print(f"Total standard configs: {len(all_configs)}")
+          write_output(all_configs)
+
+  # Runs the benchmark template once per generated config.
+  validate:
+    needs: get-jobs
+    if: ${{ needs.get-jobs.outputs.search-space-config != '[]' }}
+    uses: ./.github/workflows/benchmark-tmpl.yml
+    strategy:
+      fail-fast: false
+      matrix:
+        config: ${{ fromJson(needs.get-jobs.outputs.search-space-config) }}
+    secrets: inherit
+    name: validate ${{ matrix.config.runner }}
+    with:
+      exp-name: ${{ matrix.config.exp-name }}
+      isl: ${{ matrix.config.isl }}
+      osl: ${{ matrix.config.osl }}
+      max-model-len: ${{ matrix.config.max-model-len }}
+      runner: ${{ matrix.config.runner }}
+      image: ${{ matrix.config.image }}
+      model: ${{ matrix.config.model }}
+      framework: ${{ matrix.config.framework }}
+      precision: ${{ matrix.config.precision }}
+      tp: ${{ matrix.config.tp }}
+      ep: ${{ matrix.config.ep }}
+      dp-attn: ${{ matrix.config.dp-attn }}
+      conc: ${{ matrix.config.conc }}
+
+  collect-results:
+    needs: validate
+    # always() keeps this running when some validate entries fail; the result
+    # check skips it when validate never ran (no matching labels).
+    if: ${{ always() && needs.validate.result != 'skipped' }}
+    uses: ./.github/workflows/collect-results.yml
+    secrets: inherit
+
+  calc-success-rate:
+    needs: collect-results
+    # Runs after failures too, but not when collect-results was skipped.
+    if: ${{ always() && needs.collect-results.result != 'skipped' }}
+    runs-on: ubuntu-latest
+
+    env:
+      RESULTS_DIR: "results/"
+      STATS_FILENAME: "run_stats"
+      GITHUB_TOKEN: ${{ secrets.REPO_PAT }}
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          token: ${{ secrets.REPO_PAT }}
+          fetch-depth: 0
+
+      - name: Download results artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: ${{ env.RESULTS_DIR }}
+          pattern: results_*
+
+      - name: Install python dependencies
+        run: pip install PyGithub
+
+      - name: Calculate success rate
+        run: python3 utils/calc_success_rate.py "$STATS_FILENAME"
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: "run-stats"
+          path: ${{ env.STATS_FILENAME }}.json