diff --git a/.github/workflows/mkdocs-deploy.yml b/.github/workflows/mkdocs-deploy.yml deleted file mode 100644 index f17ea8d..0000000 --- a/.github/workflows/mkdocs-deploy.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: mkdocs-deploy -on: - push: - branches: - - docs - -permissions: - contents: write -jobs: - deploy: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Configure Git Credentials - run: | - git config --global user.name 'GitHub Actions' - git config --global user.email 'actions@github.com' - - name: Set up Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Install Dependencies - run: pip install mkdocs-material[imaging] mkdocs-autorefs mkdocs-get-deps mkdocs-material-extensions mkdocstrings mkdocstrings-python mkdocs-git-revision-date-localized-plugin - - name: Deploy docs - run: mkdocs gh-deploy --force --verbose diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml deleted file mode 100644 index b2e9c30..0000000 --- a/.github/workflows/python-publish.yml +++ /dev/null @@ -1,52 +0,0 @@ -# This workflow will upload a Python Package to PyPi when a Release is created -name: Publish Python Package - -on: - release: - types: [published] - -permissions: - contents: read - -env: - PYPI_USERNAME: __token__ - PYPI_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} - -jobs: - publish: - name: Publish to PyPi - runs-on: ubuntu-latest - - steps: - - name: Checkout the code - uses: actions/checkout@v4 - - - name: Set up Python 3.11 - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - - name: Install Poetry - run: | - curl -sSL https://install.python-poetry.org | python - -y - echo "$HOME/.local/bin" >> $GITHUB_PATH - - - name: Install Python package dependencies - run: | - poetry config virtualenvs.create false - poetry install --sync --no-interaction - - - name: Inject the latest Code Analyzer JAR - run: | - CODE_ANALYZER_URL=$(curl -s 
https://api.github.com/repos/IBM/codenet-minerva-code-analyzer/releases/latest | jq -r '.assets[] | .browser_download_url') - echo "Downloading: " $CODE_ANALYZER_URL - wget -q $CODE_ANALYZER_URL - echo "Moving codeanalyzer jar to:" ${{ github.workspace }}/cldk/analysis/java/codeanalyzer/jar/ - mv codeanalyzer-*.jar ${{ github.workspace }}/cldk/analysis/java/codeanalyzer/jar/ - - - name: Build package - run: poetry build - - - name: Publish package distributions to PyPI - run: poetry publish --username $PYPI_USERNAME --password $PYPI_PASSWORD - \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 377b062..cf7c58b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,46 +12,55 @@ jobs: release: runs-on: ubuntu-latest - env: - JAVA_HOME: ${{ github.workspace }}/graalvm-ce-java11-22.3.3 - steps: - name: Check out code uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.11' - - name: Set up JDK 11 from GraalVM - run: | - echo "${{ env.JAVA_HOME }}/bin" >> $GITHUB_PATH - wget https://github.com/graalvm/graalvm-ce-builds/releases/download/vm-22.3.3/graalvm-ce-java11-linux-amd64-22.3.3.tar.gz - tar -xvzf graalvm-ce-java11-linux-amd64-22.3.3.tar.gz - ${{ env.JAVA_HOME }}/bin/gu install native-image + - name: Set up GraalVM CE Java 11 + uses: graalvm/setup-graalvm@v1 + with: + java-version: '11' + distribution: 'graalvm-community' + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Install jq + run: sudo apt-get update && sudo apt-get install -y jq - name: Install Poetry run: | curl -sSL https://install.python-poetry.org | python3 - echo "${HOME}/.local/bin" >> $GITHUB_PATH + echo "${HOME}/.local/bin" >> "$GITHUB_PATH"  # persists for later steps; a plain export only affects this step - - name: Configure Poetry - run: poetry config virtualenvs.in-project true - - - name: Install Dependencies - run: poetry install --no-root + - name: Install Python package dependencies + 
run: | + poetry config virtualenvs.create false + poetry install --sync --no-interaction - name: Run Tests - id: build - continue-on-error: true # Allow workflow continuation on failure + id: test + continue-on-error: true run: poetry run make test - name: Delete tag on failure - if: steps.build.outcome != 'success' + if: steps.test.outcome == 'failure'  # continue-on-error forces conclusion to 'success'; outcome keeps the real result run: | + echo "Tests failed. Deleting tag ${GITHUB_REF#refs/tags/}..." git push --delete origin ${GITHUB_REF#refs/tags/} - exit 1 # Fail the workflow + exit 1 + + - name: Inject the latest Code Analyzer JAR + run: | + CODE_ANALYZER_URL=$(curl -s https://api.github.com/repos/codellm-devkit/codeanalyzer-java/releases/latest | jq -r '.assets[] | select(.name | endswith(".jar")) | .browser_download_url') + echo "Downloading: $CODE_ANALYZER_URL" + wget -q "$CODE_ANALYZER_URL" + mkdir -p ${{ github.workspace }}/cldk/analysis/java/codeanalyzer/jar/ + mv codeanalyzer-*.jar ${{ github.workspace }}/cldk/analysis/java/codeanalyzer/jar/ - name: Build Package run: poetry build @@ -72,3 +81,6 @@ jobs: body: ${{ steps.gen_changelog.outputs.changelog }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Publish package distributions to PyPI + run: poetry publish --username __token__ --password ${{ secrets.PYPI_API_TOKEN }} diff --git a/.github/workflows/release_config.json b/.github/workflows/release_config.json index f0d4b5b..200120c 100644 --- a/.github/workflows/release_config.json +++ b/.github/workflows/release_config.json @@ -1,36 +1,62 @@ { "categories": [ + { + "title": "## ✨ Release", + "labels": [ + "release" + ] + }, { "title": "## 🚀 Features", - "labels": ["kind/feature", "enhancement"] + "labels": [ + "kind/feature", + "enhancement" + ] }, { "title": "## 🐛 Fixes", - "labels": ["fix", "bug"] + "labels": [ + "fix", + "bug" + ] }, { "title": "## ♻️ Refactoring", - "labels": ["refactoring"] + "labels": [ + "refactoring" + ] }, { "title": "## ⚡️ Performance Improvements", - "labels": ["performance"] + "labels": [ 
+ "performance" + ] }, { "title": "## \uD83D\uDCDA Documentation", - "labels": ["documentation", "doc"] + "labels": [ + "documentation", + "doc" + ] }, { "title": "## \uD83D\uDEA6 Tests", - "labels": ["test"] + "labels": [ + "test" + ] }, { "title": "## \uD83D\uDEE0 Other Updates", - "labels": ["other", "kind/dependency-change"] + "labels": [ + "other", + "kind/dependency-change" + ] }, { "title": "## 🚨 Breaking Changes", - "labels": ["breaking"] + "labels": [ + "breaking" + ] } ], "ignore_labels": [ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..21c4bd5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,66 @@ +# Contributing to Codellm-Devkit + +Thank you for your interest in contributing to the Codellm-Devkit (CLDK)! Here we provide guidelines for contributing to any repository in the codellm-devkit organization. + +## How to Contribute + +### Issues and Discussions +- For bugs and actionable items, please prefer creating an issue in the relevant repository +- For open-ended or design discussions _specifically related to the specification_, use our [specification discussions](https://github.com/codellm-devkit/specification/discussions) +- For other general discussions that are not suitable as issues, use our [organization discussions](https://github.com/orgs/codellm-devkit/discussions) + +In all cases, please check for duplicates before creating new issues or discussions! + +### Pull Requests +We welcome PRs across all our repositories! 
When submitting: +- Fork the repository +- Follow existing code style +- Include tests where applicable +- Update documentation as needed +- Link related issues + +## Development Guidelines + +### Code Quality +- Follow the repository's established patterns +- Include appropriate documentation +- Add tests for new functionality +- Handle errors appropriately + +### Documentation +- Keep READMEs current +- Document configuration options +- Provide clear examples +- Include setup instructions + +### Security +- Follow security best practices +- Implement proper input validation +- Document security considerations + +## Getting Started + +1. Fork the repository +2. Clone your fork: + ```bash + git clone https://github.com/your-username/repository-name.git + ``` +3. Create a feature branch: + ```bash + git checkout -b my-feature + ``` +4. Make your changes and commit: + ```bash + git commit -m "Description of changes" + ``` +5. Push and create a Pull Request + +## Code of Conduct + +Please note that this project is released with a [Code of Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree to abide by its terms. + +## License + +By contributing, you agree that your contributions will be licensed under the Apache 2.0 License. + +Thank you for contributing to Codellm-Devkit! \ No newline at end of file diff --git a/README.md b/README.md index 383c38a..dc40a55 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ - - - Logo + + + Logo

@@ -54,257 +54,195 @@ Codellm-Devkit is an ongoing project, developed at IBM Research. For any questions, feedback, or suggestions, please contact the authors: -| Name | Email | -| ---- | ----- | -| Rahul Krishna | [i.m.ralk@gmail.com](mailto:imralk+oss@gmail.com) | -| Rangeet Pan | [rangeet.pan@ibm.com](mailto:rangeet.pan@gmail.com) | -| Saurabh Sihna | [sinhas@us.ibm.com](mailto:sinhas@us.ibm.com) | +| Name | Email | +| ------------- | --------------------------------------------------- | +| Rahul Krishna | [i.m.ralk@gmail.com](mailto:imralk+oss@gmail.com) | +| Rangeet Pan | [rangeet.pan@ibm.com](mailto:rangeet.pan@gmail.com) | +| Saurabh Sinha | [sinhas@us.ibm.com](mailto:sinhas@us.ibm.com) | ## Table of Contents - [Contact](#contact) - [Table of Contents](#table-of-contents) +- [Quick Start](#quick-start) - [Architectural and Design Overview](#architectural-and-design-overview) -- [Quick Start: Example Walkthrough](#quick-start-example-walkthrough) - - [Prerequisites](#prerequisites) - - [Step 1: Set up an Ollama server](#step-1--set-up-an-ollama-server) - - [Pull the latest version of Granite 8b instruct model from ollama](#pull-the-latest-version-of-granite-8b-instruct-model-from-ollama) - - [Step 2: Install CLDK](#step-2--install-cldk) - - [Step 3: Build a code summarization pipeline](#step-3--build-a-code-summarization-pipeline) + - [1. **Data Models**](#1-data-models) + - [2. **Analysis Backends**](#2-analysis-backends) + - [Java](#java) + - [Python](#python) + - [C](#c) + - [3. 
**Utilities and Extensions**](#3-utilities-and-extensions) +- [Contributing](#contributing) - [Publication (papers and blogs related to CLDK)](#publication-papers-and-blogs-related-to-cldk) -## Architectural and Design Overview - -Below is a very high-level overview of the architectural of CLDK: - - -```mermaid -graph TD -User <--> A[CLDK] - A --> 15[Retrieval ‡] - A --> 16[Prompting ‡] - A[CLDK] <--> B[Languages] - B --> C[Java, Python, Go ‡, C ‡, JavaScript ‡, TypeScript ‡, Rust ‡] - C --> D[Data Models] - D --> 13{Pydantic} - 13 --> 7 - C --> 7{backends} - 7 <--> 9[WALA] - 9 <--> 14[Analysis] - 7 <--> 10[Tree-sitter] - 10 <--> 14[Analysis] - 7 <--> 11[LLVM ‡] - 11 <--> 14[Analysis] - 7 <--> 12[CodeQL ‡] - 12 <--> 14[Analysis] - +## Quick Start -X[‡ Yet to be implemented] -``` +In this section, we will walk through a simple example to demonstrate how to get started with CLDK. -The user interacts by invoking the CLDK API. The CLDK API is responsible for handling the user requests and delegating them to the appropriate language-specific modules. +1. Install the CLDK package using pip: + + ```bash + pip install cldk + ``` -Each language comprises of two key components: data models and backends. -1. **Data Models:** These are high level abstractions that represent the various language constructs and componentes in a structured format using pydantic. This confers a high degree of flexibility and extensibility to the models as well as allowing for easy accees of various data components via a simple dot notation. In addition, the data models are designed to be easily serializable and deserializable, making it easy to store and retrieve data from various sources. +2. To use CLDK, just import the `CLDK` class from the `cldk` module: + + ```python + from cldk import CLDK + ``` -2. **Analysis Backends:** These are the components that are responsible for interfacing with the various program analysis tools. The core backends are Treesitter, Javaparse, WALA, LLVM, and CodeQL. 
The backends are responsible for handling the user requests and delegating them to the appropriate analysis tools. The analysis tools perfrom the requisite analysis and return the results to the user. The user merely calls one of several high-level API functions such as `get_method_body`, `get_method_signature`, `get_call_graph`, etc. and the backend takes care of the rest. +3. Next, to select a language for analysis, create an instance of the `CLDK` class with the desired language: - Some langugages may have multiple backends. For example, Java has WALA, Javaparser, Treesitter, and CodeQL backends. The user has freedom to choose the backend that best suits their needs. + ```python + cldk = CLDK(language="java") # For Java analysis + ``` -We are currently working on implementing the retrieval and prompting components. The retrieval component will be responsible for retrieving the relevant code snippets from the codebase for RAG usecases. The prompting component will be responsible for generating the prompts for the CodeLLMs using popular prompting frameworks such as `PDL`, `Guidance`, or `LMQL`. +4. Create an analysis object over the Java application by providing the path to the project: -## Quick Start: Example Walkthrough + ```python + analysis = cldk.analysis(project_path="/path/to/your/java/project") + ``` + This will initialize the analysis pipeline for the specified project. The analysis engine, in the backend, will parse the java project and build a symbol table representing the program structure and return the artifact to CLDK which will map it to the CLDK data schema (`cldk/models/java/models.py`). -In this section, we will walk through a simple example to demonstrate how to use CLDK. We will: + Depending on the size of the project, this step may take some time as it involves parsing, building, and statically analyzing the codebase. 
-* Set up a local ollama server to interact with CodeLLMs -* Build a simple code summarization pipeline for a Java and a Python application. +5. Once the analysis is complete, you can call the various methods provided by the `analysis` object to interact with the analyzed codebase. For example, you can retrieve method bodies, signatures, and call graphs. -### Prerequisites + ```python + # Iterate over all the files in the project + from cldk import CLDK -Before we begin, make sure you have the following prerequisites installed: + analysis: JavaAnalysis = CLDK(language="java").analysis(project_path="/path/to/your/java/project") + + all_files = [file_path for file_path, class_file in analysis.get_symbol_table().items()] - * Python 3.11 or later - * Ollama v0.3.4 or later + # Process each file + for file_path in all_files: + # Additional processing can be done here + pass + ``` -If you are using [Visual Studio Code](https://code.visualstudio.com) with the [Dev Containers](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) extension along with [Docker Desktop](https://www.docker.com/products/docker-desktop) or [Rancher Desktop](https://w3.ibm.com/w3publisher/docker-desktop/rancher-desktop), this project contains a Dev Container environment for you to develop in. + Likewise, you can also retrieve method bodies. -Use the following commands: + ```python + from cldk import CLDK -```bash -git clone https://github.com/codellm-devkit/python-sdk.git -cd python-dsk -code . 
-``` + analysis: JavaAnalysis = CLDK(language="java").analysis(project_path="/path/to/your/java/project") + for class_file in analysis.get_symbol_table().values(): + for type_name, type_declaration in class_file.type_declarations.items(): + for method in type_declaration.callable_declarations.values(): + method_body = analysis.get_method_body(method.declaration) + print(f"Method: {method.declaration}\nBody: {method_body}\n") + ``` -When Visual Studio Code starts, select the option to **Reopen in Container** and a development environment with Python, Java, C, and Rust will be available to you. See [Developing inside a Container](https://code.visualstudio.com/docs/devcontainers/containers) for more detailed instructions. +## Architectural and Design Overview -### Step 1: Set up an Ollama server +Below is a very high-level overview of the architecture of CLDK: -If don't already have ollama, please download and install it from here: [Ollama](https://ollama.com/download). -Once you have ollama, start the server and make sure it is running. 
+```mermaid +graph TD +User <--> A[CLDK] -If you're on MacOS, Linux, or WSL, you can check to make sure the server is running by running the following command: + A --> A1[cldk.analysis] + + A1 --> A2[cldk.analysis.java] + A2 --> A3[codeanalyzer → WALA] + A3 --> JA[Analysis] + + A1 --> A4[cldk.analysis.c] + A4 --> A5[clang] + A5 --> CA[Analysis] + + A1 --> A6[cldk.analysis.python] + A6 --> A7[treesitter_python] + A7 --> PA[Analysis] + + A1 --> A8[cldk.analysis.commons] + A8 --> LSP[LSP] + A8 --> TS[treesitter base] + A8 --> TU[treesitter utils] + + A --> M[cldk.models] + M --> MJ[Java models] + M --> MP[Python models] + M --> MC[C models] + M --> MT[treesitter models] + + A --> U[cldk.utils] + U --> UX[exceptions] + U --> UL[logging] + U --> US[sanitization] + US --> USJ[java sanitization] -```bash -sudo systemctl status ollama ``` -You should see an output similar to the following: - -```bash -➜ sudo systemctl status ollama -● ollama.service - Ollama Service - Loaded: loaded (/etc/systemd/system/ollama.service; enabled; preset: enabled) - Active: active (running) since Sat 2024-08-10 20:39:56 EDT; 17s ago - Main PID: 23069 (ollama) - Tasks: 19 (limit: 76802) - Memory: 1.2G (peak: 1.2G) - CPU: 6.745s - CGroup: /system.slice/ollama.service - └─23069 /usr/local/bin/ollama serve -``` +The user interacts with the CLDK API via the top-level `CLDK` interface exposed in `core.py`. This interface is responsible for configuring the analysis session, initializing language-specific pipelines, and exposing a high-level, language-agnostic API for interacting with program structure and semantics. -If not, you may have to start the server manually. You can do this by running the following command: +CLDK is currently implemented with full support for **Java**, **Python**, and **C**. Each language module is structured around two core components: **data models** and **analysis backends**. 
-```bash -sudo systemctl start ollama -``` -#### Pull the latest version of Granite 8b instruct model from ollama +### 1. **Data Models** -To pull the latest version of the Granite 8b instruct model from ollama, run the following command: +Each supported language has its own set of Pydantic-based data models, located in the `cldk.models` module (e.g., `cldk.models.java`, `cldk.models.python`, `cldk.models.c`). These models provide: -```bash -ollama pull granite-code:8b-instruct -``` +- **Structured representations** of language elements such as classes, methods, annotations, fields, and statements. +- **Typed access** using dot notation (e.g., `method.return_type` or `klass.methods`), promoting developer productivity. +- **Serialization support** to and from JSON and other formats, enabling easy storage, inspection, and exchange of analysis results. +- **Consistency** across languages via shared modeling conventions and base abstractions, including a common treesitter schema. -Check to make sure the model was successfully pulled by running the following command: -```bash -ollama run granite-code:8b-instruct 'Write a function to print hello world in python' -``` -The output should be similar to the following: +### 2. 
**Analysis Backends** -``` -➜ ollama run granite-code:8b-instruct 'Write a function to print hello world in python' +Each language has a dedicated analysis backend implemented under `cldk.analysis.<language>`, responsible for coordinating concrete analysis steps using language-specific tooling: -def say_hello(): - print("Hello World!") -``` +#### Java +- **Backend:** `cldk.analysis.java` +- **Tools:** JavaParser, WALA (via CodeAnalyzer JAR) +- **Capabilities:** Bytecode-level call graphs, symbol resolution, method declarations, type hierarchies -### Step 2: Install CLDK +#### Python +- **Backend:** `cldk.analysis.python` +- **Tools:** Tree-sitter +- **Capabilities:** Lightweight structural parsing, method/function boundaries, control/data flow approximation -You may install the latest version of CLDK from [PyPi](https://pypi.org/project/cldk/): +#### C +- **Backend:** `cldk.analysis.c` +- **Tools:** Clang frontend +- **Capabilities:** Structural symbol resolution and method/function layout using Clang AST -```python -pip install cldk -``` +All analysis backends share common infrastructure defined in `cldk.analysis.commons`, including: +- **Tree-sitter utilities** (`treesitter_java`, `treesitter_python`) +- **LSP integration hooks** +- **Generic model builders and transformation utilities** -Once CLDK is installed, you can import it into your Python code: +Backends are internally orchestrated such that the user does not interact with them directly. Instead, they simply call high-level SDK methods such as: ```python -from cldk import CLDK +get_method_body(...) +get_method_signature(...) +get_call_graph(...) ``` -### Step 3: Build a code summarization pipeline - -Now that we have set up the ollama server and installed CLDK, we can build a simple code summarization pipeline for a Java application. - -1. Let's download a sample Java (apache-commons-cli): +CLDK handles tool coordination, language resolution, parsing, transformation, and data marshalling under the hood. 
- * Download and unzip the sample Java application: - ```bash - wget https://github.com/apache/commons-cli/archive/refs/tags/rel/commons-cli-1.7.0.zip -O commons-cli-1.7.0.zip && unzip commons-cli-1.7.0.zip - ``` - * Record the path to the sample Java application: - ```bash - export JAVA_APP_PATH=/path/to/commons-cli-1.7.0 - ``` +--- -Below is a simple code summarization pipeline for a Java application using CLDK. It does the following things: +### 3. **Utilities and Extensions** -* Creates a new instance of the CLDK class (see comment `# (1)`) -* Creates an analysis object over the Java application (see comment `# (2)`) -* Iterates over all the files in the project (see comment `# (3)`) -* Iterates over all the classes in the file (see comment `# (4)`) -* Iterates over all the methods in the class (see comment `# (5)`) -* Gets the code body of the method (see comment `# (6)`) -* Initializes the treesitter utils for the class file content (see comment `# (7)`) -* Sanitizes the class for analysis (see comment `# (8)`) -* Formats the instruction for the given focal method and class (see comment `# (9)`) -* Prompts the local model on Ollama (see comment `# (10)`) -* Prints the instruction and LLM output (see comment `# (11)`) +The `cldk.utils` module provides additional support functionality: +- **Exception handling utilities** +- **Logging configuration** +- **Sanitization logic** (especially for Java, via `sanitization.java.treesitter_sanitizer`) -```python -# code_summarization_for_java.py - -from cldk import CLDK - - -def format_inst(code, focal_method, focal_class): - """ - Format the instruction for the given focal method and class. 
- """ - inst = f"Question: Can you write a brief summary for the method `{focal_method}` in the class `{focal_class}` below?\n" - - inst += "\n" - inst += f"```{language}\n" - inst += code - inst += "```" if code.endswith("\n") else "\n```" - inst += "\n" - return inst +These modules ensure robustness and clean error management across backend interactions and user API layers. -def prompt_ollama(message: str, model_id: str = "granite-code:8b-instruct") -> str: - """Prompt local model on Ollama""" - response_object = ollama.generate(model=model_id, prompt=message) - return response_object["response"] +## Contributing +We invite contributors of all levels of experience! We would love to see you get involved in the project. See the [CONTRIBUTING](./CONTRIBUTING.md) guide to get started. -if __name__ == "__main__": - # (1) Create a new instance of the CLDK class - cldk = CLDK(language="java") - - # (2) Create an analysis object over the java application - analysis = cldk.analysis(project_path=os.getenv("JAVA_APP_PATH")) - - # (3) Iterate over all the files in the project - for file_path, class_file in analysis.get_symbol_table().items(): - class_file_path = Path(file_path).absolute().resolve() - # (4) Iterate over all the classes in the file - for type_name, type_declaration in class_file.type_declarations.items(): - # (5) Iterate over all the methods in the class - for method in type_declaration.callable_declarations.values(): - - # (6) Get code body of the method - code_body = class_file_path.read_text() - - # (7) Initialize the treesitter utils for the class file content - tree_sitter_utils = cldk.tree_sitter_utils(source_code=code_body) - - # (8) Sanitize the class for analysis - sanitized_class = tree_sitter_utils.sanitize_focal_class(method.declaration) - - # (9) Format the instruction for the given focal method and class - instruction = format_inst( - code=sanitized_class, - focal_method=method.declaration, - focal_class=type_name, - ) - - # (10) Prompt the local 
model on Ollama - llm_output = prompt_ollama( - message=instruction, - model_id="granite-code:20b-instruct", - ) - - # (11) Print the instruction and LLM output - print(f"Instruction:\n{instruction}") - print(f"LLM Output:\n{llm_output}") -``` ### Publication (papers and blogs related to CLDK) 1. Krishna, Rahul, Rangeet Pan, Raju Pavuluri, Srikanth Tamilselvam, Maja Vukovic, and Saurabh Sinha. "[Codellm-Devkit: A Framework for Contextualizing Code LLMs with Program Analysis Insights.](https://arxiv.org/pdf/2410.13007)" arXiv preprint arXiv:2410.13007 (2024). diff --git a/pyproject.toml b/pyproject.toml index e8ff4c3..26b270f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cldk" -version = "0.5.1" +version = "1.0.0" description = "The official Python SDK for Codellm-Devkit." authors = ["Rahul Krishna ", "Rangeet Pan ", "Saurabh Sinhas ", "Raju Pavuluri "] @@ -27,14 +27,18 @@ include = [ "cldk/analysis/java/codeanalyzer/jar/*.jar" ] + +[tool.backend-versions] +codeanalyzer-java = "2.3.3" + [tool.poetry.dependencies] python = ">=3.11" pydantic = "^2.10.6" pandas = "^2.2.3" networkx = "^3.4.2" -pyarrow = "19.0.0" +pyarrow = "20.0.0" tree-sitter = "0.24.0" -rich = "13.9.4" +rich = "14.0.0" wget = "3.2" requests = "^2.32.3" tree-sitter-java = "0.23.5" @@ -52,7 +56,7 @@ ipython = "^8.32.0" [tool.poetry.group.test.dependencies] # Test dependencies toml = "^0.10.2" -pytest = "8.3.4" +pytest = "8.3.5" pytest-pspec = "^0.0.4" pytest-cov = "^6.0.0" pylint = "^3.3.4"