Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions docs/parser-zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,9 @@

### LanguageSpec

```
用于在 LSP 符号收集过程中转换为 UniAST 所需信息,并且这些信息非 LSP 通用定义

```go

// Detailed implementation used to collect LSP symbols and transform them into UniAST
type LanguageSpec interface {
// initialize a root workspace, and return all modules [modulename=>abs-path] inside
Expand All @@ -50,7 +48,7 @@ type LanguageSpec interface {
ShouldSkip(path string) bool

// FileImports parse file codes to get its imports
FileImports(content []byte) ([]uniast.Import, error)
FileImports(content []byte) ([]uniast.Import, error)

// return the first declaration token of a symbol, as Type-Name
DeclareTokenOfSymbol(sym DocumentSymbol) int
Expand Down
5 changes: 4 additions & 1 deletion docs/uniast-en.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Universal Abstract-Syntax-Tree Specification (v0.1.2)
# Universal Abstract-Syntax-Tree Specification (v0.1.3)

Universal Abstract-Syntax-Tree is an LLM-friendly, language-agnostic code context data structure established by ABCoder. It represents a unified abstract syntax tree of a repository's code, collecting definitions of language entities (functions, types, constants/variables) and their interdependencies for subsequent AI understanding and coding-workflow development.

Expand Down Expand Up @@ -102,6 +102,9 @@ A repository consists of entity Modules and relationship Graph

- Graph: Dependency topology graph of AST Nodes, see [Graph] below

- Path: The directory of the repository; it should usually be an absolute path

- ASTVersion: The UniAST version used to parse the repository

### Module

Expand Down
6 changes: 5 additions & 1 deletion docs/uniast-zh.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Universal Abstract-Syntax-Tree Specification (v0.1.2)
# Universal Abstract-Syntax-Tree Specification (v0.1.3)

Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言无关的代码上下文数据结构,表示某个仓库代码的统一抽象语法树。收集了语言实体(函数、类型、常(变)量)的定义及其相互依赖关系,用于后续的 AI 理解、coding-workflow 开发。

Expand Down Expand Up @@ -102,6 +102,10 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言

- Graph: AST Node 的依赖拓扑图,见下文【Graph】

- Path: 仓库的文件目录,通常应该为绝对路径

- ASTVersion: 解析时使用的 UniAST 版本


### Module

Expand Down
11 changes: 8 additions & 3 deletions lang/collect/collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,11 @@ func (c *Collector) Collect(ctx context.Context) error {

file := c.files[path]
if file == nil {
file = uniast.NewFile(path)
rel, err := filepath.Rel(c.repo, path)
if err != nil {
return err
}
file = uniast.NewFile(rel)
c.files[path] = file
}

Expand All @@ -179,8 +183,9 @@ func (c *Collector) Collect(ctx context.Context) error {
uses, err := c.spec.FileImports(content)
if err != nil {
log.Error("parse file %s use statements failed: %v", path, err)
} else {
file.Imports = uses
}
file.Imports = uses

// collect symbols
uri := NewURI(path)
Expand Down Expand Up @@ -209,7 +214,7 @@ func (c *Collector) Collect(ctx context.Context) error {
return nil
}
if err := filepath.Walk(c.repo, scanner); err != nil {
return err
log.Error("scan files failed: %v", err)
}

// collect some extra metadata
Expand Down
42 changes: 10 additions & 32 deletions lang/collect/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,46 +79,34 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) {
_, _ = c.exportSymbol(&repo, symbol, "", visited)
}

// connect file with package on demands
// for p, m := range repo.Modules {
// if p == "" || strings.Contains(p, "@") {
// continue
// }
// for _, f := range m.Files {
// if f.Package != "" {
// continue
// }
// _, pkgpath, err := c.spec.NameSpace(filepath.Join(c.repo, f.Path))
// if err != nil {
// continue
// }
// f.Package = pkgpath
// }
// }
for fp, f := range c.files {
if f.Package != "" {
continue
}
rel, err := filepath.Rel(c.repo, fp)
if err != nil {
continue
}

modpath, pkgpath, err := c.spec.NameSpace(fp)
if err != nil {
continue
}

// connect file to package
if modpath == "" || strings.Contains(modpath, "@") {
continue
}
m, ok := repo.Modules[modpath]
if !ok {
continue
}

m.Files[rel] = f
if pkgpath == "" || f.Package != "" {
continue
}
if _, ok := m.Packages[pkgpath]; !ok {
continue
}
f.Package = pkgpath
m.Files[rel] = f
}

return &repo, nil
Expand Down Expand Up @@ -199,25 +187,15 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
repo.Modules[mod] = newModule(mod, "", c.Language)
}
module := repo.Modules[mod]
if repo.Modules[mod].Packages[path] == nil {
repo.Modules[mod].Packages[path] = uniast.NewPackage(path)
if module.Packages[path] == nil {
module.Packages[path] = uniast.NewPackage(path)
}
pkg := repo.Modules[mod].Packages[path]
if c.spec.IsMainFunction(*symbol) {
pkg.IsMain = true
}

var relfile string
if c.internal(symbol.Location) {
relfile, _ = filepath.Rel(c.repo, file)
} else {
relfile = filepath.Base(file)
}
fileLine := c.fileLine(symbol.Location)
// collect files
if module.Files[relfile] == nil {
module.Files[relfile] = uniast.NewFile(relfile)
}

content := symbol.Text
public := c.spec.IsPublicSymbol(*symbol)
Expand Down
11 changes: 7 additions & 4 deletions lang/uniast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ const (
Golang Language = "go"
Rust Language = "rust"
Cxx Language = "cxx"
Python Language = "python"
Python Language = "python"
Unknown Language = ""
)

Expand Down Expand Up @@ -75,10 +75,11 @@ type NodeGraph map[string]*Node

// Repository
type Repository struct {
ASTVersion string
Name string `json:"id"` // module name
Path string // repo path
Modules map[string]*Module // module name => module
Graph NodeGraph // node id => node
ASTVersion string
}

func (r Repository) ID() string {
Expand All @@ -95,9 +96,11 @@ func (r Repository) InternalModules() []*Module {
return ret
}

// NOTICE: Repository.Path is set to name by default; if the name isn't a path, set Path explicitly elsewhere.
func NewRepository(name string) Repository {
ret := Repository{
Name: name,
Path: name,
Modules: map[string]*Module{},
Graph: map[string]*Node{},
ASTVersion: Version,
Expand Down Expand Up @@ -178,8 +181,8 @@ type Module struct {
Name string // go module name
Dir string // relative path to repo
Packages map[PkgPath]*Package // pkage import path => Package
Dependencies map[string]string // module name => module_path@version
Files map[string]*File // relative path => file info
Dependencies map[string]string `json:",omitempty"` // module name => module_path@version
Files map[string]*File `json:",omitempty"` // relative path => file info
CompressData *string `json:"compress_data,omitempty"` // module compress info
}

Expand Down
2 changes: 1 addition & 1 deletion lang/uniast/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@

package uniast

const Version = "v0.1.2"
const Version = "v0.1.3"
3 changes: 2 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ Language:
rust for rust codes
cxx for c codes (cpp support is on the way)
go for golang codes
python for python codes
`

func main() {
Expand Down Expand Up @@ -143,7 +144,7 @@ func main() {
if flagOutput != nil && *flagOutput != "" {
wopts.OutputDir = *flagOutput
} else {
wopts.OutputDir = filepath.Base(repo.Name)
wopts.OutputDir = filepath.Base(repo.Path)
}

if err := lang.Write(context.Background(), repo, wopts); err != nil {
Expand Down