diff --git a/docs/parser-zh.md b/docs/parser-zh.md index 691d253f..53b002e0 100644 --- a/docs/parser-zh.md +++ b/docs/parser-zh.md @@ -31,11 +31,9 @@ ### LaunguageSpec -``` 用于在 LSP 符号收集过程中转换为 UniAST 所需信息,并且这些信息非 LSP 通用定义 ```go - // Detailed implementation used for collect LSP symbols and transform them to UniAST type LanguageSpec interface { // initialize a root workspace, and return all modules [modulename=>abs-path] inside @@ -50,7 +48,7 @@ type LanguageSpec interface { ShouldSkip(path string) bool // FileImports parse file codes to get its imports - FileImports(content []byte) ([]uniast.Import, error) + FileImports(content []byte) ([]uniast.Import, error) // return the first declaration token of a symbol, as Type-Name DeclareTokenOfSymbol(sym DocumentSymbol) int diff --git a/docs/uniast-en.md b/docs/uniast-en.md index 1add6e50..48ff999a 100644 --- a/docs/uniast-en.md +++ b/docs/uniast-en.md @@ -1,4 +1,4 @@ -# Universal Abstract-Syntax-Tree Specification (v0.1.2) +# Universal Abstract-Syntax-Tree Specification (v0.1.3) Universal Abstract-Syntax-Tree is a LLM-friendly, language-agnostic code context data structure established by ABCoder. It represents a unified abstract syntax tree of a repository's code, collecting definitions of language entities (functions, types, constants/variables) and their interdependencies for subsequent AI understanding and coding-workflow development. 
@@ -102,6 +102,9 @@ A repository consists of entity Modules and relationship Graph - Graph: Dependency topology graph of AST Nodes, see [Graph] below +- Path: The file directory of the repository, which should usually be an absolute path + +- ASTVersion: The UniAST version used to parse the repository ### Module diff --git a/docs/uniast-zh.md b/docs/uniast-zh.md index d20594b3..2a7d6448 100644 --- a/docs/uniast-zh.md +++ b/docs/uniast-zh.md @@ -1,4 +1,4 @@ -# Universal Abstract-Syntax-Tree Specification (v0.1.2) +# Universal Abstract-Syntax-Tree Specification (v0.1.3) Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言无关的代码上下文数据结构,表示某个仓库代码的统一抽象语法树。收集了语言实体(函数、类型、常(变)量)的定义及其相互依赖关系,用于后续的 AI 理解、coding-workflow 开发。 @@ -102,6 +102,10 @@ Universal Abstract-Syntax-Tree 是 ABCoder 建立的一种 LLM 亲和、语言 - Graph: AST Node 的依赖拓扑图,见下文【Graph】 +- Path: 仓库的文件目录,通常应该为绝对路径 + +- ASTVersion: 解析时使用的 UniAST 版本 + ### Module diff --git a/lang/collect/collect.go b/lang/collect/collect.go index 9f0d89f9..0dae1151 100644 --- a/lang/collect/collect.go +++ b/lang/collect/collect.go @@ -167,7 +167,11 @@ func (c *Collector) Collect(ctx context.Context) error { file := c.files[path] if file == nil { - file = uniast.NewFile(path) + rel, err := filepath.Rel(c.repo, path) + if err != nil { + return err + } + file = uniast.NewFile(rel) c.files[path] = file } @@ -179,8 +183,9 @@ func (c *Collector) Collect(ctx context.Context) error { uses, err := c.spec.FileImports(content) if err != nil { log.Error("parse file %s use statements failed: %v", path, err) + } else { + file.Imports = uses } - file.Imports = uses // collect symbols uri := NewURI(path) @@ -209,7 +214,7 @@ func (c *Collector) Collect(ctx context.Context) error { return nil } if err := filepath.Walk(c.repo, scanner); err != nil { - return err + log.Error("scan files failed: %v", err) } // collect some extra metadata diff --git a/lang/collect/export.go b/lang/collect/export.go index 41324f74..c15f10c6 100644 --- a/lang/collect/export.go +++ b/lang/collect/export.go 
@@ -79,34 +79,18 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) { _, _ = c.exportSymbol(&repo, symbol, "", visited) } - // connect file with package on demands - // for p, m := range repo.Modules { - // if p == "" || strings.Contains(p, "@") { - // continue - // } - // for _, f := range m.Files { - // if f.Package != "" { - // continue - // } - // _, pkgpath, err := c.spec.NameSpace(filepath.Join(c.repo, f.Path)) - // if err != nil { - // continue - // } - // f.Package = pkgpath - // } - // } for fp, f := range c.files { - if f.Package != "" { - continue - } rel, err := filepath.Rel(c.repo, fp) if err != nil { continue } + modpath, pkgpath, err := c.spec.NameSpace(fp) if err != nil { continue } + + // connect file to package if modpath == "" || strings.Contains(modpath, "@") { continue } @@ -114,11 +98,15 @@ func (c *Collector) Export(ctx context.Context) (*uniast.Repository, error) { if !ok { continue } + + m.Files[rel] = f + if pkgpath == "" || f.Package != "" { + continue + } if _, ok := m.Packages[pkgpath]; !ok { continue } f.Package = pkgpath - m.Files[rel] = f } return &repo, nil @@ -199,25 +187,15 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol repo.Modules[mod] = newModule(mod, "", c.Language) } module := repo.Modules[mod] - if repo.Modules[mod].Packages[path] == nil { - repo.Modules[mod].Packages[path] = uniast.NewPackage(path) + if module.Packages[path] == nil { + module.Packages[path] = uniast.NewPackage(path) } pkg := repo.Modules[mod].Packages[path] if c.spec.IsMainFunction(*symbol) { pkg.IsMain = true } - var relfile string - if c.internal(symbol.Location) { - relfile, _ = filepath.Rel(c.repo, file) - } else { - relfile = filepath.Base(file) - } fileLine := c.fileLine(symbol.Location) - // collect files - if module.Files[relfile] == nil { - module.Files[relfile] = uniast.NewFile(relfile) - } content := symbol.Text public := c.spec.IsPublicSymbol(*symbol) diff --git 
a/lang/uniast/ast.go b/lang/uniast/ast.go index f1649595..dde92030 100644 --- a/lang/uniast/ast.go +++ b/lang/uniast/ast.go @@ -30,7 +30,7 @@ const ( Golang Language = "go" Rust Language = "rust" Cxx Language = "cxx" - Python Language = "python" + Python Language = "python" Unknown Language = "" ) @@ -75,10 +75,11 @@ type NodeGraph map[string]*Node // Repository type Repository struct { + ASTVersion string Name string `json:"id"` // module name + Path string // repo path Modules map[string]*Module // module name => module Graph NodeGraph // node id => node - ASTVersion string } func (r Repository) ID() string { @@ -95,9 +96,11 @@ func (r Repository) InternalModules() []*Module { return ret } +// NOTICE: Repository.Path is set to name by default; if the name isn't a path, set Path somewhere else func NewRepository(name string) Repository { ret := Repository{ Name: name, + Path: name, Modules: map[string]*Module{}, Graph: map[string]*Node{}, ASTVersion: Version, @@ -178,8 +181,8 @@ type Module struct { Name string // go module name Dir string // relative path to repo Packages map[PkgPath]*Package // pkage import path => Package - Dependencies map[string]string // module name => module_path@version - Files map[string]*File // relative path => file info + Dependencies map[string]string `json:",omitempty"` // module name => module_path@version + Files map[string]*File `json:",omitempty"` // relative path => file info CompressData *string `json:"compress_data,omitempty"` // module compress info } diff --git a/lang/uniast/version.go b/lang/uniast/version.go index 541f9fdf..5d84a052 100644 --- a/lang/uniast/version.go +++ b/lang/uniast/version.go @@ -16,4 +16,4 @@ package uniast -const Version = "v0.1.2" +const Version = "v0.1.3" diff --git a/main.go b/main.go index 543b0e15..555ba070 100644 --- a/main.go +++ b/main.go @@ -59,6 +59,7 @@ Language: rust for rust codes cxx for c codes (cpp support is on the way) go for golang codes + python for python codes ` func main() { @@ 
-143,7 +144,7 @@ func main() { if flagOutput != nil && *flagOutput != "" { wopts.OutputDir = *flagOutput } else { - wopts.OutputDir = filepath.Base(repo.Name) + wopts.OutputDir = filepath.Base(repo.Path) } if err := lang.Write(context.Background(), repo, wopts); err != nil {