Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions data/configuration/general.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ enabled = true
name = "pydoctor"
enabled = true

[[structure-extensions]]
name = "rustdoc"
enabled = true

# External Extension Examples
# Uncomment and modify these examples to add external documentation processors.

Expand Down
26 changes: 26 additions & 0 deletions sources/librovore/structures/rustdoc/__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# vim: set filetype=python fileencoding=utf-8:
# -*- coding: utf-8 -*-

#============================================================================#
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# #
#============================================================================#


''' Rustdoc subpackage import namespace. '''

# ruff: noqa: F403


from ..__ import *
33 changes: 33 additions & 0 deletions sources/librovore/structures/rustdoc/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# vim: set filetype=python fileencoding=utf-8:
# -*- coding: utf-8 -*-

#============================================================================#
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# #
#============================================================================#


''' Rustdoc documentation structure processor. '''


from .detection import RustdocDetection
from .main import RustdocStructureProcessor

from . import __


def register( arguments: __.cabc.Mapping[ str, __.typx.Any ] ) -> None:
    ''' Creates a Rustdoc structure processor and records it by name.

        The new instance is stored in ``__.structure_processors`` under its
        own ``name`` attribute. The ``arguments`` mapping is accepted but
        not consulted by this function.
    '''
    instance = RustdocStructureProcessor( )
    __.structure_processors[ instance.name ] = instance
49 changes: 49 additions & 0 deletions sources/librovore/structures/rustdoc/conversion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# vim: set filetype=python fileencoding=utf-8:
# -*- coding: utf-8 -*-

#============================================================================#
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# #
#============================================================================#


''' HTML to Markdown conversion for Rustdoc content. '''


from markdownify import markdownify as _md

from . import __


def convert_to_markdown( html: str ) -> str:
    ''' Renders Rustdoc HTML as Markdown text.

        Returns an empty string when the input is empty or consists only
        of whitespace. Otherwise the HTML is handed to ``markdownify`` with
        ATX headings, ``rust`` as the code language, and the ``nav``,
        ``aside``, ``header`` and ``footer`` tags listed in its ``strip``
        option; the result is returned with surrounding whitespace removed.
    '''
    has_content = bool( html ) and bool( html.strip( ) )
    if not has_content: return ''
    options = {
        'heading_style': 'ATX',
        'code_language': 'rust',
        'strip': [ 'nav', 'aside', 'header', 'footer' ],
    }
    rendered = _md( html, **options )
    return rendered.strip( )


def extract_code_language( element: __.typx.Any ) -> str:
    ''' Extracts code language from Rustdoc HTML element classes.

        Reads the ``class`` attribute of ``element`` via ``element.get``
        and returns the first class naming a recognized language (``rust``,
        ``toml``, ``text``, ``console``, ``sh`` or ``bash``). A leading
        ``language-`` prefix on a class name is ignored for matching.
        Returns ``'rust'`` when the attribute is missing, empty, or names
        no recognized language.

        The attribute may be a list of class names or a single
        whitespace-separated string.
    '''
    known_languages = ( 'rust', 'toml', 'text', 'console', 'sh', 'bash' )
    classes = element.get( 'class', [ ] )
    if not classes: return 'rust'
    # Depending on parser configuration, the attribute can arrive as one
    # string instead of a list; iterating that string directly would
    # compare single characters and never match.
    if isinstance( classes, str ): classes = classes.split( )
    for cls in classes:
        # NOTE(review): Rustdoc appears to tag non-Rust fences as
        # ``language-<name>`` -- confirm against current Rustdoc output.
        name = str( cls ).removeprefix( 'language-' )
        if name in known_languages: return name
    return 'rust'
120 changes: 120 additions & 0 deletions sources/librovore/structures/rustdoc/detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# vim: set filetype=python fileencoding=utf-8:
# -*- coding: utf-8 -*-

#============================================================================#
# #
# Licensed under the Apache License, Version 2.0 (the "License"); #
# you may not use this file except in compliance with the License. #
# You may obtain a copy of the License at #
# #
# http://www.apache.org/licenses/LICENSE-2.0 #
# #
# Unless required by applicable law or agreed to in writing, software #
# distributed under the License is distributed on an "AS IS" BASIS, #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and #
# limitations under the License. #
# #
#============================================================================#


''' Rustdoc documentation structure detection. '''


from bs4 import BeautifulSoup as _BeautifulSoup

from . import __
from . import extraction as _extraction


# Module-scoped scribe, keyed by this module's name; used below to report
# HTML parsing failures at debug level.
_scribe = __.acquire_scribe( __name__ )


class RustdocDetection( __.StructureDetection ):
    ''' Detection outcome describing a Rustdoc documentation source. '''

    source: str
    normalized_source: str = ''
    rustdoc_version: __.typx.Optional[ str ] = None

    @classmethod
    def get_capabilities( cls ) -> __.StructureProcessorCapabilities:
        ''' Reports the capabilities of the Rustdoc processor.

            Declares the ``rustdoc`` inventory type, four content
            extraction features (signatures, descriptions, code examples
            and cross references) and a confidence of 0.95 for ``rustdoc``.
        '''
        inventory_types = frozenset( ( 'rustdoc', ) )
        features = frozenset( (
            __.ContentExtractionFeatures.Signatures,
            __.ContentExtractionFeatures.Descriptions,
            __.ContentExtractionFeatures.CodeExamples,
            __.ContentExtractionFeatures.CrossReferences,
        ) )
        confidences = __.immut.Dictionary( { 'rustdoc': 0.95 } )
        return __.StructureProcessorCapabilities(
            supported_inventory_types = inventory_types,
            content_extraction_features = features,
            confidence_by_inventory_type = confidences,
        )

    @classmethod
    async def from_source(
        selfclass,
        auxdata: __.ApplicationGlobals,
        processor: __.Processor,
        source: str,
    ) -> __.typx.Self:
        ''' Builds a detection by asking the processor to detect the source.

            The value awaited from ``processor.detect`` is returned as-is;
            the cast only informs static type checkers.
        '''
        return __.typx.cast(
            __.typx.Self, await processor.detect( auxdata, source ) )

    async def extract_contents(
        self,
        auxdata: __.ApplicationGlobals,
        source: str,
        objects: __.cabc.Sequence[ __.InventoryObject ], /,
    ) -> tuple[ __.ContentDocument, ... ]:
        ''' Retrieves documentation content for the given objects.

            Delegates to the extraction module and returns its documents
            as a tuple.
        '''
        return tuple(
            await _extraction.extract_contents( auxdata, source, objects ) )


async def detect_rustdoc(
    auxdata: __.ApplicationGlobals, source_url: __.typx.Any
) -> tuple[ bool, __.typx.Optional[ str ] ]:
    ''' Determines whether a source serves Rustdoc-generated pages.

        Retrieves the page text through the content cache (with
        ``duration_max`` of 10 seconds), parses it with the ``lxml``
        parser and inspects it for Rustdoc markers. Returns a pair of
        (is Rustdoc, version or ``None``). The negative pair
        ``( False, None )`` is returned when the page is inaccessible,
        absent, or cannot be parsed; parse failures are logged at debug
        level.
    '''
    negative = ( False, None )
    try:
        page_text = await __.retrieve_url_as_text(
            auxdata.content_cache, source_url, duration_max = 10.0 )
    except __.DocumentationInaccessibility:
        return negative
    if __.is_absent( page_text ):
        return negative
    try:
        document = _BeautifulSoup( page_text, 'lxml' )
    except Exception as exc:
        _scribe.debug( f"HTML parsing failed for {source_url}: {exc}" )
        return negative
    found, version = detect_rustdoc_markers( document )
    return found, version


def detect_rustdoc_markers(
    soup: __.typx.Any
) -> tuple[ bool, __.typx.Optional[ str ] ]:
    ''' Detects Rustdoc-specific HTML markers.

        Markers are checked in this order, returning on the first hit:

        1. A ``<meta name="generator">`` element whose content contains
           ``rustdoc`` (case-insensitive); the content string is returned
           as the version.
        2. Any element carrying a ``data-rustdoc-version`` attribute; the
           attribute value is returned as the version.
        3. A ``rustdoc-topbar`` element; no version is available.
        4. A ``<link>`` whose ``href`` matches ``rustdoc.*\\.css``; no
           version is available.

        Returns ``( False, None )`` when no marker is found.
    '''
    meta_generator = soup.find( 'meta', attrs = { 'name': 'generator' } )
    if meta_generator:
        content = str( meta_generator.get( 'content', '' ) )
        if 'rustdoc' in content.lower( ): return True, content
    # Look the version-bearing element up once. Its presence alone marks
    # the page as Rustdoc, whether or not a top bar exists, so a single
    # check covers both cases that need the version value.
    version_holder = soup.find( attrs = { 'data-rustdoc-version': True } )
    if version_holder:
        return True, version_holder.get( 'data-rustdoc-version' )
    if soup.find( 'rustdoc-topbar' ): return True, None
    css_pattern = __.re.compile( r'rustdoc.*\.css' )
    if soup.find( 'link', href = css_pattern ): return True, None
    return False, None
Loading