From 802ae2d14d84af7669a537dfd7de29b6a9b27c1b Mon Sep 17 00:00:00 2001
From: Peter Collins
Date: Tue, 22 Nov 2022 17:18:30 -0500
Subject: [PATCH 1/2] [Java.Interop.Tools.JavaSource] Handle common parsing exceptions

Fixes the 91 instances of exceptions being thrown when generating API
docs for the API 33 version of Mono.Android.

The two most common instances were:

    System.IndexOutOfRangeException: Index was outside the bounds of the array.
      at Java.Interop.Tools.JavaSource.UnknownHtmlElementStartTerminal.TryMatch (Irony.Parsing.ParsingContext context, Irony.Parsing.ISourceStream source) [0x0003e] in /Users/builder/azdo/_work/4/s/external/Java.Interop/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs:390

    System.ArgumentOutOfRangeException: Index was out of range. Must be non-negative and less than the size of the collection.
    Parameter name: index
      at System.Collections.Generic.List`1[T].get_Item (System.Int32 index) [0x00009] in <08f46039e5064c628bf7795f9b970b7b>:0
      at Java.Interop.Tools.JavaSource.SourceJavadocToXmldocGrammar+BlockTagsBnfTerms+<>c__DisplayClass1_0.b__8 (Irony.Ast.AstContext context, Irony.Parsing.ParseTreeNode parseNode) [0x00025] in /Users/builder/azdo/_work/4/s/external/Java.Interop/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.BlockTagsBnfTerms.cs:151

35 of the 118 href element parsing errors have also been fixed.
--- ...avadocToXmldocGrammar.BlockTagsBnfTerms.cs | 6 ++- ...urceJavadocToXmldocGrammar.HtmlBnfTerms.cs | 40 ++++++++++++------- ...avadocToXmldocGrammar.HtmlBnfTermsTests.cs | 5 +++ 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.BlockTagsBnfTerms.cs b/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.BlockTagsBnfTerms.cs index 283113df6..7465b5c41 100644 --- a/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.BlockTagsBnfTerms.cs +++ b/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.BlockTagsBnfTerms.cs @@ -145,8 +145,12 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar) if (!grammar.ShouldImport (ImportJavadoc.ReturnTag)) { return; } - // When encountering multiple @return keys in a line, append subsequent @return key content to the original element. var jdi = FinishParse (context, parseNode); + // If we have no return value, continue + if (parseNode.ChildNodes.Count < 2) { + return; + } + // When encountering multiple @return keys in a line, append subsequent @return key content to the original element. 
if (jdi.Returns.Count == 0) { var r = new XElement ("returns", AstNodeToXmlContent (parseNode.ChildNodes [1])); diff --git a/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs b/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs index 51fb17151..f8e55e228 100644 --- a/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs +++ b/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs @@ -2,6 +2,8 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Net; +using System.Xml; using System.Xml.Linq; using Irony.Ast; @@ -111,20 +113,17 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar) } } - XNode astNodeElement = new XText (unparsedAElementValue); - try { - var seeElement = XElement.Parse ($""); - var hrefValue = seeElement.Attribute ("href")?.Value ?? string.Empty; - if (!string.IsNullOrEmpty (hrefValue) && - (hrefValue.StartsWith ("http", StringComparison.OrdinalIgnoreCase) || hrefValue.StartsWith ("www", StringComparison.OrdinalIgnoreCase))) { - parseNode.AstNode = seeElement; - } else { - // TODO: Need to convert relative paths or code references to appropriate CREF value. - parseNode.AstNode = astNodeElement; - } - } catch (Exception) { - Console.Error.WriteLine ($"# Unable to parse HTML element: "); - parseNode.AstNode = astNodeElement; + var seeElement = TryParseHRef (unparsedAElementValue); + if (seeElement == null) + seeElement = TryParseHRef (WebUtility.HtmlDecode (unparsedAElementValue), logError: true); + + var hrefValue = seeElement?.Attribute ("href")?.Value ?? 
string.Empty; + if (!string.IsNullOrEmpty (hrefValue) && + (hrefValue.StartsWith ("http", StringComparison.OrdinalIgnoreCase) || hrefValue.StartsWith ("www", StringComparison.OrdinalIgnoreCase))) { + parseNode.AstNode = seeElement; + } else { + // TODO: Need to convert relative paths or code references to appropriate CREF value. + parseNode.AstNode = new XText (unparsedAElementValue); } }; @@ -185,6 +184,17 @@ static IEnumerable GetParagraphs (ParseTreeNodeList children) } } + static XElement? TryParseHRef (string unparsedAElementValue, bool logError = false) + { + try { + return XElement.Parse ($""); + } catch (Exception x) { + if (logError) + Console.Error.WriteLine ($"## Unable to parse HTML element: \n{x.GetType ()}: {x.Message}"); + return null; + } + } + public readonly NonTerminal AllHtmlTerms = new NonTerminal (nameof (AllHtmlTerms), ConcatChildNodes); public readonly NonTerminal TopLevelInlineDeclaration = new NonTerminal (nameof (TopLevelInlineDeclaration), ConcatChildNodes); @@ -387,7 +397,7 @@ public override void Init (GrammarData grammarData) source.PreviewPosition += 1; int start = source.Location.Position; int stop = start; - while (source.Text [stop] != '>' && stop < source.Text.Length) + while (stop < source.Text.Length && source.Text [stop] != '>' ) stop++; if (addingRemarks) { Console.Error.WriteLine ($"# Unsupported HTML element: {source.Text.Substring (start, stop - start)}"); diff --git a/tests/Java.Interop.Tools.JavaSource-Tests/SourceJavadocToXmldocGrammar.HtmlBnfTermsTests.cs b/tests/Java.Interop.Tools.JavaSource-Tests/SourceJavadocToXmldocGrammar.HtmlBnfTermsTests.cs index 2ca926a44..eaef58232 100644 --- a/tests/Java.Interop.Tools.JavaSource-Tests/SourceJavadocToXmldocGrammar.HtmlBnfTermsTests.cs +++ b/tests/Java.Interop.Tools.JavaSource-Tests/SourceJavadocToXmldocGrammar.HtmlBnfTermsTests.cs @@ -62,6 +62,11 @@ public void HyperLinkDeclaration () Assert.AreEqual ("application", r.Root.AstNode.ToString ()); + r = p.Parse ("RFC 2396: 
Uniform Resource Identifiers (URI): Generic Syntax"); + Assert.IsFalse (r.HasErrors (), DumpMessages (r, p)); + Assert.AreEqual ("RFC 2396: Uniform Resource Identifiers (URI): Generic Syntax", + r.Root.AstNode.ToString ()); + r = p.Parse ("field classification"); Assert.IsFalse (r.HasErrors (), DumpMessages (r, p)); Assert.AreEqual ("\"AutofillService.html#FieldClassification\">field classification", From 2637f17ee633924cfec255bebebae3e959eaf27d Mon Sep 17 00:00:00 2001 From: Jonathan Pryor Date: Tue, 29 Nov 2022 14:55:35 -0500 Subject: [PATCH 2/2] Fix formatting --- .../SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs b/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs index f8e55e228..d716da20c 100644 --- a/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs +++ b/src/Java.Interop.Tools.JavaSource/Java.Interop.Tools.JavaSource/SourceJavadocToXmldocGrammar.HtmlBnfTerms.cs @@ -119,7 +119,7 @@ internal void CreateRules (SourceJavadocToXmldocGrammar grammar) var hrefValue = seeElement?.Attribute ("href")?.Value ?? string.Empty; if (!string.IsNullOrEmpty (hrefValue) && - (hrefValue.StartsWith ("http", StringComparison.OrdinalIgnoreCase) || hrefValue.StartsWith ("www", StringComparison.OrdinalIgnoreCase))) { + (hrefValue.StartsWith ("http", StringComparison.OrdinalIgnoreCase) || hrefValue.StartsWith ("www", StringComparison.OrdinalIgnoreCase))) { parseNode.AstNode = seeElement; } else { // TODO: Need to convert relative paths or code references to appropriate CREF value.