+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/Css/Css.cs b/src/Web/Css.cs
similarity index 99%
rename from src/Css/Css.cs
rename to src/Web/Css.cs
index b270d4b..91d4378 100644
--- a/src/Css/Css.cs
+++ b/src/Web/Css.cs
@@ -5,7 +5,7 @@
using System.Linq;
using System.Text;
-namespace Devlooped.Xml.Css;
+namespace Devlooped.Web;
class SelectorGroup : List
{
diff --git a/src/Css/CssSelectorExtensions.cs b/src/Web/CssSelectorExtensions.cs
similarity index 99%
rename from src/Css/CssSelectorExtensions.cs
rename to src/Web/CssSelectorExtensions.cs
index ff11243..1bd61f2 100644
--- a/src/Css/CssSelectorExtensions.cs
+++ b/src/Web/CssSelectorExtensions.cs
@@ -6,7 +6,7 @@
using System.Xml.XPath;
using System.Xml.Xsl;
-namespace Devlooped.Xml.Css;
+namespace Devlooped.Web;
///
/// Provides extension methods for that allow selecting
diff --git a/src/Web/HtmlDocument.cs b/src/Web/HtmlDocument.cs
new file mode 100644
index 0000000..abc3260
--- /dev/null
+++ b/src/Web/HtmlDocument.cs
@@ -0,0 +1,280 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Xml;
+using System.Xml.Linq;
+using Sgml;
+
+namespace Devlooped.Web;
+
+///
+/// Allows loading an HTML document as an .
+///
+public static class HtmlDocument
+{
+ const string DefaultPublicIdentifier = "-//W3C//DTD XHTML 1.0 Transitional//EN";
+ const string DefaultSystemLiteral = "http://www.w3.org/TR/html4/loose.dtd";
+
+ /// <summary>
+ /// Loads HTML from the given <paramref name="stream"/> into a new
+ /// <see cref="XDocument"/>, using <see cref="HtmlReaderSettings.Default"/>.
+ /// </summary>
+ /// <param name="stream">
+ /// A <see cref="Stream"/> containing the raw HTML to read.
+ /// </param>
+ /// <returns>
+ /// A new <see cref="XDocument"/> with the contents read from
+ /// <paramref name="stream"/>.
+ /// </returns>
+ public static XDocument Load(Stream stream)
+ {
+     return Load(stream, HtmlReaderSettings.Default);
+ }
+
+ /// <summary>
+ /// Loads the HTML referenced by <paramref name="uri"/> into a new
+ /// <see cref="XDocument"/>, using <see cref="HtmlReaderSettings.Default"/>.
+ /// </summary>
+ /// <remarks>
+ /// The string is a reference to the content, not the content itself. Use
+ /// <see cref="Parse(string)"/> to create an <see cref="XDocument"/> from
+ /// a string containing HTML.
+ /// </remarks>
+ /// <param name="uri">
+ /// A URI string referencing the file to load into a new <see cref="XDocument"/>.
+ /// </param>
+ /// <returns>
+ /// An <see cref="XDocument"/> initialized with the contents of the file
+ /// referenced by <paramref name="uri"/>.
+ /// </returns>
+ public static XDocument Load(string uri)
+ {
+     return Load(uri, HtmlReaderSettings.Default);
+ }
+
+ /// <summary>
+ /// Loads HTML from the given <paramref name="textReader"/> into a new
+ /// <see cref="XDocument"/>, using <see cref="HtmlReaderSettings.Default"/>.
+ /// </summary>
+ /// <param name="textReader">
+ /// A <see cref="TextReader"/> containing the raw HTML to read.
+ /// </param>
+ /// <returns>
+ /// A new <see cref="XDocument"/> with the contents read from
+ /// <paramref name="textReader"/>.
+ /// </returns>
+ public static XDocument Load(TextReader textReader)
+ {
+     return Load(textReader, HtmlReaderSettings.Default);
+ }
+
+ /// <summary>
+ /// Loads HTML from the given <paramref name="stream"/> into a new
+ /// <see cref="XDocument"/>, applying the given <paramref name="settings"/>.
+ /// </summary>
+ /// <param name="stream">
+ /// A <see cref="Stream"/> containing the raw HTML to read. It is wrapped
+ /// in a <see cref="StreamReader"/> before being parsed.
+ /// </param>
+ /// <param name="settings">The settings for the HTML load process.</param>
+ /// <returns>
+ /// A new <see cref="XDocument"/> with the contents read from
+ /// <paramref name="stream"/>.
+ /// </returns>
+ public static XDocument Load(Stream stream, HtmlReaderSettings settings)
+ {
+     var textReader = new StreamReader(stream);
+     return Load(textReader, settings);
+ }
+
+ /// <summary>
+ /// Loads the HTML referenced by <paramref name="uri"/> into a new
+ /// <see cref="XDocument"/>, applying the given <paramref name="settings"/>.
+ /// </summary>
+ /// <remarks>
+ /// The string is a reference to the content, not the content itself. Use
+ /// <see cref="Parse(string, HtmlReaderSettings)"/> to create an
+ /// <see cref="XDocument"/> from a string containing HTML.
+ /// </remarks>
+ /// <param name="uri">
+ /// A URI string referencing the file to load into a new <see cref="XDocument"/>.
+ /// </param>
+ /// <param name="settings">The settings for the HTML load process.</param>
+ /// <returns>
+ /// An <see cref="XDocument"/> initialized with the contents of the file
+ /// referenced by <paramref name="uri"/>.
+ /// </returns>
+ public static XDocument Load(string uri, HtmlReaderSettings settings)
+ {
+     // The SGML reader is disposed as soon as the document has been fully loaded.
+     using (var sgml = new SgmlReader { Href = uri })
+     {
+         var configured = Configure(sgml, settings);
+         return XDocument.Load(configured);
+     }
+ }
+
+ /// <summary>
+ /// Loads HTML from the given <paramref name="textReader"/> into a new
+ /// <see cref="XDocument"/>, applying the given <paramref name="settings"/>.
+ /// </summary>
+ /// <param name="textReader">
+ /// A <see cref="TextReader"/> containing the raw HTML to read.
+ /// </param>
+ /// <param name="settings">
+ /// The settings for the HTML load process, including how whitespace is handled.
+ /// </param>
+ /// <returns>
+ /// A new <see cref="XDocument"/> with the contents read from
+ /// <paramref name="textReader"/>.
+ /// </returns>
+ public static XDocument Load(TextReader textReader, HtmlReaderSettings settings)
+ {
+     // The SGML reader is disposed as soon as the document has been fully loaded.
+     using (var sgml = new SgmlReader { InputStream = textReader })
+     {
+         var configured = Configure(sgml, settings);
+         return XDocument.Load(configured);
+     }
+ }
+
+ /// <summary>
+ /// Creates a new <see cref="XDocument"/> from a string containing HTML,
+ /// using <see cref="HtmlReaderSettings.Default"/>.
+ /// </summary>
+ /// <param name="html">A string containing HTML.</param>
+ /// <returns>
+ /// An <see cref="XDocument"/> containing an XML tree initialized from the
+ /// passed in HTML string.
+ /// </returns>
+ public static XDocument Parse(string html)
+ {
+     return Parse(html, HtmlReaderSettings.Default);
+ }
+
+ /// <summary>
+ /// Creates a new <see cref="XDocument"/> from a string containing HTML,
+ /// applying the given <paramref name="settings"/>.
+ /// </summary>
+ /// <param name="html">A string containing HTML.</param>
+ /// <param name="settings">The settings for the HTML load process.</param>
+ /// <returns>
+ /// An <see cref="XDocument"/> containing an XML tree initialized from the
+ /// passed in HTML string.
+ /// </returns>
+ public static XDocument Parse(string html, HtmlReaderSettings settings)
+ {
+     var textReader = new StringReader(html);
+     return Load(textReader, settings);
+ }
+
+ /// <summary>
+ /// Applies the fixed HTML doctype plus the given <paramref name="settings"/> to
+ /// <paramref name="reader"/>, and wraps it with the optional filtering readers.
+ /// </summary>
+ /// <param name="reader">The SGML reader to configure.</param>
+ /// <param name="settings">The settings for the HTML load process.</param>
+ /// <returns>
+ /// The reader to load the document from: <paramref name="reader"/> itself, or a
+ /// wrapper around it when namespaces are ignored and/or elements are skipped.
+ /// </returns>
+ /// <exception cref="ArgumentNullException"><paramref name="settings"/> is null.</exception>
+ static XmlReader Configure(SgmlReader reader, HtmlReaderSettings settings)
+ {
+     // Fail with a descriptive exception rather than a NullReferenceException below.
+     if (settings == null)
+         throw new ArgumentNullException(nameof(settings));
+
+     reader.DocType = "html";
+     reader.PublicIdentifier = DefaultPublicIdentifier;
+     reader.SystemLiteral = DefaultSystemLiteral;
+
+     reader.CaseFolding = settings.CaseFolding;
+     reader.WhitespaceHandling = settings.WhitespaceHandling;
+     reader.TextWhitespace = settings.TextWhitespace;
+
+     XmlReader result = reader;
+
+     if (settings.IgnoreXmlNamespaces)
+         result = new IgnoreXmlNsReader(result);
+
+     // SkipElements has a public setter, so treat null the same as "nothing to skip".
+     var skipElements = settings.SkipElements;
+     if (skipElements != null && skipElements.Length > 0)
+         result = new SkipElementsReader(result, skipElements);
+
+     return result;
+ }
+
+ /// <summary>
+ /// Removes all XML namespaces, since for HTML content it's typically
+ /// irrelevant.
+ /// </summary>
+ /// <remarks>
+ /// Namespace declaration attributes are hidden from attribute navigation, and
+ /// every node reports an empty prefix and namespace unless it uses the
+ /// built-in <c>xml</c> prefix.
+ /// </remarks>
+ class IgnoreXmlNsReader : XmlWrappingReader
+ {
+     const string XmlNsNamespace = "http://www.w3.org/2000/xmlns/";
+
+     public IgnoreXmlNsReader(XmlReader baseReader) : base(baseReader) { }
+
+     /// <summary>
+     /// Number of attributes on the current node, not counting the hidden
+     /// namespace declarations.
+     /// </summary>
+     public override int AttributeCount
+     {
+         get
+         {
+             var count = 0;
+             for (var go = MoveToFirstAttribute(); go; go = MoveToNextAttribute())
+                 count++;
+
+             // Counting walks the attributes; move back to the owning node so that
+             // reading this property does not leave the reader on the last attribute.
+             if (count > 0)
+                 MoveToElement();
+
+             return count;
+         }
+     }
+
+     /// <summary>
+     /// Moves to the first attribute that is not a namespace declaration. When
+     /// there is none, the reader is returned to the owning element.
+     /// </summary>
+     public override bool MoveToFirstAttribute()
+     {
+         var moved = base.MoveToFirstAttribute();
+         // MoveToNextAttribute already skips any further namespace declarations.
+         if (moved && IsNamespaceDeclaration)
+             moved = MoveToNextAttribute();
+
+         if (!moved)
+             base.MoveToElement();
+
+         return moved;
+     }
+
+     /// <summary>
+     /// Moves to the next attribute that is not a namespace declaration.
+     /// </summary>
+     public override bool MoveToNextAttribute()
+     {
+         var moved = base.MoveToNextAttribute();
+         while (moved && IsNamespaceDeclaration)
+             moved = base.MoveToNextAttribute();
+
+         return moved;
+     }
+
+     /// <summary>
+     /// We only support the xml prefix, used for xml:lang and xml:space
+     /// built-in text handling in XHTML.
+     /// </summary>
+     public override string Prefix => base.Prefix == "xml" ? "xml" : "";
+
+     /// <summary>
+     /// Empty for everything except nodes using the <c>xml</c> prefix.
+     /// </summary>
+     public override string NamespaceURI => Prefix == "xml" ? base.NamespaceURI : "";
+
+     bool IsNamespaceDeclaration => IsXmlNs || IsLocalXmlNs;
+
+     bool IsXmlNs => base.NamespaceURI == XmlNsNamespace;
+
+     // Must check base.Prefix: the overridden Prefix only ever returns "xml" or "",
+     // so comparing it against "xmlns" could never match.
+     bool IsLocalXmlNs => base.Prefix == "xmlns";
+ }
+
+ /// <summary>
+ /// Skips the configured elements (together with their content) while reading,
+ /// so they are not loaded into the resulting document.
+ /// </summary>
+ class SkipElementsReader : XmlWrappingReader
+ {
+     // Element names to skip, compared by local name ignoring case.
+     readonly HashSet<string> skipElements;
+
+     public SkipElementsReader(XmlReader baseReader, string[] skipElements) : base(baseReader)
+     {
+         this.skipElements = new HashSet<string>(skipElements, StringComparer.OrdinalIgnoreCase);
+     }
+
+     /// <summary>
+     /// Reads the next node, jumping over any element whose local name is in the
+     /// skip list.
+     /// </summary>
+     /// <returns>
+     /// <see langword="true"/> if the reader is positioned on a node;
+     /// <see langword="false"/> if the end of the input was reached, including when
+     /// a skipped element was the last content.
+     /// </returns>
+     public override bool Read()
+     {
+         if (!base.Read())
+             return false;
+
+         // Skip() leaves the reader on whatever follows the skipped element, which
+         // may be another element to skip or the end of the input.
+         while (base.NodeType == XmlNodeType.Element && skipElements.Contains(LocalName))
+         {
+             base.Skip();
+             if (EOF)
+                 return false;
+         }
+
+         return true;
+     }
+ }
+}
+
diff --git a/src/Web/HtmlReaderSettings.cs b/src/Web/HtmlReaderSettings.cs
new file mode 100644
index 0000000..d6a9138
--- /dev/null
+++ b/src/Web/HtmlReaderSettings.cs
@@ -0,0 +1,53 @@
+using System;
+using System.Xml;
+using Sgml;
+
+namespace Devlooped.Web;
+
+///
+/// Specifies a set of features to support when loading
+/// HTML via .
+///
+public sealed class HtmlReaderSettings
+{
+ ///
+ /// Default settings when reading HTML, which are:
+ /// , =true
+ /// and =["script", "style"].
+ ///
+ public static HtmlReaderSettings Default { get; } = new HtmlReaderSettings
+ {
+ CaseFolding = CaseFolding.ToLower,
+ IgnoreXmlNamespaces = true,
+ SkipElements = new string[] { "script", "style" },
+ };
+
+ ///
+ /// HTML is case insensitive, so you can choose between converting
+ /// to lower case or upper case tags. "None" means that the case is left
+ /// alone, except that end tags will be folded to match the start tags.
+ ///
+ public CaseFolding CaseFolding { get; set; }
+
+ ///
+ /// Whether to ignore XML namespaces in the input. Default is true.
+ ///
+ public bool IgnoreXmlNamespaces { get; set; } = true;
+
+ ///
+ /// Elements that should be skipped when reading the HTML so they are
+ /// not loaded into the resulting XML document. Defaults to no elements.
+ ///
+ public string[] SkipElements { get; set; } = Array.Empty();
+
+ ///
+ /// Specifies how leading and trailing whitespace is handled.
+ /// Note that this is a -enum.
+ ///
+ public TextWhitespaceHandling TextWhitespace { get; set; }
+
+ ///
+ /// Specifies how whitespace nodes are handled.
+ ///
+ public WhitespaceHandling WhitespaceHandling { get; set; }
+}
diff --git a/src/Css/Parser.cs b/src/Web/Parser.cs
similarity index 98%
rename from src/Css/Parser.cs
rename to src/Web/Parser.cs
index 3d8bfd1..7ff332f 100644
--- a/src/Css/Parser.cs
+++ b/src/Web/Parser.cs
@@ -3,7 +3,7 @@
using Superpower;
using Superpower.Parsers;
-namespace Devlooped.Xml.Css;
+namespace Devlooped.Web;
class Parser
{
@@ -133,7 +133,7 @@ from rest in ClassSelector
.Many()
select (start == null && rest.Length == 0) ?
CompositeSelector.Empty :
- new[] { start ?? Css.UniversalSelector.Default }.Concat(rest).ToArray();
+ new[] { start ?? Web.UniversalSelector.Default }.Concat(rest).ToArray();
internal static TextParser CombinatorParser { get; } =
from combinator in Character.In('>', '+', '~').Or(Character.WhiteSpace).Optional().Named("combinator")
@@ -175,7 +175,7 @@ from start in SimpleSelectorSequence
from _ in Character.WhiteSpace.IgnoreMany()
from steps in SelectorStep.ManyDelimitedBy(Span.WhiteSpace)
select new Selector(
- start.Sequence.Length == 0 ? new[] { Css.UniversalSelector.Default } : start.Sequence,
+ start.Sequence.Length == 0 ? new[] { Web.UniversalSelector.Default } : start.Sequence,
steps.Where(x => x != null));
internal static TextParser SelectorGroup { get; } = Selector.ManyDelimitedBy(Character.In(','));
diff --git a/src/Web/System/Xml/XmlWrappingReader.cs b/src/Web/System/Xml/XmlWrappingReader.cs
new file mode 100644
index 0000000..0375246
--- /dev/null
+++ b/src/Web/System/Xml/XmlWrappingReader.cs
@@ -0,0 +1,276 @@
+//
+#region License
+// MIT License
+//
+// Copyright (c) Daniel Cazzulino
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+#endregion
+
+#nullable enable
+using System.Threading.Tasks;
+
+namespace System.Xml
+{
+ /// <summary>
+ /// Base <see cref="XmlReader"/> that can be used to create new readers by
+ /// wrapping existing ones.
+ /// </summary>
+ /// <remarks>
+ /// Every member forwards to <see cref="BaseReader"/> unless a derived class
+ /// overrides it. Supports <see cref="IXmlLineInfo"/> if the underlying reader
+ /// supports it.
+ /// </remarks>
+ abstract partial class XmlWrappingReader : XmlReader, IXmlLineInfo
+ {
+     /// <summary>
+     /// Initializes a new instance of the <see cref="XmlWrappingReader"/>.
+     /// </summary>
+     /// <param name="baseReader">The underlying reader this instance will wrap.</param>
+     protected XmlWrappingReader(XmlReader baseReader) => BaseReader = baseReader;
+
+     /// <summary>
+     /// Gets or sets the underlying reader this instance is wrapping.
+     /// </summary>
+     protected XmlReader BaseReader { get; set; }
+
+     /// <inheritdoc/>
+     public override bool CanReadBinaryContent => BaseReader.CanReadBinaryContent;
+
+     /// <inheritdoc/>
+     public override bool CanReadValueChunk => BaseReader.CanReadValueChunk;
+
+     /// <inheritdoc/>
+     public override bool CanResolveEntity => BaseReader.CanResolveEntity;
+
+     /// <summary>
+     /// Closes this reader if it is still open and disposes the wrapped reader.
+     /// </summary>
+     /// <param name="disposing">
+     /// <see langword="true"/> when called from <see cref="IDisposable.Dispose"/>;
+     /// nothing is done otherwise, since only managed objects are held.
+     /// </param>
+     protected override void Dispose(bool disposing)
+     {
+         // Follow the dispose pattern: other managed objects are only touched
+         // on an explicit dispose.
+         if (!disposing)
+             return;
+
+         if (ReadState != ReadState.Closed)
+             Close();
+
+         (BaseReader as IDisposable)?.Dispose();
+     }
+
+     /// <inheritdoc/>
+     public override bool Read() => BaseReader.Read();
+
+     /// <inheritdoc/>
+     public override void Close() => BaseReader.Close();
+
+     /// <inheritdoc/>
+     public override string GetAttribute(int i) => BaseReader.GetAttribute(i);
+
+     /// <inheritdoc/>
+     public override string? GetAttribute(string name) => BaseReader.GetAttribute(name);
+
+     /// <inheritdoc/>
+     public override string? GetAttribute(string localName, string? namespaceURI) => BaseReader.GetAttribute(localName, namespaceURI);
+
+     /// <inheritdoc/>
+     public override string? LookupNamespace(string prefix) => BaseReader.LookupNamespace(prefix);
+
+     /// <inheritdoc/>
+     public override void MoveToAttribute(int i) => BaseReader.MoveToAttribute(i);
+
+     /// <inheritdoc/>
+     public override bool MoveToAttribute(string name) => BaseReader.MoveToAttribute(name);
+
+     /// <inheritdoc/>
+     public override bool MoveToAttribute(string localName, string? namespaceURI) => BaseReader.MoveToAttribute(localName, namespaceURI);
+
+     /// <inheritdoc/>
+     public override bool MoveToElement() => BaseReader.MoveToElement();
+
+     /// <inheritdoc/>
+     public override bool MoveToFirstAttribute() => BaseReader.MoveToFirstAttribute();
+
+     /// <inheritdoc/>
+     public override bool MoveToNextAttribute() => BaseReader.MoveToNextAttribute();
+
+     /// <inheritdoc/>
+     public override bool ReadAttributeValue() => BaseReader.ReadAttributeValue();
+
+     /// <inheritdoc/>
+     public override void ResolveEntity() => BaseReader.ResolveEntity();
+
+     /// <inheritdoc/>
+     public override int AttributeCount => BaseReader.AttributeCount;
+
+     /// <inheritdoc/>
+     public override string BaseURI => BaseReader.BaseURI;
+
+     /// <inheritdoc/>
+     public override int Depth => BaseReader.Depth;
+
+     /// <inheritdoc/>
+     public override bool EOF => BaseReader.EOF;
+
+     /// <inheritdoc/>
+     public override bool HasValue => BaseReader.HasValue;
+
+     /// <inheritdoc/>
+     public override bool IsDefault => BaseReader.IsDefault;
+
+     /// <inheritdoc/>
+     public override bool IsEmptyElement => BaseReader.IsEmptyElement;
+
+     /// <inheritdoc/>
+     public override string this[int i] => BaseReader[i];
+
+     /// <inheritdoc/>
+     public override string? this[string name] => BaseReader[name];
+
+     /// <inheritdoc/>
+     public override string? this[string name, string? namespaceURI] => BaseReader[name, namespaceURI];
+
+     /// <inheritdoc/>
+     public override string LocalName => BaseReader.LocalName;
+
+     /// <inheritdoc/>
+     public override string Name => BaseReader.Name;
+
+     /// <inheritdoc/>
+     public override string NamespaceURI => BaseReader.NamespaceURI;
+
+     /// <inheritdoc/>
+     public override XmlNameTable NameTable => BaseReader.NameTable;
+
+     /// <inheritdoc/>
+     public override XmlNodeType NodeType => BaseReader.NodeType;
+
+     /// <inheritdoc/>
+     public override string Prefix => BaseReader.Prefix;
+
+     /// <inheritdoc/>
+     public override char QuoteChar => BaseReader.QuoteChar;
+
+     /// <inheritdoc/>
+     public override ReadState ReadState => BaseReader.ReadState;
+
+     /// <inheritdoc/>
+     public override string Value => BaseReader.Value;
+
+     /// <inheritdoc/>
+     public override string XmlLang => BaseReader.XmlLang;
+
+     /// <inheritdoc/>
+     public override XmlSpace XmlSpace => BaseReader.XmlSpace;
+
+     /// <inheritdoc/>
+     public override int ReadValueChunk(char[] buffer, int index, int count) => BaseReader.ReadValueChunk(buffer, index, count);
+
+     #region IXmlLineInfo Members
+
+     /// <summary>
+     /// Whether line information is available, which requires the wrapped
+     /// reader to implement <see cref="IXmlLineInfo"/> and report it.
+     /// </summary>
+     public bool HasLineInfo() => BaseReader is IXmlLineInfo info && info.HasLineInfo();
+
+     /// <summary>
+     /// Current line number from the wrapped reader, or 0 if it does not
+     /// implement <see cref="IXmlLineInfo"/>.
+     /// </summary>
+     public int LineNumber => BaseReader is IXmlLineInfo info ? info.LineNumber : 0;
+
+     /// <summary>
+     /// Current line position from the wrapped reader, or 0 if it does not
+     /// implement <see cref="IXmlLineInfo"/>.
+     /// </summary>
+     public int LinePosition => BaseReader is IXmlLineInfo info ? info.LinePosition : 0;
+
+     #endregion
+ }
+}
diff --git a/src/Css/Css.csproj b/src/Web/Web.csproj
similarity index 71%
rename from src/Css/Css.csproj
rename to src/Web/Web.csproj
index 50ed139..302c892 100644
--- a/src/Css/Css.csproj
+++ b/src/Web/Web.csproj
@@ -2,12 +2,11 @@
netstandard2.0
- Devlooped.Xml.Css
- Devlooped.Xml.Css
- $(AssemblyName)
- https://clarius.org/css
+ Devlooped.Web
+ Devlooped.Web
+ https://clarius.org/webreadme.md
- CSS selectors for XLinq, aka Linq to Css
+ Read HTML as XML and query it with CSS over XLinq
@@ -16,6 +15,7 @@
+
diff --git a/src/icon.png b/src/icon.png
index 801687c..797307f 100644
Binary files a/src/icon.png and b/src/icon.png differ