Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
353bc2d
closer edits based on new guidelines
vak2ve Nov 9, 2021
ec1f07b
Update frus1981-88v05.xml
vak2ve Nov 11, 2021
0150d42
Revert "Update frus1981-88v05.xml"
vak2ve Nov 12, 2021
240fa19
Update frus1981-88v05.xml
vak2ve Nov 12, 2021
230ac6a
Start of a query for batch fixing signatures
joewiz Nov 15, 2021
38571e4
Update fix-signatures.xq
vak2ve Nov 16, 2021
7be5ebd
Update fix-signatures.xq
vak2ve Nov 16, 2021
37547a7
Update fix-signatures.xq
vak2ve Nov 17, 2021
0a31638
renamed xquery for signature fixes
vak2ve Nov 17, 2021
2cc39a5
Update fix-signature-structure.xq
vak2ve Nov 18, 2021
7113789
Update fix-signature-structure.xq
vak2ve Nov 19, 2021
7fb5180
Update fix-signature-structure.xq
vak2ve Nov 23, 2021
fb06390
add a function to prevent saxon adding namespaces
joewiz Nov 23, 2021
08cdf9e
minor edits
vak2ve Nov 23, 2021
9441687
Update frus1861.xml
vak2ve Nov 24, 2021
81a0132
xquery scripts and schematron warnings tests
vak2ve Dec 1, 2021
e1fb075
Add signature block checks
joewiz Dec 1, 2021
c20ea71
Review rules, add before/after documentation
joewiz Dec 1, 2021
4a68fec
updates to Reagan files
vak2ve Dec 1, 2021
8877173
fixes from review
vak2ve Dec 3, 2021
9283524
Add test file
joewiz Dec 6, 2021
2ea9653
Relax wording of warning
joewiz Dec 6, 2021
68600f0
Add xspec for signature block checks
joewiz Dec 6, 2021
a9453a4
Update fix-signature-structure.xq
vak2ve Dec 6, 2021
0a52fa8
Use copy-namespaces declaration
joewiz Dec 6, 2021
2e34b8b
Delete expression causing over-eager persName
joewiz Dec 6, 2021
d347157
Improve descriptions and examples
joewiz Dec 6, 2021
ba8f122
Fix placement of closer's attributes
joewiz Dec 6, 2021
dca0527
Add a "debug" mode to track behavior of rules
joewiz Dec 6, 2021
fa01620
Add a rule to fix signed without child persName
joewiz Dec 6, 2021
ae4cd2f
Add whitespace inside newly nested elements
joewiz Dec 6, 2021
e7d2425
Remove unnecessary attribute from documentation
joewiz Dec 6, 2021
4c95dbd
Update frus1981-88v11.xml
vak2ve Dec 7, 2021
818b830
frus-xspec updates ongoing
vak2ve Dec 7, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 239 additions & 0 deletions schema/fix-signature-structure.xq
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
xquery version "3.1";

(:

Fixes common problems with signatures in FRUS.

This is designed to be run from an oXygen Transformation Scenario, with saxon-configuration.xml enabled.

Run on a volume. Perform format & indent between each run.

Do not commit the automatically generated .bak file to git.

The body of this script is a single comma-separated sequence of XQuery Update
expressions (rules 1, 1b, 2, 3, 4, 5). Each rule selects the nodes it wants to
fix in the volume and issues replace/delete updates against them.

NOTE(review): more than one run is presumably needed because every rule is
evaluated against the same unmodified document; a fix aimed at a node inside
an element that another rule replaces in the same run would only take effect
on a later run. Confirm against actual behavior in oXygen/Saxon.

:)

(: When nodes are copied into newly constructed elements, do not carry over
   their unused in-scope namespace bindings (no-preserve), and let the copies
   inherit the namespaces of their new parent (inherit). :)
declare copy-namespaces no-preserve, inherit;

(: TEI namespace used by every path expression in the rules below. :)
declare namespace tei = "http://www.tei-c.org/ns/1.0";
(: FRUS namespace; not referenced by any of the rules in this script. :)
declare namespace frus = "http://history.state.gov/frus/ns/1.0";

(: URI of the volume to fix; must be supplied by the caller. :)
declare variable $path external;
(: The volume document that all rules read from and update. :)
declare variable $vol := doc($path);
(: When true, the elements created by rules 1, 1b, 2, 3 and 5 are stamped with
   an ana="rule-N" attribute so the effect of each rule can be traced. :)
declare variable $debug := false();


(: Rule 1. Give a closer the signed child it is missing.

   Selects every closer that has no signed child but does contain a persName
   outside of any note. The closer is rebuilt with its original attributes,
   and everything it used to contain is moved inside a new signed element.
   In debug mode the rebuilt closer also carries ana="rule-1".

   Input:

     <closer>
       <persName>
         <hi rend="smallcaps">Inman, Akers and Inman</hi>
       </persName>
     </closer>

   Output:

     <closer>
       <signed>
         <persName>
           <hi rend="smallcaps">Inman, Akers and Inman</hi>
         </persName>
       </signed>
     </closer>

:)

for $unsigned-closer in
    $vol//tei:closer[empty(tei:signed)][exists(.//tei:persName[empty(ancestor::tei:note)])]
let $tei-ns := "http://www.tei-c.org/ns/1.0"
let $trace := if ($debug) then attribute ana { "rule-1" } else ()
return
    replace node $unsigned-closer with
        element { QName($tei-ns, "closer") } {
            $trace,
            $unsigned-closer/@*,
            element { QName($tei-ns, "signed") } {
                $unsigned-closer/node()
            }
        }
,

(: Rule 1b. Wrap a leading bare-text signature in persName.

   For every signed that has no persName child, look at its first child node
   that is not blank. If that node is a text node, wrap it in a new persName.
   In debug mode the new persName carries ana="rule-1b".

   Input:

     <closer>
       <signed>Steve<note n="4" xml:id="d123fn4">
           <persName corresp="#p_OSA_1">Oxman</persName> signed his initials
           over his typed signature.</note>
       </signed>
     </closer>

   Output:

     <closer>
       <signed>
         <persName>Steve</persName><note n="4" xml:id="d123fn4">
           <persName corresp="#p_OSA_1">Oxman</persName> signed his initials
           over his typed signature.</note>
       </signed>
     </closer>

:)

for $nameless-signed in $vol//tei:signed[empty(tei:persName)]
(: first child whose string value is not just whitespace :)
let $lead := ($nameless-signed/node()[normalize-space(.) ne ""])[1]
where $lead instance of text()
return
    replace node $lead with
        element { QName("http://www.tei-c.org/ns/1.0", "persName") } {
            if ($debug) then attribute ana { "rule-1b" } else (),
            $lead
        }
,

(: 2. Fix hi/@rend="strong" lacking outer persName

Selects every hi with rend="strong" anywhere inside a signed whose parent is
not already a persName, and wraps it in a new persName (padded with a single
space on each side so later format & indent has whitespace to work with).
In debug mode the new persName carries ana="rule-2".

Bold text that sits inside a note is skipped: a footnote attached to a
signature is commentary, not part of the signature itself. This matches
rule 1 and the Schematron signature checks, which likewise ignore anything
with a note ancestor.

before:

<signed>
<hi rend="strong">Adam M. Howard, Ph.D.</hi>
</signed>

after:

<signed>
<persName>
<hi rend="strong">Adam M. Howard, Ph.D.</hi>
</persName>
</signed>

:)

for $hi in $vol//tei:signed//tei:hi[@rend = "strong"]
                                   [not(parent::tei:persName)]
                                   [not(ancestor::tei:note)]
return
    replace node $hi with
        element { QName("http://www.tei-c.org/ns/1.0", "persName") } {
            if ($debug) then attribute ana { "rule-2" } else (),
            text { " " },
            $hi,
            text { " " }
        }
,

(: Rule 3. Put the content of a signer's persName inside hi rend="strong".

   Selects every persName that is a direct child of signed and has no hi
   child. The persName is rebuilt with its original attributes, and its
   former content is moved into a new hi rend="strong", padded with a single
   space on each side. In debug mode the rebuilt persName also carries
   ana="rule-3".

   Input:

     <signed>
       <persName corresp="#p_HAA_1">Hartman</persName>
     </signed>

   Output:

     <signed>
       <persName corresp="#p_HAA_1">
         <hi rend="strong">Hartman</hi>
       </persName>
     </signed>

:)
for $plain-name in $vol//tei:signed/tei:persName[empty(tei:hi)]
let $tei-ns := "http://www.tei-c.org/ns/1.0"
let $trace := if ($debug) then attribute ana { "rule-3" } else ()
return
    replace node $plain-name with
        element { QName($tei-ns, "persName") } {
            $trace,
            $plain-name/@*,
            text { " " },
            element { QName($tei-ns, "hi") } {
                attribute rend { "strong" },
                $plain-name/node()
            },
            text { " " }
        }
,

(: Rule 4. Drop a redundant corresp attribute from signed.

   When signed carries a corresp value that is also found on one of its child
   persName elements, the copy on signed is deleted; the persName keeps its
   own.

   Input:

     <closer>
       <signed corresp="#p_RRW_1">
         <persName corresp="#p_RRW_1">
           <hi rend="strong">Ronald Reagan</hi>
         </persName>
       </signed>
     </closer>

   Output:

     <closer>
       <signed>
         <persName corresp="#p_RRW_1">
           <hi rend="strong">Ronald Reagan</hi>
         </persName>
       </signed>
     </closer>

:)

delete nodes $vol//tei:signed[@corresp = tei:persName/@corresp]/@corresp
,


(: Rule 5. Unwrap affiliation elements found directly inside signed.

   The affiliation element itself is removed and its child nodes take its
   place. In debug mode the children are instead wrapped in a
   seg ana="rule-5" so the spot remains visible.

   Input:

     <signed rend="left">
       <persName><hi rend="strong">Adam M. Howard, Ph.D.</hi></persName>
       <lb/>
       <affiliation>
         <hi rend="italic">General Editor</hi>
       </affiliation>
     </signed>

   Output:

     <signed rend="left">
       <persName><hi rend="strong">Adam M. Howard, Ph.D.</hi></persName>
       <lb/>
       <hi rend="italic">General Editor</hi>
     </signed>

:)

for $wrapper in $vol//tei:signed/tei:affiliation
let $contents := $wrapper/node()
return
    if ($debug) then
        replace node $wrapper with
            element { QName("http://www.tei-c.org/ns/1.0", "seg") } {
                attribute ana { "rule-5" },
                $contents
            }
    else
        replace node $wrapper with $contents

(: Rule 6 (not yet implemented). Insert <lb/> and hi rend="italic" for
   content that follows the persName.

   TODO

:)
40 changes: 40 additions & 0 deletions schema/frus-xspec.sch
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<schema xmlns="http://purl.oclc.org/dsdl/schematron" queryBinding="xslt3"
    xmlns:sqf="http://www.schematron-quickfix.com/validator/process">

    <ns prefix="tei" uri="http://www.tei-c.org/ns/1.0"/>

    <!-- Checks on the structure of signature blocks (tei:signed). Every assert carries an id
         so that it can be addressed individually. -->
    <pattern id="signed-checks">
        <title>Signature block checks</title>

        <!-- Checks on the signed element itself. -->
        <rule context="tei:signed">
            <assert test="not(@corresp)" id="signed-corresp">The @corresp attribute is not allowed
                on the signed element; it should be moved to the persName inside the signed
                element</assert>
            <assert test=".//tei:persName[not(ancestor::tei:note)]" id="signed-persname">Signature
                blocks must contain a persName</assert>
        </rule>

        <!-- Checks on each persName inside signed, ignoring persNames inside footnotes.

             All persName checks live in this one rule: within a pattern a node is only ever
             handled by the first rule whose context matches it, so a second rule with the same
             context would never fire. -->
        <rule context="tei:signed//tei:persName[not(ancestor::tei:note)]">
            <!-- Variables are declared before the assertions, as the ISO Schematron schema
                 expects. -->
            <!-- First following sibling that is not a note, an lb, or blank. -->
            <let name="immediate-following-sibling-node"
                value="following-sibling::node()[not(name() = ('note', 'lb') or normalize-space(.) eq '')][1]"/>
            <!-- All following siblings other than note, persName and affiliation elements. -->
            <let name="following-nodes"
                value="following-sibling::node()[not(name(.) = ('note', 'persName', 'affiliation'))]"/>

            <!-- empty(node()) exempts a persName that has no content at all; the context node
                 itself is never an empty sequence, so testing empty(.) could never succeed. -->
            <assert test="empty(node()) or tei:hi[@rend eq 'strong']" role="warn"
                id="persnames-child-hi-rend-strong">People who signed must be wrapped in a
                hi/@rend="strong" element</assert>
            <assert
                test="empty($immediate-following-sibling-node) or (matches(normalize-space($immediate-following-sibling-node), '^\p{P}*$') or $immediate-following-sibling-node/self::tei:hi/@rend eq 'italic')"
                id="remove-affiliation">Text following a persName element must be wrapped in a
                hi/@rend="italic" element, not an affiliation element; be sure any line breaks are
                marked with lb elements</assert>
            <assert
                test="count($following-nodes) le 1 or ($following-nodes/self::tei:hi/@rend = 'italic' and $following-nodes/self::tei:lb)"
                id="insert-linebreaks">Any persName elements followed by italicized text should be separated by lb
                elements.</assert>
            <!-- An assert reports when its test is false, so the test is negated: the warning
                 is raised for a persName that IS followed by another persName. -->
            <assert test="not(following-sibling::tei:persName)" role="warn"
                id="multiple-persnames">This may need to be adapted
                to a list/item structure</assert>
        </rule>
    </pattern>

</schema>
31 changes: 31 additions & 0 deletions schema/frus.sch
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,37 @@
character of this <value-of select="./name()"/> element. Fix orientation or delete?</assert>
</rule>
</pattern>

<!-- Checks on the structure of signature blocks (tei:signed). -->
<pattern id="signed-checks">
    <title>Signature block checks</title>

    <!-- Checks on the signed element itself. -->
    <rule context="tei:signed">
        <assert test="not(@corresp)" id="signed-corresp">The @corresp attribute is not allowed on the signed
            element; it should be moved to the persName inside the signed element</assert>
        <assert test=".//tei:persName[not(ancestor::tei:note)]">Signature blocks must contain a
            persName</assert>
    </rule>

    <!-- Checks on each persName inside signed, ignoring persNames inside footnotes.

         All persName checks live in this one rule: within a pattern a node is only ever
         handled by the first rule whose context matches it, so a second rule with the same
         context would never fire. -->
    <rule context="tei:signed//tei:persName[not(ancestor::tei:note)]">
        <!-- Variables are declared before the assertions, as the ISO Schematron schema
             expects. -->
        <!-- First following sibling that is not a note, an lb, or blank. -->
        <let name="immediate-following-sibling-node"
            value="following-sibling::node()[not(name() = ('note', 'lb') or normalize-space(.) eq '')][1]"/>
        <!-- All following siblings other than note, persName and affiliation elements. -->
        <let name="following-nodes"
            value="following-sibling::node()[not(name(.) = ('note', 'persName', 'affiliation'))]"/>

        <!-- empty(node()) exempts a persName that has no content at all; the context node
             itself is never an empty sequence, so testing empty(.) could never succeed. -->
        <assert test="empty(node()) or tei:hi[@rend eq 'strong']" role="warn">People who signed should be wrapped
            in a hi/@rend="strong" element</assert>
        <assert
            test="empty($immediate-following-sibling-node) or (matches(normalize-space($immediate-following-sibling-node), '^\p{P}*$') or $immediate-following-sibling-node/self::tei:hi/@rend eq 'italic')"
            >Text following a persName element must be wrapped in a hi/@rend="italic" element,
            not an affiliation element; be sure any line breaks are marked with lb
            elements</assert>
        <assert
            test="count($following-nodes) le 1 or ($following-nodes/self::tei:hi/@rend = 'italic' and $following-nodes/self::tei:lb)"
            >Any persName elements followed by italicized text should be separated by lb
            elements.</assert>
        <!-- An assert reports when its test is false, so the test is negated: the warning
             is raised for a persName that IS followed by another persName. -->
        <assert test="not(following-sibling::tei:persName)" role="warn">This may need to be adapted
            to a list/item structure</assert>
    </rule>
</pattern>

<pattern id="image-url-checks">
<title>Image Checks</title>
Expand Down
Loading