0
votes

I have the following files to work with:

<root>
 <html>
  <table class=" table search-results-property-table">  
   <prefterm>Abies</prefterm>
    <tr>
     <td>
      <span class="versal property-click" title="Broader concept">BROADER CONCEPT</span>
     </td>
     <td>
      <ul>
       <li>
        <a class="versal" href="../../../agrovoc/en/page/c_5886">Pinaceae</a>
       </li>
      </ul>
     </td>
    </tr>
    <tr>
     <td>
      <span class="versal property-click" title="Narrower concepts.">NARROWER CONCEPTS</span>
     </td>
     <td>
      <ul>
       <li>
        <a class="versal" href="../../../agrovoc/en/page/c_11">Abies alba</a>
       </li>
       <li>
        <a class="versal" href="../../../agrovoc/en/page/c_26316">Abies amabilis</a>
                ....
       <li>
        <a class="versal" href="../../../agrovoc/en/page/c_26323">Abies veitchii</a>
       </li>
              ....
  </table>
 </html>
 <html>
         .... (another set to crosswalk)
 </html>
</root>

With the XSLT below I can get the value of the broader concept, which has only one value:

<?xml version="1.0" encoding="UTF-8"?>
 <xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <xsl:output method="text" omit-xml-declaration="yes" indent="no"/>

 <!-- Entry template: for every html child of root, writes the two fixed
      header lines of a record ("START HERE" and the =LDR leader), each
      terminated by CR LF.
      NOTE(review): no xsl:apply-templates is issued here, so this template
      by itself never hands the html descendants to the
      table/tr/td/span template. -->
 <xsl:template match="root">
  <xsl:for-each select="html">
   <xsl:text>START HERE</xsl:text>
   <xsl:text>&#13;&#10;</xsl:text>
   <xsl:text>=LDR  00000nam  2200000Ia 4500</xsl:text>
   <xsl:text>&#13;&#10;</xsl:text>
  </xsl:for-each>
 </xsl:template>

 <!-- Label-cell template: when the span's title is "Broader concept", writes
      one =301 line built from the span text ($a), the link text ($b) and the
      link href ($c), followed by CR LF. Spans with any other title produce
      no output because the xsl:choose has no other branch. -->
 <xsl:template match="table/tr/td/span">
  <xsl:choose>
   <xsl:when test="@title='Broader concept'">
    <xsl:text>=301  \\$a</xsl:text><xsl:value-of select="." />
    <xsl:text>$b</xsl:text>
    <!-- ../.. is the enclosing tr, so this path selects every a in the row;
         in XSLT 1.0 xsl:value-of writes only the first node of the
         selected set, which is why a single value comes out. -->
    <xsl:value-of select="../../td/ul/li/a" />
    <xsl:text>$c</xsl:text>
    <!-- Same first-node rule: only the href of the first a is written. -->
    <xsl:value-of select="../../td/ul/li/a/@href" />
    <xsl:text>&#13;&#10;</xsl:text>
   </xsl:when>
  </xsl:choose>
 </xsl:template>

</xsl:stylesheet>

The code seems to work for rows that have only one value. I tried adding xsl:for-each, but I don't know where to put it because I'm traversing all the way down to "a". I tried adding xsl:for-each select="//a" before the first "xsl:choose", and also after xsl:when test="@title='Narrower concepts.'", but to no avail. I want the output to look like this:

START HERE
=LDR  00000nam  2200000Ia 4500
=301  \\$abroaderterm$bPinaceae
=302  \\$anarrowerterm$bAbies alba$cc_11
=302  \\$anarrowerterm$bAbies amabilis$cc_26316
   ....
=302  \\$anarrowerterm$bAbies veitchii$cc_26323

START HERE
=LDR  00000nam  2200000Ia 4500
 (data set 2)
  ....

Can somebody point me to what I should do or what I have missed, or is there another way to handle my use case? Thanks and cheers! Update: Please note that I have more than one dataset to crosswalk, something like:

<root>
 <html>
     .... dataset 1
 </html>
 <html>
     .... dataset 2
 </html>
     .... more than 2 datasets
</root>

For reference, the file can be found here: http://128.199.159.143/mergedhtmltest_forprocess_span.xml

1

1 Answer

0
votes

Given a well-formed XML:

<root>
    <html>
        <table class=" table search-results-property-table">  
            <prefterm>Abies</prefterm>
            <tr>
                <td>
                    <span class="versal property-click" title="Broader concept">BROADER CONCEPT</span>
                </td>
                <td>
                    <ul>
                        <li>
                            <a class="versal" href="../../../agrovoc/en/page/c_5886">Pinaceae</a>
                        </li>
                    </ul>
                </td>
            </tr>
            <tr>
                <td>
                    <span class="versal property-click" title="Narrower concepts.">NARROWER CONCEPTS</span>
                </td>
                <td>
                    <ul>
                        <li>
                            <a class="versal" href="../../../agrovoc/en/page/c_11">Abies alba</a>
                        </li>
                        <li>
                            <a class="versal" href="../../../agrovoc/en/page/c_26316">Abies amabilis</a>
                            </li>
                            <li>
                                <a class="versal" href="../../../agrovoc/en/page/c_26323">Abies veitchii</a>
                            </li>
                    </ul>
                </td>
            </tr>
        </table>
    </html>
</root>

and the following stylesheet:

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:strip-space elements="*"/>
    <xsl:output method="text" omit-xml-declaration="yes" indent="no"/>

    <!-- Entry point: pushes every html child of root, in document order,
         to the per-record template below. -->
    <xsl:template match="root">
        <xsl:apply-templates select="html" mode="marc-record"/>
    </xsl:template>

    <!-- Writes one record: a CR LF separator before every record except the
         first, the two fixed header lines, then whatever the table/tr
         template produces for the rows of this html element. -->
    <xsl:template match="html" mode="marc-record">
        <xsl:if test="position() != 1">
            <xsl:text>&#13;&#10;</xsl:text>
        </xsl:if>
        <!-- Header lines are joined by CR LF; the leader line is left
             unterminated because each field line starts with its own CR LF. -->
        <xsl:text>START HERE&#13;&#10;=LDR  00000nam  2200000Ia 4500</xsl:text>
        <xsl:apply-templates select="table/tr"/>
    </xsl:template>


    <!--
        Row template: turns one table row into zero or more field lines.

        The row is classified by the title attribute of the first element in
        its first cell. "Broader concept" rows yield =301 lines and
        "Narrower concepts." rows yield =302 lines, one line per a element
        found anywhere inside the second cell. Rows with any other title
        produce no output.

        Each line is written as: CR LF, the field prefix, $b followed by the
        link text, $c followed by the last "/"-separated segment of the href.
    -->
    <xsl:template match="table/tr">
        <!-- Title of the labelling element in the first cell. The sample
             input only uses span; the narrow branch is kept as in the
             original (presumably the full data file also labels rows with a
             narrow element; verify against the real input). -->
        <xsl:variable name="Concepts">
            <xsl:choose>
                <xsl:when test="td[1]/*[1][local-name()='span']">
                    <xsl:value-of select="td[1]/span/@title"/>
                </xsl:when>
                <xsl:when test="td[1]/*[1][local-name()='narrow']">
                    <xsl:value-of select="td[1]/narrow/@title"/>
                </xsl:when>
            </xsl:choose>
        </xsl:variable>
        <!-- Field prefix for this row, resolved once per row instead of once
             per link; stays empty for rows that are not crosswalked. -->
        <xsl:variable name="fieldPrefix">
            <xsl:choose>
                <xsl:when test="$Concepts='Broader concept'">
                    <xsl:text>=301  \\$abroaderterm</xsl:text>
                </xsl:when>
                <xsl:when test="$Concepts='Narrower concepts.'">
                    <xsl:text>=302  \\$anarrowerterm</xsl:text>
                </xsl:when>
            </xsl:choose>
        </xsl:variable>
        <!-- string() is required: a result tree fragment converted straight
             to boolean is always true, even when it holds no text. -->
        <xsl:if test="string($fieldPrefix)">
            <xsl:for-each select="td[2]//a">
                <xsl:text>&#13;&#10;</xsl:text>
                <xsl:value-of select="$fieldPrefix"/>
                <xsl:text>$b</xsl:text>
                <xsl:value-of select="." />
                <xsl:text>$c</xsl:text>
                <!-- Keep only the concept id, i.e. the part of the href
                     after its last slash. -->
                <xsl:call-template name="tokenizeString">
                    <xsl:with-param name="list" select="@href"/>
                    <xsl:with-param name="delimiter" select="'/'"/>
                </xsl:call-template>
            </xsl:for-each>
        </xsl:if>
    </xsl:template>

    <!--
        tokenizeString: writes the part of $list that follows the last
        occurrence of $delimiter. When $delimiter does not occur in $list,
        $list is written unchanged.

        Parameters:
          list      : the string to reduce (for example an href).
          delimiter : the separator string. An empty delimiter is treated as
                      "not found", so $list is written unchanged.
    -->
    <xsl:template name="tokenizeString">
        <xsl:param name="list"/>
        <xsl:param name="delimiter"/>
        <xsl:choose>
            <!-- The length check guards against an empty delimiter: it is
                 contained in every string and substring-after() would return
                 $list unchanged, so the recursion would never terminate. -->
            <xsl:when test="string-length($delimiter) &gt; 0 and contains($list, $delimiter)">
                <!-- Discard everything up to and including the first
                     delimiter, then continue with the remainder. -->
                <xsl:call-template name="tokenizeString">
                    <xsl:with-param name="list" select="substring-after($list,$delimiter)"/>
                    <xsl:with-param name="delimiter" select="$delimiter"/>
                </xsl:call-template>
            </xsl:when>
            <xsl:otherwise>
                <!-- No delimiter left: this is the final segment. -->
                <xsl:value-of select="$list"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>

</xsl:stylesheet>

It outputs:

START HERE
=LDR  00000nam  2200000Ia 4500
=301  \\$abroaderterm$bPinaceae$cc_5886
=302  \\$anarrowerterm$bAbies alba$cc_11
=302  \\$anarrowerterm$bAbies amabilis$cc_26316
=302  \\$anarrowerterm$bAbies veitchii$cc_26323