0
votes

I'm still facing an XSLT 2.0 key-processing problem: what kind of key definition do I need so that each element is generated in full only once (and the correct template is applied each time)?

source xml:

<?xml version="1.0" encoding="UTF-8"?>
<ROOT>
  <ELEMENTS>
        <!-- Shared type definitions. PORT/TYPE and BASE-RANGEABLE-REF elements
             refer to them with the text "/" followed by the definition's ID. -->
        <COMMON-DATATYPES>
              <NUMERICAL ID="MyFloat_20_62045">
                    <NAME>MyFloat</NAME>
              </NUMERICAL>
              <RANGEABLE-VALUE ID="PercentType_20_62177">
                    <NAME>PercentType</NAME>
                    <BASE-RANGEABLE-REF TYPE="NUMERICAL">/MyFloat_20_62045</BASE-RANGEABLE-REF>
              </RANGEABLE-VALUE>
        </COMMON-DATATYPES>
        <OBJECT ID="001">
              <NAME>First</NAME>
              <PORTS>
                    <PORT ID="1">
                          <NAME>Input</NAME>
                          <TYPE TREF="NUMERICAL">/MyFloat_20_62045</TYPE>
                    </PORT>
                    <PORT ID="2">
                          <NAME>Output</NAME>
                          <TYPE TREF="NUMERICAL">/MyFloat_20_62045</TYPE>
                    </PORT>
              </PORTS>
        </OBJECT>
        <OBJECT ID="002">
              <NAME>Second</NAME>
              <PORTS>
                    <PORT ID="11">
                          <NAME>Input</NAME>
                          <TYPE TREF="NUMERICAL">/MyFloat_20_62045</TYPE>
                    </PORT>
                    <!-- This port defines its NUMERICAL inline inside TYPE instead
                         of referring to COMMON-DATATYPES. -->
                    <PORT ID="22">
                          <NAME>Output</NAME>
                          <TYPE TREF="NUMERICAL">
                                <NUMERICAL ID="MySecondFloat_20_62055">
                                      <NAME>MySecondFloat</NAME>
                                </NUMERICAL>
                          </TYPE>
                    </PORT>
                    <PORT ID="33">
                          <NAME>Output</NAME>
                          <TYPE TREF="RANGEABLE-VALUE">/PercentType_20_62177</TYPE>
                    </PORT>
                    <!-- Inline RANGEABLE-VALUE; unlike PercentType it has no
                         BASE-RANGEABLE-REF child. -->
                    <PORT ID="44">
                          <NAME>Output</NAME>
                          <TYPE TREF="RANGEABLE-VALUE">
                                <RANGEABLE-VALUE ID="MyFirstOwnRangeableValue_20_62065">
                                      <NAME>MyFirstOwnRangeableValue</NAME>
                                </RANGEABLE-VALUE>
                          </TYPE>
                    </PORT>
                    <!-- Second reference to PercentType (the first is PORT 33). -->
                    <PORT ID="55">
                          <NAME>Output</NAME>
                          <TYPE TREF="RANGEABLE-VALUE">/PercentType_20_62177</TYPE>
                    </PORT>
              </PORTS>
        </OBJECT>
  </ELEMENTS>
</ROOT>

My current XSLT script:

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs" version="2.0">
  <!-- Indexes three different kinds of node under one key name: the ID
       attribute nodes of NUMERICAL and RANGEABLE-VALUE, and PORT/TYPE
       elements. The key value is the last '/'-separated token of the matched
       node's string value. A lookup therefore returns attribute nodes or TYPE
       elements, never a NUMERICAL or RANGEABLE-VALUE element itself. -->
  <xsl:key name="all-datatypes" match="NUMERICAL/@ID | RANGEABLE-VALUE/@ID | PORT/TYPE" use="tokenize(., '/')[last()]"/>
  <!-- NUMERICAL elements indexed by their ID attribute. -->
  <xsl:key name="ref-numericals" match="NUMERICAL" use="@ID"/>
  <!-- RANGEABLE-VALUE elements indexed by their ID attribute. -->
  <xsl:key name="ref-rangeablevalues" match="RANGEABLE-VALUE" use="@ID"/>

  <xsl:output method="xml" omit-xml-declaration="yes" indent="yes"/>

  <!-- Entry point: wraps the result of processing every OBJECT element in the
       document in a single <file> element. -->
  <xsl:template match="ROOT">
        <xsl:element name="file">
              <xsl:apply-templates select="/descendant::OBJECT"/>
        </xsl:element>
  </xsl:template>

  <!-- Full definition of a NUMERICAL: emits <numerical> carrying the source ID
       as its id attribute and the source NAME as a <name> child. Usable both
       through apply-templates in mode "copy" and by name (copy-numerical). -->
  <xsl:template name="copy-numerical" match="NUMERICAL" mode="copy">
        <numerical>
              <xsl:attribute name="id" select="@ID"/>
              <name><xsl:value-of select="NAME"/></name>
        </numerical>
  </xsl:template>

  <!-- Numerical, full form (default mode): selected when the first node that
       the all-datatypes key returns for this ID is this element's own ID
       attribute. Delegates the output to the copy-numerical template.
       NOTE(review): the OBJECT template applies templates only in mode "copy",
       so this default-mode rule does not appear to be reached. -->
  <xsl:template match="NUMERICAL[key('all-datatypes', @ID)[1] is @ID]">
        <xsl:call-template name="copy-numerical"/>
  </xsl:template>

  <!-- Numerical reference (default mode): short form for a NUMERICAL that is
       not written out in full. Emits <numerical href="#ID"/>.
       The result element was previously misspelled "numericial"; it is now
       "numerical", the name used by the full form and by the expected output. -->
  <xsl:template match="NUMERICAL">
        <numerical href="#{@ID}"/>
  </xsl:template>

  <!-- Rangeable value, full form (mode "copy"): emits <rangeable-value> with the
       source ID and NAME, plus a <type-ref> whose href is "#" followed by the
       last '/'-separated token of BASE-RANGEABLE-REF. -->
  <xsl:template match="RANGEABLE-VALUE" mode="copy">
        <rangeable-value id="{@ID}">
              <name><xsl:value-of select="NAME"/></name>
              <type-ref href="#{tokenize(BASE-RANGEABLE-REF, '/')[last()]}"/>
        </rangeable-value>
  </xsl:template>
<!-- Rangeable reference section still missing -->


  <!-- Emits one <object> per OBJECT, with a <port> for each PORTS/PORT holding
       the port's id, name and type.
       The id attribute is set once from @ID. The earlier literal id="{ID}" read
       a non-existent ID child element and was then overwritten by an
       xsl:attribute reading @ID, so the output is unchanged.
       NOTE: type definitions are still always applied in mode "copy", so every
       port receives the full definition. That is the behaviour the question
       asks about and it is intentionally left as it was. -->
  <xsl:template match="OBJECT">
        <object id="{@ID}">
              <ports>
                    <xsl:for-each select="PORTS/PORT">
                          <!-- Last '/'-separated token of the TYPE string value;
                               used as the lookup value for both keys below. -->
                          <xsl:variable name="type-id" select="tokenize(TYPE, '/')[last()]"/>
                          <port id="{@ID}">
                                <name>
                                      <xsl:value-of select="NAME"/>
                                </name>
                                <type>
                                      <xsl:apply-templates mode="copy" select="key('ref-numericals', $type-id)"/>
                                      <xsl:apply-templates mode="copy" select="key('ref-rangeablevalues', $type-id)"/>
                                </type>
                          </port>
                    </xsl:for-each>
              </ports>
        </object>
  </xsl:template>
</xsl:stylesheet>

Expected output:

<!-- Desired result: each type definition appears in full at its first use and
     as an href-only element at later uses.
     NOTE(review): the <type-ref href="#MyFloat_20_62045"/> under port 44 has no
     counterpart in the source shown, where MyFirstOwnRangeableValue carries no
     BASE-RANGEABLE-REF. Confirm whether the source or this output is intended. -->
<file>
  <object id="001">
        <ports>
              <port id="1">
                    <name>Input</name>
                    <type>
                          <numerical id="MyFloat_20_62045">
                                <name>MyFloat</name>
                          </numerical>
                    </type>
              </port>
              <port id="2">
                    <name>Output</name>
                    <numerical href="#MyFloat_20_62045"/>
              </port>
        </ports>
  </object>
  <object id="002">
        <ports>
              <port id="11">
                    <name>Input</name>
                    <numerical href="#MyFloat_20_62045"/>
              </port>
              <port id="22">
                    <name>Output</name>
                    <type>
                          <numerical id="MySecondFloat_20_62055">
                                <name>MySecondFloat</name>
                          </numerical>
                    </type>
              </port>
              <port id="33">
                    <name>Output</name>
                    <type>
                          <rangeable-value id="PercentType_20_62177">
                                <name>PercentType</name>
                                <type-ref href="#MyFloat_20_62045"/>
                          </rangeable-value>
                    </type>
              </port>
              <port id="44">
                    <name>Output</name>
                    <type>
                          <rangeable-value id="MyFirstOwnRangeableValue_20_62065">
                                <name>MyFirstOwnRangeableValue</name>
                                <type-ref href="#MyFloat_20_62045"/>
                          </rangeable-value>
                    </type>
              </port>
              <port id="55">
                    <name>Output</name>
                    <rangeable-value href="#PercentType_20_62177"/>
              </port>
        </ports>
  </object>
</file>

So the transformation principle should be: "The first time an element is handled, it is generated in full; if the same element occurs again later, only an href to that type should be generated." Element details may exist either in the COMMON-DATATYPES section or inline in the PORT/TYPE sections.

Currently my XSLT script produces the full information every time, for each PORT.

Also, would it be possible to call apply-templates just once in the OBJECT template to cover all the different element types (even when the referenced type changes: NUMERICAL, RANGEABLE-VALUE, STRING, BOOLEAN, ...)?

Edit:

If I compare one chosen NUMERICAL using: (generate-id() = generate-id(key('all-datatypes', @ID)[1]))

The left-hand side, generate-id() evaluated on the NUMERICAL, returns a different value from the right-hand side, generate-id(key('all-datatypes', @ID)[1]). The beginning of the returned string is the same on both sides (?), but the right-hand side is a longer string with additional characters. Have I defined the all-datatypes key incorrectly with:

<xsl:key name="all-datatypes" match="NUMERICAL/@ID | RANGEABLE-VALUE/@ID | PORT/TYPE" use="tokenize(., '/')[last()]"/>

Or could the tokenize command (needed for the PORT/TYPE cases) cause these differences?

1

1 Answers

0
votes

Since what you ultimately want is driven by the structure of the output tree rather than the input, it may be less efficient but I suspect it will be much clearer if you do the transformation in two phases - first build the complete output tree without any href shorthands, and then process that tree again to do the de-duplication.

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs" version="2.0">

  <xsl:output method="xml" omit-xml-declaration="yes" indent="yes" />
  <xsl:strip-space elements="*" />

  <!-- Driver for the two-phase transformation: builds the fully expanded result
       into a temporary tree, then runs that tree through the "dedupe" mode. -->
  <xsl:template match="/">
    <!-- Phase 1 result: <file> holding the output for every OBJECT in the
         source document. -->
    <xsl:variable name="expanded">
      <file>
        <xsl:apply-templates select="/descendant::OBJECT" />
      </file>
    </xsl:variable>

    <!-- To inspect the intermediate tree while debugging, enable:
    <xsl:result-document href="phase1-result.xml">
      <xsl:sequence select="$expanded" />
    </xsl:result-document> -->

    <!-- Phase 2: process the temporary tree in mode "dedupe". -->
    <xsl:apply-templates select="$expanded" mode="dedupe" />
  </xsl:template>

  <!-- first phase templates - build the output but with all types fully
       expanded out e.g. 

     <port id="33">
        <name>Output</name>
        <type>
           <rangeable-value id="PercentType_20_62177">
              <name>PercentType</name>
              <type>
                 <numerical id="MyFloat_20_62045">
                    <name>MyFloat</name>
                 </numerical>
              </type>
           </rangeable-value>
        </type>
     </port>
  -->

  <!-- Each key carries exactly the name used as a TYPE/@TREF (or
       BASE-RANGEABLE-REF/@TYPE) value, so that attribute value can be handed
       straight to key() as the key name and no conditional is needed. -->
  <xsl:key name="RANGEABLE-VALUE"
           match="RANGEABLE-VALUE"
           use="@ID"/>
  <xsl:key name="NUMERICAL"
           match="NUMERICAL"
           use="@ID"/>

  <!-- Phase 1: one <object> per OBJECT, carrying the source ID and a <ports>
       wrapper around the result of processing each PORTS/PORT. -->
  <xsl:template match="OBJECT">
    <object>
      <xsl:attribute name="id" select="@ID" />
      <ports>
        <xsl:apply-templates select="PORTS/PORT" />
      </ports>
    </object>
  </xsl:template>

  <!-- Phase 1: one <port> per PORT with its ID, its NAME as <name>, followed by
       whatever the TYPE child produces. -->
  <xsl:template match="PORT">
    <port>
      <xsl:attribute name="id" select="@ID" />
      <name><xsl:value-of select="NAME" /></name>
      <xsl:apply-templates select="TYPE" />
    </port>
  </xsl:template>

  <!-- Phase 1: wraps a type definition in <type>. The matched element either
       contains the definition as a child element, or names the kind of
       definition in a TYPE (BASE-RANGEABLE-REF) or TREF (TYPE) attribute and
       holds a '/'-prefixed ID as text.
       Hardened against three inputs that previously broke the lookup:
         * whitespace around the reference text (it is now normalized),
         * more than one text node (text is joined before tokenizing, so the
           template no longer depends on whitespace stripping),
         * neither @TYPE nor @TREF present (key() is then not called at all).
       key() still raises an error when no xsl:key with the given name is
       declared, so every TYPE/TREF value in use needs a matching xsl:key. -->
  <xsl:template match="TYPE | BASE-RANGEABLE-REF">
    <!-- Name of the key to consult; empty string when neither attribute exists. -->
    <xsl:variable name="key-name" select="string((@TYPE, @TREF)[1])" />
    <!-- Last '/'-separated token of the trimmed text; empty sequence when the
         element has no text content. -->
    <xsl:variable name="ref-id"
                  select="tokenize(normalize-space(string-join(text(), '')), '/')[last()]" />
    <type>
      <xsl:apply-templates
          select="* | (if ($key-name ne '' and exists($ref-id))
                       then key($key-name, $ref-id)
                       else ())" />
    </type>
  </xsl:template>

  <!-- Phase 1: fully expanded NUMERICAL, written as <numerical id="..."> with
       the source NAME as its <name> child. -->
  <xsl:template match="NUMERICAL">
    <numerical>
      <xsl:attribute name="id" select="@ID" />
      <name><xsl:value-of select="NAME" /></name>
    </numerical>
  </xsl:template>

  <!-- Phase 1: fully expanded RANGEABLE-VALUE, written as
       <rangeable-value id="..."> with the source NAME as <name>, followed by
       whatever its BASE-RANGEABLE-REF child produces. -->
  <xsl:template match="RANGEABLE-VALUE">
    <rangeable-value>
      <xsl:attribute name="id" select="@ID" />
      <name><xsl:value-of select="NAME" /></name>
      <xsl:apply-templates select="BASE-RANGEABLE-REF" />
    </rangeable-value>
  </xsl:template>

  <!-- Phase 2: de-duplication. Every fully expanded type after the first one
       with the same id is replaced by an href pointing at that first one.
       This key indexes the phase 1 output elements by their id attribute. -->

  <xsl:key name="dedup-key"
           match="numerical | rangeable-value"
           use="@id" />

  <!-- Identity rule for mode "dedupe": copies each node and continues with its
       attributes and then its children in the same mode. -->
  <xsl:template match="node() | @*" mode="dedupe">
    <xsl:copy>
      <xsl:apply-templates select="@*" mode="dedupe" />
      <xsl:apply-templates select="node()" mode="dedupe" />
    </xsl:copy>
  </xsl:template>

  <!-- Replaces a <type> whose child definition is not the first element that
       dedup-key returns for its id with a single href-only element. The two
       contexts use different names for that element: under a rangeable-value
       it is <type-ref href="#..."/>, elsewhere it takes the name of the
       definition itself, e.g. <numerical href="#..."/>. -->
  <xsl:template match="type[*[not(. is key('dedup-key', @id)[1])]]" mode="dedupe">
    <xsl:variable name="ref-name"
                  select="if (parent::rangeable-value) then 'type-ref' else node-name(*[1])" />
    <xsl:element name="{$ref-name}">
      <xsl:attribute name="href" select="concat('#', */@id)" />
    </xsl:element>
  </xsl:template>

</xsl:stylesheet>

This turned out a little more fiddly than I expected, mainly due to the somewhat inconsistent format you've specified for cross-references (under port you use <numerical href="#..." /> or <rangeable-value href="#..." /> whereas under rangeable-value you use <type-ref>) but I still think it makes the intention clearer than the original approach.