better cleaning

This commit is contained in:
skyanth 2020-02-20 17:16:08 +01:00
parent 3f4a90731c
commit d04838af2c
2 changed files with 46 additions and 24 deletions

View File

@ -2,6 +2,7 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs" version="2.0">
<xsl:strip-space elements="*"/>
<xsl:output indent="yes" method="xml"/>
<xsl:template match="@* | node()">
@ -10,13 +11,8 @@
</xsl:copy>
</xsl:template>
<!-- remove all spans; they are not used in pentext -->
<xsl:template match="span">
<xsl:apply-templates/>
</xsl:template>
<!-- remove all divs; they are not used in findings -->
<xsl:template match="div">
<!-- unwrap the following elements (drop the tags, keep their content); they are not used in pentext -->
<xsl:template match="span | div | font">
<xsl:apply-templates/>
</xsl:template>
@ -53,9 +49,14 @@
<!-- prepend .. to img src values that start with /uploads/, e.g. <img src="/uploads/[long code]/file.png"/> -->
<xsl:template match="img/@src">
<xsl:choose><xsl:when test="starts-with(., '/uploads/')">
<xsl:choose>
<xsl:when test="starts-with(., '/uploads/')">
<xsl:attribute name="src" select="concat('..', .)"/>
</xsl:when><xsl:otherwise><xsl:copy></xsl:copy></xsl:otherwise></xsl:choose>
</xsl:when>
<xsl:otherwise>
<xsl:copy/>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<!-- get rid of superfluous breaks before images, h3 tags or paragraphs -->
@ -63,7 +64,7 @@
<!-- Suppress a br when an img, h3 or p occurs anywhere among its following
     siblings: the template is empty, so nothing is written for the br. -->
<xsl:template
  match="br[following-sibling::img or following-sibling::h3 or following-sibling::p]"/>
<!-- insert default img width to nudge pentesters :) -->
<!-- insert default img width to familiarize pentesters with the concept of image size :) -->
<xsl:template match="img[not(@height) and not(@width)]">
<xsl:copy>
<xsl:attribute name="width">17</xsl:attribute>
@ -77,8 +78,6 @@
<xsl:apply-templates/>
</xsl:template>
<!-- Unwrap a p that has an img as its only element child: the p itself is
     not copied, its child nodes are processed in its place.
     Only child nodes are selected. Sending the p's attributes through the
     generic copy template as well would attach them to whatever element
     encloses the p in the result, or raise XTDE0410 when that element has
     already received child content. -->
<xsl:template match="p[child::img[not(preceding-sibling::*)][not(following-sibling::*)]]">
  <xsl:apply-templates select="node()"/>
</xsl:template>
</xsl:stylesheet>

View File

@ -2,7 +2,8 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs" version="2.0">
<xsl:output indent="yes" method="xml"/>
<xsl:strip-space elements="*"/>
<xsl:output indent="yes" method="xml"></xsl:output>
@ -13,8 +14,30 @@
</xsl:template>
<xsl:template match="p[child::img[not(preceding-sibling::*)][not(following-sibling::*)]]">
<!--<xsl:template match="p[child::img[not(preceding-sibling::*)][not(following-sibling::*)]]">
<xsl:copy-of select="img"/>
</xsl:template>-->
<!-- A p that holds no text node and whose element children are all img
     (at least one): drop the p wrapper and emit deep copies of the images.
     Priority 10 lets this rule win over the other p/img rules. -->
<xsl:template match="p[img][not(text() | *[not(self::img)])]" priority="10">
  <xsl:copy-of select="img"/>
</xsl:template>
<!-- A p made of exactly: one leading text node, then one img, optionally
     followed by a second img and nothing else.
     Output: a fresh p holding a copy of the intro text, followed by copies of
     the image(s) as siblings of that p. The new p is a literal element, so the
     attributes of the matched p are not carried over. -->
<xsl:template priority="5" match="
    p[node()[1]/self::text()]
     [node()[2]/self::img]
     [not(node()[3]) or (node()[3]/self::img and not(node()[4]))]">
  <p>
    <xsl:copy-of select="text()[1]"/>
  </p>
  <xsl:copy-of select="img"/>
</xsl:template>
<!-- Catch-all for the remaining cases: a p whose last child node is an img.
     Everything before that img is copied into a fresh p; the trailing img is
     copied out after it. Priority 1 keeps this below the more specific p/img
     rules. -->
<xsl:template match="p[img[not(following-sibling::node())]]" priority="1">
  <p>
    <xsl:copy-of select="node()[position() != last()]"/>
  </p>
  <xsl:copy-of select="img[not(following-sibling::node())]"/>
</xsl:template>
</xsl:stylesheet>