better cleaning

This commit is contained in:
skyanth 2020-02-20 17:16:08 +01:00
parent 3f4a90731c
commit d04838af2c
2 changed files with 46 additions and 24 deletions

View File

@ -2,6 +2,7 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs" version="2.0"> xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs" version="2.0">
<xsl:strip-space elements="*"/>
<xsl:output indent="yes" method="xml"/> <xsl:output indent="yes" method="xml"/>
<xsl:template match="@* | node()"> <xsl:template match="@* | node()">
@ -10,13 +11,8 @@
</xsl:copy> </xsl:copy>
</xsl:template> </xsl:template>
<!-- remove all spans; they are not used in pentext --> <!-- remove all of the following elements; they are not used in pentext -->
<xsl:template match="span"> <xsl:template match="span | div | font">
<xsl:apply-templates/>
</xsl:template>
<!-- remove all divs; they are not used in findings -->
<xsl:template match="div">
<xsl:apply-templates/> <xsl:apply-templates/>
</xsl:template> </xsl:template>
@ -27,19 +23,19 @@
<!-- remove selected attributes from selected elements --> <!-- remove selected attributes from selected elements -->
<xsl:template match="pre/@class | a/@class | tr/@class | img/@alt"/> <xsl:template match="pre/@class | a/@class | tr/@class | img/@alt"/>
<!-- change em to i --> <!-- change em to i -->
<xsl:template match="em"> <xsl:template match="em">
<i> <i>
<xsl:apply-templates/> <xsl:apply-templates/>
</i> </i>
</xsl:template> </xsl:template>
<!-- change strong to b --> <!-- change strong to b -->
<xsl:template match="strong"> <xsl:template match="strong">
<b> <b>
<xsl:apply-templates/> <xsl:apply-templates/>
</b> </b>
</xsl:template> </xsl:template>
<!-- remove h*, make bold paragraph --> <!-- remove h*, make bold paragraph -->
@ -53,9 +49,14 @@
<!-- add .. to <img src="/uploads/[long code]/file.png"/> --> <!-- add .. to <img src="/uploads/[long code]/file.png"/> -->
<xsl:template match="img/@src"> <xsl:template match="img/@src">
<xsl:choose><xsl:when test="starts-with(., '/uploads/')"> <xsl:choose>
<xsl:attribute name="src" select="concat('..', .)"/> <xsl:when test="starts-with(., '/uploads/')">
</xsl:when><xsl:otherwise><xsl:copy></xsl:copy></xsl:otherwise></xsl:choose> <xsl:attribute name="src" select="concat('..', .)"/>
</xsl:when>
<xsl:otherwise>
<xsl:copy/>
</xsl:otherwise>
</xsl:choose>
</xsl:template> </xsl:template>
<!-- get rid of superfluous breaks before images or h3 tags --> <!-- get rid of superfluous breaks before images or h3 tags -->
@ -63,7 +64,7 @@
<xsl:template <xsl:template
match="br[following-sibling::img] | br[following-sibling::h3] | br[following-sibling::p]"> </xsl:template> match="br[following-sibling::img] | br[following-sibling::h3] | br[following-sibling::p]"> </xsl:template>
<!-- insert default img width to nudge pentesters :) --> <!-- insert default img width to familiarize pentesters with the concept of image size :) -->
<xsl:template match="img[not(@height) and not(@width)]"> <xsl:template match="img[not(@height) and not(@width)]">
<xsl:copy> <xsl:copy>
<xsl:attribute name="width">17</xsl:attribute> <xsl:attribute name="width">17</xsl:attribute>
@ -77,8 +78,6 @@
<xsl:apply-templates/> <xsl:apply-templates/>
</xsl:template> </xsl:template>
<xsl:template match="p[child::img[not(preceding-sibling::*)][not(following-sibling::*)]]">
<xsl:apply-templates select="@* | node()"/>
</xsl:template>
</xsl:stylesheet> </xsl:stylesheet>

View File

@ -2,7 +2,8 @@
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs" version="2.0"> xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs" version="2.0">
<xsl:output indent="yes" method="xml"/> <xsl:strip-space elements="*"/>
<xsl:output indent="yes" method="xml"></xsl:output>
@ -13,8 +14,30 @@
</xsl:template> </xsl:template>
<xsl:template match="p[child::img[not(preceding-sibling::*)][not(following-sibling::*)]]"> <!--<xsl:template match="p[child::img[not(preceding-sibling::*)][not(following-sibling::*)]]">
<xsl:copy-of select="img"/> <xsl:copy-of select="img"/>
</xsl:template>-->
<!-- Unwrap image-only paragraphs.
     Matches a p that has no text-node children, has at least one element child, and whose
     element children are all img. The img elements are written to the output with copy-of
     (so no other template rules are applied to them) and the surrounding p is dropped.
     priority="10" ranks this rule above the priority 5 and priority 1 p rules in this stylesheet. -->
<xsl:template match="p[not(text()) and * and not(*[not(self::img)])]" priority="10">
<!-- "*" selects element children only; the match pattern guarantees they are all img -->
<xsl:copy-of select="*"/>
</xsl:template>
<!-- Split a paragraph that consists of intro text followed by one or two images.
     Matches a p whose first child node is a text node, whose second child node is an img,
     and which either has no third child node or has an img as its third and final child node.
     Output: a new p holding only the leading text node, followed by the img element(s) as
     siblings of that new p. Attributes of the original p are not copied to the new p.
     priority="5" ranks this rule below the priority 10 image-only rule and above the
     priority 1 trailing-image rule. -->
<xsl:template match="
p[node()[1][self::text()] and node()[2][self::img] and
(not(node()[3])
or
(node()[3][self::img] and not(node()[4])))
]" priority="5">
<!-- new paragraph containing only the leading text node -->
<p><xsl:copy-of select="node()[1]"/></p>
<!-- the element children (the one or two img), emitted after the paragraph -->
<xsl:copy-of select="*"/>
</xsl:template> </xsl:template>
<!-- Fallback for paragraphs that end in an image.
     Matches any p whose last child node is an img. Output: a new p containing every child
     node except the last one, followed by that final img as a sibling of the new p.
     Attributes of the original p are not copied to the new p.
     priority="1" is lower than the priority 10 and priority 5 p rules visible in this
     stylesheet, so this rule only applies to paragraphs those rules do not match. -->
<xsl:template match="
p[node()[last()][self::img]]" priority="1">
<!-- all child nodes before the trailing img stay inside the paragraph -->
<p><xsl:copy-of select="node()[position() &lt; last()]"/></p>
<!-- the trailing img itself, moved out of the paragraph -->
<xsl:copy-of select="node()[last()]"/>
</xsl:template>
</xsl:stylesheet> </xsl:stylesheet>