This commit is contained in:
skyanth 2017-06-29 11:08:54 +02:00
commit dd97bdeb47

View File

@ -32,23 +32,25 @@ import sys
import textwrap
import xml.sax
try:
from lxml import etree as ElementTree
from titlecase import titlecase
import enchant
except ImportError as exception:
print('[-] This script needs lxml',
print('[-] This script needs the lxml, pyenchant and titlecase libary ({0}'.format(exception),
file=sys.stderr)
print(" Install requirements using pip install -r requirements.txt",
file=sys.stderr)
print("Install lxml with: sudo pip install lxml", file=sys.stderr)
sys.exit(-1)
# When set to True, the report will be validated using docbuilder
DOCBUILDER = False
VOCABULARY = 'project-vocabulary.pws'
UPPERCASE = ['TCP', 'UDP', 'XSS']
VOCABULARY = 'project-vocabulary.txt'
# Snippets may contain XML fragments without the proper entities
EXAMPLEDIR = 'examples/'
NOT_CAPITALIZED = ['a', 'an', 'and', 'as', 'at', 'but', 'by', 'for', 'in',
'jQuery', 'jQuery-UI', 'nor', 'of', 'on', 'or', 'the', 'to',
'up']
SNIPPETDIR = 'snippets/'
STATUS = 25 # loglevel for 'generic' status messages
TEMPLATEDIR = 'templates/'
@ -58,15 +60,6 @@ WARN_LINE = 80 # There should be a separation character after x characters...
MAX_LINE = 86 # ... and before y
if DOCBUILDER:
import docbuilder_proxy
import proxy_vagrant
try:
import aspell
except ImportError:
print('[-] aspell not installed: spelling not available',)
class LogFormatter(logging.Formatter):
"""
Format log messages according to their type.
@ -130,34 +123,14 @@ the Free Software Foundation, either version 3 of the License, or
return vars(parser.parse_args())
def initialize_speller():
"""
Initialize and return speller module.
"""
speller = None
try:
speller = aspell.Speller(('lang', 'en'),
('personal-dir', '.'),
('personal', VOCABULARY))
except aspell.AspellConfigError as exception: # some versions of aspell use a different path
logging.debug('Encountered exception when trying to intialize spelling: %s',
exception)
try:
speller = aspell.Speller(('lang', 'en'),
('personal-path', './' + VOCABULARY))
except aspell.AspellSpellerError as exception:
logging.error('Could not initialize speller: %s', exception)
if speller:
[logging.debug('%s %s', i[0], i[2]) for i in speller.ConfigKeys()]
return speller
def validate_spelling(tree, filename, options, speller):
def validate_spelling(tree, filename, options):
"""
Check spelling of text within tags.
If options['learn'], then unknown words will be added to the dictionary.
"""
result = True
learn = []
speller = enchant.DictWithPWL("en_US", VOCABULARY)
if not speller:
options['spelling'] = False
return result
@ -168,17 +141,20 @@ def validate_spelling(tree, filename, options, speller):
section.tag not in ('a', 'code', 'monospace', 'pre'):
for word in re.findall('([a-zA-Z]+\'?[a-zA-Z]+)', section.text):
if not speller.check(word):
if options['learn']:
speller.addtoPersonal(word)
else:
result = False
logging.warning('Misspelled (unknown) word %s in %s',
word.encode('utf-8'), filename)
if options['learn']:
speller.saveAllwords()
except aspell.AspellSpellerError as exception:
logging.error('Disabled spelling (%s)', exception)
options['spelling'] = False
if word.upper() not in (learned.upper() for learned in learn):
learn.append(word)
result = False
logging.warning('Misspelled (unknown) word %s in %s',
word.encode('utf-8'), filename)
except:
print('[-] Hmm. spell exception')
if options['learn'] and learn:
try:
with open(VOCABULARY, mode='a+') as open_file:
for word in learn:
open_file.write(word + '\n')
except IOError:
logging.error('Could not write to %s', open_file)
return result
@ -220,7 +196,6 @@ def validate_files(filenames, options):
findings = []
non_findings = []
scans = []
speller = initialize_speller()
for filename in filenames:
if (filename.lower().endswith('.xml') or
filename.lower().endswith('xml"')):
@ -229,7 +204,7 @@ def validate_files(filenames, options):
(REPORT in filename and not options['no_report']):
masters.append(filename)
# try:
type_result, xml_type = validate_xml(filename, options, speller)
type_result, xml_type = validate_xml(filename, options)
result = result and type_result
if 'non-finding' in xml_type:
non_findings.append(filename)
@ -255,7 +230,7 @@ def validate_report():
return proxy_vagrant.execute_command(host, command)
def validate_xml(filename, options, speller):
def validate_xml(filename, options):
"""
Validates XML file by trying to parse it.
Returns True if the file validated successfully.
@ -270,7 +245,7 @@ def validate_xml(filename, options, speller):
with open(filename, 'rb') as xml_file:
xml.sax.parse(xml_file, xml.sax.ContentHandler())
tree = ElementTree.parse(filename, ElementTree.XMLParser(strip_cdata=False))
type_result, xml_type = validate_type(tree, filename, options, speller)
type_result, xml_type = validate_type(tree, filename, options)
result = validate_long_lines(tree, filename, options) and result and type_result
if options['edit'] and not result:
open_editor(filename)
@ -295,29 +270,24 @@ def get_all_text(node):
return text_string.strip()
def abbreviations(word, **kwargs):
"""
Check whether word needs to be all caps
"""
if word.upper() in (UPPERCASE):
return word.upper()
def is_capitalized(line):
"""
Checks whether all words in @line start with a capital.
Returns True if that's the case.
"""
return not line or line.strip() == capitalize(line)
return not line or line.strip() == titlecase(line, callback=abbreviations).strip()
def capitalize(line):
"""
Returns a capitalized version of @line, where the first word and all other
words not in NOT_CAPITALIZED are capitalized.
"""
capitalized = ''
for word in line.strip().split():
if word not in NOT_CAPITALIZED or not len(capitalized):
word = word[0].upper() + word[1:]
capitalized += word + ' '
return capitalized.strip()
def validate_type(tree, filename, options, speller):
def validate_type(tree, filename, options):
"""
Performs specific checks based on type.
Currently only finding and non-finding are supported.
@ -329,7 +299,7 @@ def validate_type(tree, filename, options, speller):
attributes = []
tags = []
if options['spelling']:
result = validate_spelling(tree, filename, options, speller)
result = validate_spelling(tree, filename, options)
if xml_type == 'pentest_report':
attributes = ['findingCode']
if xml_type == 'finding':
@ -362,7 +332,7 @@ def validate_type(tree, filename, options, speller):
print('[A] Type missing capitalization (expected {0}, read {1})'.
format(capitalize(root.attrib[attribute]),
root.attrib[attribute]))
root.attrib[attribute] = capitalize(root.attrib[attribute])
root.attrib[attribute] = titlecase(root.attrib[attribute], callback=abbreviations)
fix = True
for tag in tags:
if root.find(tag) is None:
@ -376,9 +346,9 @@ def validate_type(tree, filename, options, speller):
if tag == 'title' and (options['capitalization'] and \
not is_capitalized(root.find(tag).text)):
print('[A] Title missing capitalization in {0} (expected {1}, read {2})'.
format(filename, capitalize(root.find(tag).text),
root.find(tag).text))
root.find(tag).text = capitalize(root.find(tag).text)
format(filename, titlecase(root.find(tag).text, callback=abbreviations).strip(),
root.find(tag).text.strip()))
root.find(tag).text = titlecase(root.find(tag).text, callback=abbreviations)
fix = True
all_text = get_all_text(root.find(tag))
if tag == 'description' and all_text.strip()[-1] != '.':