Merge branch 'master' of https://github.com/radicallyopensecurity/pentext

2017-06-29 11:08:54 +02:00 · 2017-06-29 11:08:54 +02:00 · dd97bdeb47
commit dd97bdeb47
parent 319516ecc9 67c3d16efb
1 changed files with 43 additions and 73 deletions
--- a/chatops/python/validate_report.py
+++ b/chatops/python/validate_report.py
@ -32,23 +32,25 @@ import sys
 import textwrap
 import xml.sax

+
 try:
    from lxml import etree as ElementTree
+    from titlecase import titlecase
+    import enchant
 except ImportError as exception:
-    print('[-] This script needs lxml',
+    # Name the import that failed, point at requirements.txt, then abort
+    print('[-] This script needs the lxml, pyenchant and titlecase '
+          'libraries ({0})'.format(exception),
+          file=sys.stderr)
+    print("    Install requirements using pip install -r requirements.txt",
          file=sys.stderr)
-    print("Install lxml with: sudo pip install lxml", file=sys.stderr)
    sys.exit(-1)


 # When set to True, the report will be validated using docbuilder
 DOCBUILDER = False
-VOCABULARY = 'project-vocabulary.pws'
+UPPERCASE = ['TCP', 'UDP', 'XSS']
+VOCABULARY = 'project-vocabulary.txt'
 # Snippets may contain XML fragments without the proper entities
 EXAMPLEDIR = 'examples/'
-NOT_CAPITALIZED = ['a', 'an', 'and', 'as', 'at', 'but', 'by', 'for', 'in',
-                   'jQuery', 'jQuery-UI', 'nor', 'of', 'on', 'or', 'the', 'to',
-                   'up']
 SNIPPETDIR = 'snippets/'
 STATUS = 25 # loglevel for 'generic' status messages
 TEMPLATEDIR = 'templates/'
@ -58,15 +60,6 @@ WARN_LINE = 80  # There should be a separation character after x characters...
 MAX_LINE = 86  # ... and before y


-if DOCBUILDER:
-    import docbuilder_proxy
-    import proxy_vagrant
-try:
-    import aspell
-except ImportError:
-    print('[-] aspell not installed: spelling not available',)
-
-
 class LogFormatter(logging.Formatter):
    """
    Format log messages according to their type.
@ -130,34 +123,14 @@ the Free Software Foundation, either version 3 of the License, or
    return vars(parser.parse_args())


-def initialize_speller():
-    """
-    Initialize and return speller module.
-    """
-    speller = None
-    try:
-        speller = aspell.Speller(('lang', 'en'),
-                                 ('personal-dir', '.'),
-                                 ('personal', VOCABULARY))
-    except aspell.AspellConfigError as exception:  # some versions of aspell use a different path
-        logging.debug('Encountered exception when trying to intialize spelling: %s',
-                      exception)
-        try:
-            speller = aspell.Speller(('lang', 'en'),
-                                     ('personal-path', './' + VOCABULARY))
-        except aspell.AspellSpellerError as exception:
-            logging.error('Could not initialize speller: %s', exception)
-    if speller:
-        [logging.debug('%s %s', i[0], i[2]) for i in speller.ConfigKeys()]
-    return speller
-
-
-def validate_spelling(tree, filename, options, speller):
+def validate_spelling(tree, filename, options):
    """
    Check spelling of text within tags.
    If options['learn'], then unknown words will be added to the dictionary.
    """
    result = True
+    learn = []
+    speller = enchant.DictWithPWL("en_US", VOCABULARY)
    if not speller:
        options['spelling'] = False
        return result
@ -168,17 +141,20 @@ def validate_spelling(tree, filename, options, speller):
               section.tag not in ('a', 'code', 'monospace', 'pre'):
                for word in re.findall('([a-zA-Z]+\'?[a-zA-Z]+)', section.text):
                    if not speller.check(word):
-                        if options['learn']:
-                            speller.addtoPersonal(word)
-                        else:
-                            result = False
-                            logging.warning('Misspelled (unknown) word %s in %s',
-                                            word.encode('utf-8'), filename)
-        if options['learn']:
-            speller.saveAllwords()
-    except aspell.AspellSpellerError as exception:
-        logging.error('Disabled spelling (%s)', exception)
-        options['spelling'] = False
+                        if word.upper() not in (learned.upper() for learned in learn):
+                            learn.append(word)
+                        result = False
+                        logging.warning('Misspelled (unknown) word %s in %s',
+                                        word.encode('utf-8'), filename)
+    except:
+        print('[-] Hmm. spell exception')
+    if options['learn'] and learn:
+        try:
+            with open(VOCABULARY, mode='a+') as open_file:
+                for word in learn:
+                    open_file.write(word + '\n')
+        except IOError:
+            logging.error('Could not write to %s', open_file)
    return result


@ -220,7 +196,6 @@ def validate_files(filenames, options):
    findings = []
    non_findings = []
    scans = []
-    speller = initialize_speller()
    for filename in filenames:
        if (filename.lower().endswith('.xml') or
                filename.lower().endswith('xml"')):
@ -229,7 +204,7 @@ def validate_files(filenames, options):
                   (REPORT in filename and not options['no_report']):
                    masters.append(filename)
                    # try:
-                type_result, xml_type = validate_xml(filename, options, speller)
+                type_result, xml_type = validate_xml(filename, options)
                result = result and type_result
                if 'non-finding' in xml_type:
                    non_findings.append(filename)
@ -255,7 +230,7 @@ def validate_report():
    return proxy_vagrant.execute_command(host, command)


-def validate_xml(filename, options, speller):
+def validate_xml(filename, options):
    """
    Validates XML file by trying to parse it.
    Returns True if the file validated successfully.
@ -270,7 +245,7 @@ def validate_xml(filename, options, speller):
        with open(filename, 'rb') as xml_file:
            xml.sax.parse(xml_file, xml.sax.ContentHandler())
            tree = ElementTree.parse(filename, ElementTree.XMLParser(strip_cdata=False))
-            type_result, xml_type = validate_type(tree, filename, options, speller)
+            type_result, xml_type = validate_type(tree, filename, options)
            result = validate_long_lines(tree, filename, options) and result and type_result
        if options['edit'] and not result:
            open_editor(filename)
@ -295,29 +270,24 @@ def get_all_text(node):
    return text_string.strip()


+def abbreviations(word, **kwargs):
+    """
+    Callback for titlecase(): force known abbreviations to upper case.
+
+    Returns the all-caps form of @word when that form is listed in UPPERCASE,
+    and None otherwise. Extra keyword arguments are accepted and ignored.
+    """
+    shouted = word.upper()
+    if shouted not in UPPERCASE:
+        return None
+    return shouted
+
+
 def is_capitalized(line):
    """
    Checks whether all words in @line start with a capital.

    Returns True if that's the case.
    """
-    return not line or line.strip() == capitalize(line)
+    return not line or line.strip() == titlecase(line, callback=abbreviations).strip()


-def capitalize(line):
-    """
-    Returns a capitalized version of @line, where the first word and all other
-    words not in NOT_CAPITALIZED are capitalized.
-    """
-    capitalized = ''
-    for word in line.strip().split():
-        if word not in NOT_CAPITALIZED or not len(capitalized):
-            word = word[0].upper() + word[1:]
-        capitalized += word + ' '
-    return capitalized.strip()
-
-
-def validate_type(tree, filename, options, speller):
+def validate_type(tree, filename, options):
    """
    Performs specific checks based on type.
    Currently only finding and non-finding are supported.
@ -329,7 +299,7 @@ def validate_type(tree, filename, options, speller):
    attributes = []
    tags = []
    if options['spelling']:
-        result = validate_spelling(tree, filename, options, speller)
+        result = validate_spelling(tree, filename, options)
    if xml_type == 'pentest_report':
        attributes = ['findingCode']
    if xml_type == 'finding':
@ -362,7 +332,7 @@ def validate_type(tree, filename, options, speller):
                print('[A] Type missing capitalization (expected {0}, read {1})'.
                      format(capitalize(root.attrib[attribute]),
                             root.attrib[attribute]))
-                root.attrib[attribute] = capitalize(root.attrib[attribute])
+                root.attrib[attribute] = titlecase(root.attrib[attribute], callback=abbreviations)
                fix = True
    for tag in tags:
        if root.find(tag) is None:
@ -376,9 +346,9 @@ def validate_type(tree, filename, options, speller):
        if tag == 'title' and (options['capitalization'] and \
                               not is_capitalized(root.find(tag).text)):
            print('[A] Title missing capitalization in {0} (expected {1}, read {2})'.
-                  format(filename, capitalize(root.find(tag).text),
-                         root.find(tag).text))
-            root.find(tag).text = capitalize(root.find(tag).text)
+                  format(filename, titlecase(root.find(tag).text, callback=abbreviations).strip(),
+                         root.find(tag).text.strip()))
+            root.find(tag).text = titlecase(root.find(tag).text, callback=abbreviations)
            fix = True
        all_text = get_all_text(root.find(tag))
        if tag == 'description' and all_text.strip()[-1] != '.':