From 83865dddda864e458e40af840b526963e898d016 Mon Sep 17 00:00:00 2001
From: Carlos Polop <carlospolop@gmail.com>
Date: Tue, 13 Jul 2021 14:19:31 +0200
Subject: [PATCH] parser

---
 parser/linpeas-parser.py | 238 ---------------------------------------
 parser/peas-parser.py    | 144 +++++++++++++++++++++++
 2 files changed, 144 insertions(+), 238 deletions(-)
 delete mode 100755 parser/linpeas-parser.py
 create mode 100755 parser/peas-parser.py

diff --git a/parser/linpeas-parser.py b/parser/linpeas-parser.py
deleted file mode 100755
index e294a3d..0000000
--- a/parser/linpeas-parser.py
+++ /dev/null
@@ -1,238 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-import re
-import json
-import logging
-
-# Pattern to identify main section titles
-MAIN_section_PATTERN = "════════════════════════════════════╣"
-
-# Main sections (specific for linPEAS)
-BASIC_INFORMATION_SLUG = "basic-information"
-SOFTWARE_INFORMATION_SLUG = "software-information"
-SYSTEM_INFORMATION_SLUG = "system-information"
-AVAILABLE_SOFTWARE_SLUG = "available-software"
-NETWORK_INFORMATION_SLUG = "network-information"
-USERS_INFORMATION_SLUG = "users-information"
-INTERESTING_FILES_SLUG = "interesting-files"
-
-try:
-    linpeas_output_path = sys.argv[1]
-except IndexError as err:
-    # You can pipe the output to "jq" if you have setup 
-    print("Error: Needs to pass the .out file\n./linpeas-parser.py <output_file>")
-    sys.exit()
-
-def basic_information_parser(json_output: dict, row: str) -> dict:
-    """Returns a dict
-
-    Parses a row a following the boundaries of basic information
-    """
-    parsed_row = {}
-
-    if ":" in row:
-        parsed_row = {"label": row.replace(":", " ").strip()}
-    elif "[+]" in row:
-        parsed_row = {"label": row.replace("[+]", "").strip()}
-    else: 
-        parsed_row = {"label": row}
-
-    return parsed_row
- 
-def software_information_parser(json_output: dict, row: str) -> dict:
-    """Returns a dict
-
-    Parses a row a following the boundaries of software information
-    """
-    return {"row": row}
-
-def system_information_parser(json_output: dict, row: str) -> dict:
-    """Returns a dict
-
-    Parses a row a following the boundaries of system information
-    """
-    
-    return {"row": row}
-
-def available_software_parser(json_output: dict, row: str) -> dict:
-    """Returns a dict
-
-    Parses a row a following the boundaries of available software
-    """
-    return {"row": row}
-
-def network_information_parser(json_output: dict, row: str) -> dict:
-    """Returns a dict
-
-    Parses a row a following the boundaries of network information
-    """
-    return {"row": row}
-
-def users_information_parser(json_output: dict, row: str) -> dict:
-    """Returns a dict
-
-    Parses a row a following the boundaries of network information
-    """
-    return {"row": row}
-
-def interesting_files_parser(json_output: dict, row: str) -> dict:
-    """Returns a dict
-
-    Parses a row a following the boundaries of network information
-    """
-    return {"row": row}
-
-def get_parser_by_slug(slug: str):
-    """Returns a function
-
-    Returns the right parser based on the slug
-    """
-    return parsers[slug]
-
-parsers = {
-    BASIC_INFORMATION_SLUG: basic_information_parser,
-    SOFTWARE_INFORMATION_SLUG: software_information_parser,
-    SYSTEM_INFORMATION_SLUG: system_information_parser,
-    AVAILABLE_SOFTWARE_SLUG: available_software_parser,
-    NETWORK_INFORMATION_SLUG: network_information_parser,
-    USERS_INFORMATION_SLUG: users_information_parser,
-    INTERESTING_FILES_SLUG: interesting_files_parser,
-}
-
-def read_file(output_path: str) -> [str]:
-    """Returns a list of strings
-
-    Reads file from a specich path and returns it as a list
-    """
-    return [row.strip() for row in open(output_path, 'r').readlines() if row]
-    
-def is_starting_section(
-    row: str,
-    pattern: str = MAIN_section_PATTERN
-) -> bool:
-    """Returns a boolean
-
-    Checks if row matches the pattern and returns True or False
-    """
-    return row.find(pattern) > -1 
-
-def extracts_title_label(row: str) -> str:
-    """Returns a dict
-
-    Extracts a strings whose rows matches the pattern
-    """
-    return re.findall(r"\w+\s\w+", row)
-
-def slugify_text(title: str) -> str:
-    """Returns a dict
-
-    Returns a slugify version of the string. 
-    e.g Basic Information -> basic-information
-    """
-    return title.lower().replace(" ", "-")
-
-def create_new_main_entry(
-    json_output: object, 
-    title: str, 
-    row_number: int
-) -> None:
-    """Returns None
-
-    Adds a new entry based using "title" as key to return a 
-    json output with the initial row number and empty info
-    property where the upcoming information should be added 
-    """
-
-    slug_title = slugify_text(title)
-
-    json_output[slug_title] = {
-        "label": title, 
-        "initial_row_number": row_number, 
-        "items": {}
-    }
-
-def get_range_between(
-    json_output: object,
-    section1: str,
-    section2: str
-) -> list[int, int]:
-    """Returns a list with two integers
-
-    Extracts the range between one main block and the next one. 
-    """
-    row_number_section1 = json_output[section1]["initial_row_number"] + 1
-    row_number_section2 = json_output[section2]["initial_row_number"] - 1 
-    return [row_number_section1, row_number_section2]
-
-def parse_block(
-    json_output: object,
-    rows: list[str],
-    main_entry_key: str,
-    block_range: list[int, int]
-) -> None:
-    """Returns None
-    
-    Modifies the "items" from each main section, adding information
-    from the report
-    """
-    if len(block_range) > 1:
-        initial_row, last_row = block_range
-        row_range = rows[initial_row:last_row]
-    elif len(block_range) == 1:
-        row_range = rows[block_range[0]:]
-
-    slug = slugify_text(main_entry_key)
-    
-    items = []
-
-    for row in row_range:
-        sub_section_parser = get_parser_by_slug(slug)
-        items.append(sub_section_parser(json_output, row))
-        
-    json_output[main_entry_key]["items"] = items
-
-def parse_initial_structure(rows: list[str]) -> object:
-    """Returns an object
-
-    Generates the initial main structure for the json ouput
-    with all the main entries and additional meta properties
-    """
-    json_output = {}
-    row_number = 0
-
-    for row in rows:
-        if is_starting_section(row, MAIN_section_PATTERN): 
-            title = extracts_title_label(row)  
-            if len(title) > 0:
-                clean_title = title[0].replace('32m', '')
-                create_new_main_entry(json_output, clean_title, row_number)
-        
-        row_number += 1
-    
-    return json_output
-
-def main():
-    rows = read_file(linpeas_output_path)
-    json_output = parse_initial_structure(rows)
-    json_output_keys = list(json_output.keys())
-    keys_length = len(json_output_keys)
-
-    for index in range(0, keys_length):
-        next_index = index + 1
-        if next_index < keys_length:
-            current_label = json_output_keys[index]
-            next_label = json_output_keys[index + 1]
-            
-            block_range = get_range_between(json_output, current_label, next_label)
-            parse_block(json_output, rows, current_label, block_range)
-
-        else:
-            last_section_initial_row_number = json_output[next_label]["initial_row_number"] + 1
-            parse_block(json_output, rows, next_label, [last_section_initial_row_number])
-
-    print(json.dumps(json_output))
-
-if __name__ == "__main__":
-    # execute only if run as a script
-    main()
diff --git a/parser/peas-parser.py b/parser/peas-parser.py
new file mode 100755
index 0000000..eed2627
--- /dev/null
+++ b/parser/peas-parser.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+
+import sys
+import re
+import json
+
+# Title markers; they are searched as plain substrings (is_section), not regexes
+TITLE1_PATTERN = r"════════════════════════════════════╣"
+TITLE2_PATTERN = r"╔══════════╣"
+TITLE3_PATTERN = r"══╣"
+INFO_PATTERN = r"╚ "
+TITLE_CHARS = ['═', '╔', '╣', '╚']
+
+# Per colour name: regexes matching the ANSI escape code that starts that colour
+COLORS = {
+    "RED": [r"\x1b\[1;31m"],
+    "GREEN": [r"\x1b\[1;32m"],
+    "YELLOW": [r"\x1b\[1;33m"],
+    "REDYELLOW": [r"\x1b\[1;31;103m"],
+    "BLUE": [r"\x1b\[1;34m"],
+    "LIGHTGREY": [r"\x1b\[1;37m"],
+    "DARKGREY": [r"\x1b\[1;90m"],
+}
+
+
+# Final JSON structure: filled by parse_line() and dumped to disk by main()
+FINAL_JSON = {}
+
+# Parsing cursors; all start at the root, i.e. "no section has been opened yet"
+C_SECTION = FINAL_JSON  # section that receives the next info/text line
+C_MAIN_SECTION = FINAL_JSON  # last section opened by a level-1 title
+C_2_SECTION = FINAL_JSON  # last section opened by a level-2 title
+C_3_SECTION = FINAL_JSON  # last section opened by a level-3 title
+
+
+ 
+    
+def is_section(line: str, pattern: str) -> bool:
+    """Tell whether a marker occurs in a line.
+
+    True when ``pattern`` is a substring of ``line`` (plain text, not a regex).
+    """
+    return pattern in line
+
+def get_colors(line: str) -> dict:
+    """Return, for every colour in COLORS, the texts printed in that colour.
+
+    The capture is non-greedy so that each coloured span ends at the next
+    escape byte instead of swallowing the other spans on the same line.
+    """
+    colors = {}
+    for c, regexs in COLORS.items():
+        found = (m for reg in regexs for m in re.findall(reg + r"(.*?)\x1b", line))
+        colors[c] = [clean_colors(m) for m in found]
+    return colors
+
+def clean_title(line: str) -> str:
+    """Strip the box-drawing decoration from a title line.
+
+    Every TITLE_CHARS character is removed, then any other non-ASCII
+    character, and finally the surrounding whitespace.
+    """
+    undecorated = "".join(ch for ch in line if ch not in TITLE_CHARS)
+    return undecorated.encode("ascii", "ignore").decode().strip()
+
+def clean_colors(line: str) -> str:
+    """Return ``line`` stripped of ANSI colour codes and outer whitespace.
+
+    Only real SGR sequences (ESC ``[`` digits/semicolons ``m``) are removed, so
+    text lying between two codes with no space in it is kept.
+    """
+    line = re.sub(r"\x1b\[[0-9;]*m", "", line)
+    # A lone ESC byte can remain (e.g. a match cut right after it); drop it.
+    return line.replace("\x1b", "").strip()
+
+
+def parse_title(line: str) -> str:
+    """Return the bare title text: decoration removed, then colour codes."""
+    undecorated = clean_title(line)
+    return clean_colors(undecorated)
+
+
+def parse_line(line: str):
+    """Parse the given line adding it to the FINAL_JSON structure
+
+    Titles open a new section at depth 1, 2 or 3 and make it current; the
+    "══╣" test comes last because both longer title markers contain it.
+    "╚ " lines are stored in the current section's "infos", anything else in
+    its "lines". Lines that have no parent section yet are ignored.
+    """
+    global FINAL_JSON, C_SECTION, C_MAIN_SECTION, C_2_SECTION, C_3_SECTION
+
+    new_section = { "sections": {}, "lines": [], "infos": [] }
+    if is_section(line, TITLE1_PATTERN):
+        FINAL_JSON[parse_title(line)] = new_section
+        C_SECTION = C_MAIN_SECTION = new_section
+
+    elif is_section(line, TITLE2_PATTERN):
+        if C_MAIN_SECTION is not FINAL_JSON:  # else no main section to attach to
+            # The C_* globals hold section dicts, so index them directly.
+            C_MAIN_SECTION["sections"][parse_title(line)] = new_section
+            C_SECTION = C_2_SECTION = new_section
+
+    elif is_section(line, TITLE3_PATTERN):
+        if C_2_SECTION is not FINAL_JSON:
+            C_2_SECTION["sections"][parse_title(line)] = new_section
+            C_SECTION = C_3_SECTION = new_section
+
+    elif C_SECTION is FINAL_JSON:
+        return  # info/text before the first section has nowhere to go
+
+    elif is_section(line, INFO_PATTERN):
+        C_SECTION["infos"].append(parse_title(line))
+
+    else:
+        C_SECTION["lines"].append({
+            "raw_text": line,
+            "clean_text": clean_colors(line),
+            "colors": get_colors(line)
+        })
+
+
+def main():
+    """Parse the report at OUTPUT_PATH and write FINAL_JSON to JSON_PATH."""
+    # Context manager: the report handle is closed even if parsing raises.
+    with open(OUTPUT_PATH, 'r') as report:
+        for line in map(str.strip, report):
+            if line:  # blank lines carry no information
+                parse_line(line)
+
+    with open(JSON_PATH, "w") as f:
+        json.dump(FINAL_JSON, f)
+
+
+# Script entry point
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        print("Error: Please pass the peas.out file and the path to save the json\n./peas-parser.py <output_file> <json_file.json>")
+        sys.exit(1)
+
+    # Published as module globals because main() reads them by name.
+    OUTPUT_PATH, JSON_PATH = sys.argv[1], sys.argv[2]
+
+    main()