Skip to content

Commit

Permalink
Add the initial DFIQ Python code to read DFIQ YAML files and create M…
Browse files Browse the repository at this point in the history
…arkdown pages from them.
  • Loading branch information
obsidianforensics committed Jan 23, 2024
1 parent b2145d3 commit 3123bb6
Show file tree
Hide file tree
Showing 8 changed files with 240 additions and 73 deletions.
237 changes: 210 additions & 27 deletions dfiq.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import networkx as nx
import os
import yamale
import yaml

logging.basicConfig(
Expand All @@ -10,6 +11,26 @@


class Component(object):
"""Base class for DFIQ components.
Components are the building blocks of DFIQ. They represent different logical
entities: Scenarios, Facets, Questions, and Approaches. This base class
defines the attributes that all components share.
Attributes:
id (str): The unique ID of the component, as described at dfiq.org/spec.
name (str): The name of the component, often in the form of a question.
description (str, optional): A few sentence description of the component.
type (str, optional): The type of component.
tags (set[str], optional): A set of tags associated with the component.
parent_ids (tuple[str], optional): A tuple of IDs of the component's parents.
child_ids (tuple[str], optional): A tuple of IDs of the component's children.
is_internal (bool): Whether the component is private and for internal use only.
Methods:
set_children(child_ids: tuple[str]): Sets the component's children.
"""

def __init__(self, dfiq_id, name, description=None, tags=None, parent_ids=None):
self.id = dfiq_id
self.name = name
Expand Down Expand Up @@ -43,6 +64,18 @@ def set_children(self, child_ids):


class Approach(Component):
"""An Approach in DFIQ.
Approaches are detailed explanations of how to answer a Question using a specific
method, including the required data, processing, and analysis steps. As there
is often more than one way to answer a question, there can be multiple Approaches
that answer a given Question using different techniques.
Attributes:
view (dict): A concise representation of how to perform this investigative approach.
type (str): The type of component, which is always "approach".
"""

def __init__(self, dfiq_id, name, description, tags, view):
super().__init__(dfiq_id, name, description, tags, [dfiq_id.split(".")[0]])
self.view = view
Expand All @@ -53,6 +86,16 @@ def __init__(self, dfiq_id, name, description, tags, view):


class Question(Component):
"""A Question in DFIQ.
Questions are the fundamental "building blocks" of DFIQ. All other DFIQ
components are relative to Questions: Approaches describe how to answer the
Questions, and Scenarios and Facets organize the Questions logically.
Attributes:
type (str): The type of component, which is always "question".
"""

def __init__(self, dfiq_id, name, description, tags, parent_ids):
super().__init__(dfiq_id, name, description, tags, parent_ids)
self.type = "question"
Expand All @@ -63,6 +106,17 @@ def approaches(self):


class Facet(Component):
"""A Facet in DFIQ.
Facets are used for intermediate-level grouping in DFIQ. A particular Facet can
be part of multiple different Scenarios and will contain multiple Questions. A
Facet breaks the larger Scenario into smaller logical pieces, but a Facet is
still too broad to answer directly; it must also be broken down (into Questions).
Attributes:
type (str): The type of component, which is always "facet".
"""

def __init__(self, dfiq_id, name, description, tags, parent_ids):
super().__init__(dfiq_id, name, description, tags, parent_ids)
self.type = "facet"
Expand All @@ -73,6 +127,16 @@ def questions(self):


class Scenario(Component):
"""A Scenario in DFIQ.
A Scenario is the highest-level grouping in DFIQ. A Scenario is made of one or
more Facets (different "sides" of an investigation), which in turn are made up of
investigative Questions.
Attributes:
type (str): The type of component, which is always "scenario".
"""

def __init__(self, dfiq_id, name, description, tags):
super().__init__(dfiq_id, name, description, tags)
self.type = "scenario"
Expand All @@ -83,7 +147,57 @@ def facets(self):


class DFIQ:
def __init__(self, yaml_data_path=None, markdown_output_path=None):
"""A DFIQ knowledge base.
An instance of a DFIQ knowledge base. Upon initialization, it reads DFIQ YAML files from
yaml_data_path, converts them to Python objects, and builds a graph of the components'
relationships.
Attributes:
yaml_data_path (str): The path to the directory containing the DFIQ YAML files.
markdown_output_path (str, optional): The path to the directory where the
generated Markdown files should be saved.
plural_map (dict): A dictionary mapping from DFIQ component types to their
plural forms.
components (dict): A dictionary mapping from DFIQ component IDs to their
corresponding components.
graph (nx.DiGraph, optional): A directed graph representing the relationships
between DFIQ components.
jinja_env (jinja2.Environment): A Jinja2 environment used to generate Markdown
files.
Methods:
scenarios(): Returns a list of all Scenarios in the DFIQ knowledge base.
facets(): Returns a list of all Facets in the DFIQ knowledge base.
questions(): Returns a list of all Questions in the DFIQ knowledge base.
approaches(): Returns a list of all Approaches in the DFIQ knowledge base.
add_child_ids(): Sets the `child_ids` attribute of each DFIQ component to a
list of the IDs of its child components.
add_child_tags(): Adds the tags of all child components to the `all_tags`
attribute of the parent DFIQ component.
convert_yaml_object_to_dfiq_component(yaml_object): Converts a YAML DFIQ object
to a Python DFIQ component.
load_yaml_files_by_type(dfiq_type, yaml_data_path=None): Loads all DFIQ
components of the given type from the given YAML data path.
load_dfiq_items_from_yaml(yaml_data_path=None): Loads all DFIQ components from
the given YAML data path.
build_graph(): Builds a directed graph representing the relationships between
DFIQ components.
display_graph(): Displays the DFIQ graph.
generate_scenario_md(scenario_id, allow_internal=False): Generates a Markdown file for the
given Scenario (by default, only for external Scenarios).
generate_question_md(question_id, skip_if_no_approaches=True, allow_internal=False):
Generates a Markdown file for the given Question (by default, only for external Questions).
generate_question_index_md(allow_internal=False):
Generates a Markdown file for the index page listing all Questions (by default, only for external Questions).
"""

def __init__(
self,
yaml_data_path="data",
markdown_output_path=None,
templates_path="templates",
):
self.yaml_data_path = yaml_data_path
self.markdown_output_path = markdown_output_path
self.plural_map = {
Expand All @@ -95,15 +209,18 @@ def __init__(self, yaml_data_path=None, markdown_output_path=None):
self.components = {}
self.graph = None
self.jinja_env = jinja2.Environment(
loader=jinja2.FileSystemLoader("templates/"), trim_blocks=True
loader=jinja2.FileSystemLoader(templates_path), trim_blocks=True
)
self.schemas = {
"Scenario": None,
"Facet": None,
"Question": None,
"Approach": None,
}

if not yaml_data_path:
self.yaml_data_path = "data"
logging.info(
f'"yaml_data_path" not specified; set to "{self.yaml_data_path}"'
)

logging.info(f'"yaml_data_path" set to "{self.yaml_data_path}"')

self.load_dfiq_schema()
self.load_dfiq_items_from_yaml()
self.build_graph()
self.add_child_ids()
Expand Down Expand Up @@ -200,17 +317,49 @@ def load_yaml_files_by_type(self, dfiq_type, yaml_data_path=None):
for dfiq_file in dfiq_files:
if dfiq_file.endswith(("-template.yaml", "-blank.yaml")):
continue
with open(
os.path.join(yaml_data_path, self.plural_map.get(dfiq_type), dfiq_file),
mode="r",
) as file:
file_to_open = os.path.join(
yaml_data_path, self.plural_map.get(dfiq_type), dfiq_file
)

if not self.validate_yaml_file(file_to_open):
continue

if not self.validate_dfiq_schema(file_to_open, dfiq_type):
continue

with open(file_to_open, mode="r") as file:
component_from_yaml = yaml.safe_load(file)
component_dict[
component_from_yaml["id"]
] = self.convert_yaml_object_to_dfiq_component(component_from_yaml)

return component_dict

@staticmethod
def validate_yaml_file(yaml_file_path):
    """Checks whether a file can be loaded as YAML.

    Args:
        yaml_file_path (str): The path of the YAML file to check.

    Returns:
        bool: True if the file loaded without a YAML error; False (after
            logging a warning) if PyYAML reported an error.
    """
    with open(yaml_file_path, mode="r") as file:
        try:
            yaml.safe_load(file)
        except yaml.YAMLError as e:
            # yaml.YAMLError is the base class of all PyYAML errors, so
            # reader, composer and constructor failures are reported here
            # too, not only parser and scanner failures.
            logging.warning(f"error parsing {yaml_file_path}:\n{e}")
            return False
    return True

def load_dfiq_schema(self):
    """Builds the yamale schema for each component type and stores it in self.schemas."""
    # The spec paths are relative; presumably they are resolved against the
    # current working directory -- confirm against how the tool is launched.
    spec_files = (
        ("Scenario", "utils/scenario_spec.yaml"),
        ("Facet", "utils/facet_spec.yaml"),
        ("Question", "utils/question_spec.yaml"),
        ("Approach", "utils/approach_spec.yaml"),
    )
    for component_type, spec_file in spec_files:
        self.schemas[component_type] = yamale.make_schema(spec_file)

def validate_dfiq_schema(self, yaml_file_path, component_type):
    """Validates a YAML file against the schema stored for its component type.

    Args:
        yaml_file_path (str): The path of the YAML file to validate.
        component_type (str): The key into self.schemas selecting the schema
            to validate against.

    Returns:
        bool: True if no yamale.YamaleError was raised; False (after logging
            the error as a warning) otherwise.
    """
    try:
        candidate = yamale.make_data(yaml_file_path)
        yamale.validate(self.schemas[component_type], candidate)
    except yamale.YamaleError as validation_error:
        logging.warning(validation_error)
        return False
    else:
        return True

def load_dfiq_items_from_yaml(self, yaml_data_path=None):
if not yaml_data_path:
yaml_data_path = self.yaml_data_path
Expand All @@ -236,58 +385,92 @@ def build_graph(self):
def display_graph(self):
    """Draws self.graph via nx.draw with labels enabled and a bold font weight."""
    draw_options = {"with_labels": True, "font_weight": "bold"}
    nx.draw(self.graph, **draw_options)

def generate_scenario_md(self, scenario_id, allow_internal=False):
    """Generates Markdown for a Scenario page.

    Args:
        scenario_id (str): The ID of the Scenario to generate the page for.
        allow_internal (bool): Whether a page may be generated for an
            internal Scenario. Defaults to False.

    Returns:
        bool or None: False if no page was written because the ID is unknown
            or the Scenario is internal and allow_internal is not set; None
            once the page has been written.

    Raises:
        ValueError: If no Markdown output path was specified.
    """
    if not self.markdown_output_path:
        raise ValueError("Markdown output path not specified")

    s = self.components.get(scenario_id)

    # dict.get() returns None for an unknown ID; report it instead of failing
    # with an AttributeError on the is_internal lookup below.
    if s is None:
        logging.warning(
            f"Will not generate Scenario page for unknown Scenario {scenario_id}"
        )
        return False

    if s.is_internal and not allow_internal:
        logging.warning(
            f"Will not generate Scenario page for internal Scenario {scenario_id}"
        )
        return False

    template = self.jinja_env.get_template("scenario.jinja2")
    context = {
        "scenario": s,
        "components": self.components,
        "allow_internal": allow_internal,
    }
    content = template.render(context)
    output_path = os.path.join(
        self.markdown_output_path, "scenarios", f"{scenario_id}.md"
    )
    # Write UTF-8 explicitly so non-ASCII text in the rendered page does not
    # depend on the platform's default encoding.
    with open(output_path, mode="w", encoding="utf-8") as file:
        file.write(content)

def generate_question_md(
    self, question_id, skip_if_no_approaches=True, allow_internal=False
):
    """Generates Markdown for a Question page.

    Args:
        question_id (str): The ID of the Question to generate the page for.
        skip_if_no_approaches (bool, optional): Whether to skip generating the
            page if the Question has no associated Approaches. Defaults to True.
        allow_internal (bool): Whether a page may be generated for an
            internal Question. Defaults to False.

    Returns:
        bool or None: False if no page was written because the ID is unknown
            or the Question is internal and allow_internal is not set; None
            if the page was written or skipped for lack of Approaches.

    Raises:
        ValueError: If no Markdown output path was specified.
    """
    if not self.markdown_output_path:
        raise ValueError("Markdown output path not specified")

    q = self.components.get(question_id)

    # dict.get() returns None for an unknown ID; report it instead of failing
    # with an AttributeError on the is_internal lookup below.
    if q is None:
        logging.warning(
            f"Will not generate Question page for unknown Question {question_id}"
        )
        return False

    if q.is_internal and not allow_internal:
        logging.warning(
            f"Will not generate Question page for internal Question {question_id}"
        )
        return False

    if skip_if_no_approaches and not q.approaches:
        logging.debug(
            f"Skipped writing Markdown for {question_id}; it had no Approaches"
        )
        return

    template = self.jinja_env.get_template("question_with_approaches.jinja2")
    context = {
        "question": q,
        "components": self.components,
        "allow_internal": allow_internal,
    }
    content = template.render(context)
    output_path = os.path.join(
        self.markdown_output_path, "questions", f"{question_id}.md"
    )
    # Write UTF-8 explicitly so non-ASCII text in the rendered page does not
    # depend on the platform's default encoding.
    with open(output_path, mode="w", encoding="utf-8") as file:
        file.write(content)

    logging.info(f"Wrote Markdown for Question {question_id} to {output_path}")

def generate_question_index_md(self, allow_internal=False):
"""Generates Markdown for the index page listing all Questions.
Args:
allow_internal (bool): Check if generating internal items is allowed.
"""

def generate_external_question_index_md(self):
if not self.markdown_output_path:
raise ValueError("Markdown output path not specified")

template = self.jinja_env.get_template("external_questions_index_md.jinja2")
context = {"components": self.components}
template = self.jinja_env.get_template("questions_index.jinja2")
context = {"components": self.components, "allow_internal": allow_internal}
content = template.render(context)
with open(
os.path.join(self.markdown_output_path, "questions", "index.md"), mode="w"
Expand Down
11 changes: 0 additions & 11 deletions generate_external_site_markdown.py

This file was deleted.

3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
networkx
pyyaml
jinja2
jinja2
yamale
11 changes: 11 additions & 0 deletions scripts/generate_site_markdown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from dfiq import DFIQ

# Load the DFIQ knowledge base and direct generated Markdown to site/docs.
# (Plain string: the original f-string had no placeholders.)
dfiq_instance = DFIQ(markdown_output_path="site/docs")

# One page per Scenario; internal Scenarios are skipped by default.
for scenario in dfiq_instance.scenarios():
    dfiq_instance.generate_scenario_md(scenario.id)

# One page per Question; by default, internal Questions and Questions without
# Approaches are skipped.
for question in dfiq_instance.questions():
    dfiq_instance.generate_question_md(question.id)

# Index page listing the Questions.
dfiq_instance.generate_question_index_md()
Loading

0 comments on commit 3123bb6

Please sign in to comment.