diff --git a/playbooks/files/rax-maas/plugins/managed_k8s_services_local_check.py b/playbooks/files/rax-maas/plugins/managed_k8s_services_local_check.py
new file mode 100644
index 000000000..9ea273111
--- /dev/null
+++ b/playbooks/files/rax-maas/plugins/managed_k8s_services_local_check.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python
+
+# Copyright 2017, Rackspace US, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import datetime
+
+import ipaddr
+from maas_common import get_auth_ref
+from maas_common import metric
+from maas_common import metric_bool
+from maas_common import print_output
+from maas_common import status_err
+from maas_common import status_ok
+import requests
+
+
+def check(auth_ref, args):
+    """Probe a managed k8s service endpoint and emit MaaS metrics.
+
+    Sends an HTTP HEAD request to ``protocol://ip:port/path`` and reports
+    whether it answered with status 200, plus the response time in
+    milliseconds when it did.
+
+    :param auth_ref: value returned by ``get_auth_ref()``; not read by this
+                     function, kept so the call signature is unchanged.
+    :param args: parsed command line arguments (``ip``, ``port``,
+                 ``protocol``, ``certificate``, ``service``, ``path``).
+    """
+    # Drop any leading slash from the optional path so that joining it
+    # below never yields a double slash ("host:port//healthcheck").
+    path = (args.path or '').lstrip('/')
+    endpoint = '{protocol}://{ip}:{port}/{path}'.format(
+        ip=args.ip,
+        protocol=args.protocol,
+        port=args.port,
+        path=path
+    )
+
+    service_name = 'maas_mk8s_{service}'.format(service=args.service)
+
+    try:
+        if args.protocol.upper() == 'HTTPS':
+            # Verify against the supplied certificate when there is one,
+            # otherwise skip TLS verification altogether.
+            verify = args.certificate if args.certificate else False
+        else:
+            verify = None
+
+        # time something arbitrary
+        start = datetime.datetime.now()
+        r = requests.head(endpoint, verify=verify)
+        end = datetime.datetime.now()
+        api_is_up = (r.status_code == 200)
+    except (requests.HTTPError, requests.Timeout, requests.ConnectionError):
+        api_is_up = False
+        metric_bool('client_success', False, m_name=service_name)
+    # Any other exception presumably isn't an API error
+    except Exception as e:
+        metric_bool('client_success', False, m_name=service_name)
+        # NOTE(review): api_is_up is unset on this path, so the code after
+        # the try block relies on status_err() not returning - confirm.
+        status_err(str(e), m_name=service_name)
+    else:
+        metric_bool('client_success', True, m_name=service_name)
+        # total_seconds() yields a float, so fractional milliseconds are
+        # kept even where int / int floors, and whole days are counted.
+        milliseconds = (end - start).total_seconds() * 1000.0
+
+    status_ok(m_name=service_name)
+    metric_bool('mk8s_{service}_local_status'.format(service=args.service),
+                api_is_up, m_name=service_name)
+    if api_is_up:
+        # only want to send other metrics if api is up
+        metric('mk8s_{service}_local_response_time'.format(
+            service=args.service),
+            'double', '%.3f' % milliseconds, 'ms')
+
+
+def main(args):
+    """Fetch the auth reference and run the service check."""
+    auth_ref = get_auth_ref()
+    check(auth_ref, args)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description='Check Managed K8S service against local or '
+                    'remote address')
+    parser.add_argument('ip', nargs='?', type=ipaddr.IPv4Address,
+                        help="Check Managed K8S service against "
+                             "local or remote address")
+    parser.add_argument('--telegraf-output',
+                        action='store_true',
+                        default=False,
+                        help='Set the output format to telegraf')
+    parser.add_argument('--port',
+                        default='8889',
+                        help='Port for the managed k8s service')
+    parser.add_argument('--protocol',
+                        default='https',
+                        help='Protocol used to contact the managed '
+                             'k8s service')
+    parser.add_argument('--certificate',
+                        default=None,
+                        help='Path to SSL certificate for managed '
+                             'k8s service')
+    parser.add_argument('--service',
+                        default=None,
+                        help='Name of the k8s service')
+    parser.add_argument('--path',
+                        default=None,
+                        help='Path after the endpoint')
+    args = parser.parse_args()
+    with print_output(print_telegraf=args.telegraf_output):
+        main(args)
diff --git a/playbooks/maas-managed-k8.yml b/playbooks/maas-managed-k8.yml index e18663244..93238d980 100644 --- a/playbooks/maas-managed-k8.yml +++ b/playbooks/maas-managed-k8.yml @@ -129,10 +129,10 @@ when: - k8_config is
defined - - name: Install local checks + - name: Install k8sapi local checks template: src: "templates/rax-maas/managed_k8_api_local_check.yaml.j2" - dest: "/etc/rackspace-monitoring-agent.conf.d/managed_k8_api_local_check-{{ item.clusters[0].name }}.yaml" + dest: "/etc/rackspace-monitoring-agent.conf.d/managed_k8_api_local_check-{{ inventory_hostname }}-{{ item.clusters[0].name }}.yaml" owner: "root" group: "root" mode: "0644" @@ -141,9 +141,145 @@ - "{{ k8_config }}" when: - k8_config is defined + vars_files: + - vars/main.yml + - vars/maas-managed-k8.yml + tags: + - maas-managed-k8 + +- name: Install checks for mk8s ui + hosts: "{{ mk8s_ui_hosts | default('mk8s_ui_all') }}" + gather_facts: false + tasks: + - name: Install mk8s ui process check + template: + src: "templates/rax-maas/mk8s_ui_process_check.yaml.j2" + dest: "/etc/rackspace-monitoring-agent.conf.d/mk8s_ui_process_check--{{ inventory_hostname }}.yaml" + owner: "root" + group: "root" + mode: "0644" + delegate_to: "{{ physical_host | default(ansible_host) }}" + + - name: Install mk8s ui local checks + template: + src: "templates/rax-maas/managed_k8s_ui_local_check.yaml.j2" + dest: "/etc/rackspace-monitoring-agent.conf.d/managed_k8s_ui_local_check-{{ inventory_hostname }}.yaml" + owner: "root" + group: "root" + mode: "0644" + delegate_to: "{{ physical_host | default(ansible_host) }}" + + - name: Install mk8s ui lb checks + template: + src: "templates/rax-maas/lb_ui_check_k8s.yaml.j2" + dest: "/etc/rackspace-monitoring-agent.conf.d/lb_ui_check_k8s.yaml" + owner: "root" + group: "root" + mode: "0644" + delegate_to: "{{ physical_host | default(ansible_host) }}" + when: + - maas_remote_check | bool + - not maas_private_monitoring_enabled + - name: Install mk8s ui private lb checks + template: + src: "templates/rax-maas/private_lb_ui_check_mk8s.yaml.j2" + dest: "/etc/rackspace-monitoring-agent.conf.d/private_lb_ui_check_mk8s.yaml" + owner: "root" + group: "root" + mode: "0644" + delegate_to: "{{ physical_host 
| default(ansible_host) }}" + when: + - maas_private_monitoring_enabled + - maas_private_monitoring_zone is defined vars_files: - vars/main.yml - vars/maas-managed-k8.yml tags: - maas-managed-k8 + +- name: Install checks for mk8s etp + hosts: "{{ mk8s_etp_hosts | default('mk8s_etp_all') }}" + gather_facts: false + tasks: + - name: Install mk8s etp process check + template: + src: "templates/rax-maas/mk8s_etp_process_check.yaml.j2" + dest: "/etc/rackspace-monitoring-agent.conf.d/mk8s_etp_process_check--{{ inventory_hostname }}.yaml" + owner: "root" + group: "root" + mode: "0644" + delegate_to: "{{ physical_host | default(ansible_host) }}" + + - name: Install mk8s etp local checks + template: + src: "templates/rax-maas/managed_k8s_etp_local_check.yaml.j2" + dest: "/etc/rackspace-monitoring-agent.conf.d/managed_k8s_etp_local_check-{{ inventory_hostname }}.yaml" + owner: "root" + group: "root" + mode: "0644" + delegate_to: "{{ physical_host | default(ansible_host) }}" + # Ideally ETP should not be accessible from the outside so skip those checks + vars_files: + - vars/main.yml + - vars/maas-managed-k8.yml + tags: + - maas-managed-k8 + +- name: Install checks for mk8s etg + hosts: "mk8s_etg_all" + gather_facts: false + tasks: + - name: Install mk8s etg process check + template: + src: "templates/rax-maas/mk8s_etg_process_check.yaml.j2" + dest: "/etc/rackspace-monitoring-agent.conf.d/mk8s_etg_process_check--{{ inventory_hostname }}.yaml" + owner: "root" + group: "root" + mode: "0644" + delegate_to: "{{ physical_host | default(ansible_host) }}" + + - name: Install mk8s etg local checks + template: + src: "templates/rax-maas/managed_k8s_etg_local_check.yaml.j2" + dest: "/etc/rackspace-monitoring-agent.conf.d/managed_k8s_etg_local_check-{{ inventory_hostname }}.yaml" + owner: "root" + group: "root" + mode: "0644" + delegate_to: "{{ physical_host | default(ansible_host) }}" + # Ideally etg should not be accessible from the outside so skip those checks + vars_files: + - 
vars/main.yml + - vars/maas-managed-k8.yml + tags: + - maas-managed-k8 + +- name: Install checks for mk8s auth + hosts: "{{ mk8s_auth_hosts | default('mk8s_auth_all') }}" + gather_facts: false + tasks: + - name: Install mk8s auth process check + template: + src: "templates/rax-maas/mk8s_auth_process_check.yaml.j2" + dest: "/etc/rackspace-monitoring-agent.conf.d/mk8s_auth_process_check--{{ inventory_hostname }}.yaml" + owner: "root" + group: "root" + mode: "0644" + delegate_to: "{{ physical_host | default(ansible_host) }}" + + - name: Install mk8s auth local checks + template: + src: "templates/rax-maas/managed_k8s_auth_local_check.yaml.j2" + dest: "/etc/rackspace-monitoring-agent.conf.d/managed_k8s_auth_local_check-{{ inventory_hostname }}.yaml" + owner: "root" + group: "root" + mode: "0644" + delegate_to: "{{ physical_host | default(ansible_host) }}" + # Ideally auth should not be accessible from the outside so skip those checks + vars_files: + - vars/main.yml + - vars/maas-managed-k8.yml + tags: + - maas-managed-k8 + + diff --git a/playbooks/templates/rax-maas/lb_ui_check_k8s.yaml.j2 b/playbooks/templates/rax-maas/lb_ui_check_k8s.yaml.j2 new file mode 100644 index 000000000..f7aeea6b8 --- /dev/null +++ b/playbooks/templates/rax-maas/lb_ui_check_k8s.yaml.j2 @@ -0,0 +1,29 @@ +{% from "templates/common/macros.jinja" import get_metadata with context %} +{% set label = "lb_ui_check_k8s" %} +{% set check_name = label+'--'+maas_lb_name %} +type : remote.http +label : "{{ check_name }}" +period : "{{ maas_check_period_override[label] | default(maas_check_period) }}" +timeout : "{{ maas_check_timeout_override[label] | default(maas_check_timeout) }}" +disabled : "{{ (inventory_hostname != groups['mk8s_ui_all'][0] or check_name | match(maas_excluded_checks_regex)) | ternary('true', 'false') }}" +target_resolver : "IPv4" +target_hostname : "{{ maas_external_ip_address }}" +details : + url : "{{ maas_k8s_ui_scheme | default(maas_scheme)}}://{{ maas_external_hostname }}:{{ 
mk8s_ui_port_lb }}" + method : "HEAD" +monitoring_zones_poll: +{% for zone in maas_monitoring_zones %} + - {{ zone }} +{% endfor %} +{{ get_metadata(label).strip() }} +{# Add extra metadata options with two leading white spaces #} +alarms : + lb_api_alarm_mk8s_ui : + label : lb_api_alarm_mk8s_ui + notification_plan_id: "{{ maas_notification_plan_override[label] | default(maas_notification_plan) }}" + disabled : {{ ('lb_api_alarm_mk8s_ui' | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }} + criteria : | + :set consecutiveCount={{ maas_alarm_local_consecutive_count }} + if (metric['code'] != '200') { + return new AlarmStatus(CRITICAL, 'UI unavailable.'); + } diff --git a/playbooks/templates/rax-maas/managed_k8s_auth_local_check.yaml.j2 b/playbooks/templates/rax-maas/managed_k8s_auth_local_check.yaml.j2 new file mode 100644 index 000000000..8dae0da86 --- /dev/null +++ b/playbooks/templates/rax-maas/managed_k8s_auth_local_check.yaml.j2 @@ -0,0 +1,24 @@ +{% from "templates/common/macros.jinja" import get_metadata with context %} +{% set label = "mk8s_auth_local_check" %} +{% set check_name = label+'--'+inventory_hostname %} +type : agent.plugin +label : "{{ check_name }}" +period : "{{ maas_check_period_override[label] | default(maas_check_period) }}" +timeout : "{{ maas_check_timeout_override[label] | default(maas_check_timeout) }}" +disabled : "{{ (check_name | match(maas_excluded_checks_regex)) | ternary('true', 'false') }}" +details : + file : run_plugin_in_venv.sh + args : ["{{ maas_plugin_dir }}/managed_k8s_services_local_check.py", "{{ ansible_host }}", "--port", "{{ mk8s_auth_port }}", "--protocol", "{{ mk8s_auth_protocol }}", "--service", "auth", "--path", "healthcheck"] + timeout : {{ (maas_check_timeout_override[label] | default(maas_check_timeout) * 1000) }} +{{ get_metadata(label).strip() }} +{# Add extra metadata options with two leading white spaces #} +alarms : + mk8s_auth_local_status : + label : mk8s_auth_local_status-{{ inventory_hostname }}
+ notification_plan_id : "{{ maas_notification_plan_override[label] | default(maas_notification_plan) }}" + disabled : {{ (('mk8s_auth_local_status-'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }} + criteria : | + :set consecutiveCount={{ maas_alarm_local_consecutive_count }} + if (metric["mk8s_auth_local_status"] != 1) { + return new AlarmStatus(CRITICAL, "Managed K8S auth unavailable"); + } diff --git a/playbooks/templates/rax-maas/managed_k8s_etg_local_check.yaml.j2 b/playbooks/templates/rax-maas/managed_k8s_etg_local_check.yaml.j2 new file mode 100644 index 000000000..fd4cbf90c --- /dev/null +++ b/playbooks/templates/rax-maas/managed_k8s_etg_local_check.yaml.j2 @@ -0,0 +1,24 @@ +{% from "templates/common/macros.jinja" import get_metadata with context %} +{% set label = "managed_k8s_etg_local_check" %} +{% set check_name = label+'--'+inventory_hostname %} +type : agent.plugin +label : "{{ check_name }}" +period : "{{ maas_check_period_override[label] | default(maas_check_period) }}" +timeout : "{{ maas_check_timeout_override[label] | default(maas_check_timeout) }}" +disabled : "{{ (check_name | match(maas_excluded_checks_regex)) | ternary('true', 'false') }}" +details : + file : run_plugin_in_venv.sh + args : ["{{ maas_plugin_dir }}/managed_k8s_services_local_check.py", "{{ ansible_host }}", "--port", "{{ mk8s_etg_port }}", "--protocol", "{{ mk8s_etg_protocol }}", "--service", "etg", "--path", "healthcheck"] + timeout : {{ (maas_check_timeout_override[label] | default(maas_check_timeout) * 1000) }} +{{ get_metadata(label).strip() }} +{# Add extra metadata options with two leading white spaces #} +alarms : + managed_k8s_etg_local_status : + label : managed_k8s_etg_local_status--{{ inventory_hostname }} + notification_plan_id : "{{ maas_notification_plan_override[label] | default(maas_notification_plan) }}" + disabled : {{ (('managed_k8s_etg_local_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true',
'false') }} + criteria : | + :set consecutiveCount={{ maas_alarm_local_consecutive_count }} + if (metric["mk8s_etg_local_status"] != 1) { + return new AlarmStatus(CRITICAL, "Managed K8S etg unavailable"); + } diff --git a/playbooks/templates/rax-maas/managed_k8s_etp_local_check.yaml.j2 b/playbooks/templates/rax-maas/managed_k8s_etp_local_check.yaml.j2 new file mode 100644 index 000000000..95ca949f8 --- /dev/null +++ b/playbooks/templates/rax-maas/managed_k8s_etp_local_check.yaml.j2 @@ -0,0 +1,24 @@ +{% from "templates/common/macros.jinja" import get_metadata with context %} +{% set label = "managed_k8s_etp_local_check" %} +{% set check_name = label+'--'+inventory_hostname %} +type : agent.plugin +label : "{{ check_name }}" +period : "{{ maas_check_period_override[label] | default(maas_check_period) }}" +timeout : "{{ maas_check_timeout_override[label] | default(maas_check_timeout) }}" +disabled : "{{ (check_name | match(maas_excluded_checks_regex)) | ternary('true', 'false') }}" +details : + file : run_plugin_in_venv.sh + args : ["{{ maas_plugin_dir }}/managed_k8s_services_local_check.py", "{{ ansible_host }}", "--port", "{{ mk8s_etp_port }}", "--protocol", "{{ mk8s_etp_protocol }}", "--service", "etp", "--path", "healthcheck"] + timeout : {{ (maas_check_timeout_override[label] | default(maas_check_timeout) * 1000) }} +{{ get_metadata(label).strip() }} +{# Add extra metadata options with two leading white spaces #} +alarms : + managed_k8s_etp_local_status : + label : managed_k8s_etp_local_status--{{ inventory_hostname }} + notification_plan_id : "{{ maas_notification_plan_override[label] | default(maas_notification_plan) }}" + disabled : {{ (('managed_k8s_etp_local_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }} + criteria : | + :set consecutiveCount={{ maas_alarm_local_consecutive_count }} + if (metric["mk8s_etp_local_status"] != 1) { + return new AlarmStatus(CRITICAL, "Managed K8S ETP unavailable"); + } diff
--git a/playbooks/templates/rax-maas/managed_k8s_ui_local_check.yaml.j2 b/playbooks/templates/rax-maas/managed_k8s_ui_local_check.yaml.j2 new file mode 100644 index 000000000..402d0cff3 --- /dev/null +++ b/playbooks/templates/rax-maas/managed_k8s_ui_local_check.yaml.j2 @@ -0,0 +1,24 @@ +{% from "templates/common/macros.jinja" import get_metadata with context %} +{% set label = "managed_k8s_ui_local_check" %} +{% set check_name = label+'--'+inventory_hostname %} +type : agent.plugin +label : "{{ check_name }}" +period : "{{ maas_check_period_override[label] | default(maas_check_period) }}" +timeout : "{{ maas_check_timeout_override[label] | default(maas_check_timeout) }}" +disabled : "{{ (check_name | match(maas_excluded_checks_regex)) | ternary('true', 'false') }}" +details : + file : run_plugin_in_venv.sh + args : ["{{ maas_plugin_dir }}/managed_k8s_services_local_check.py", "{{ ansible_host }}", "--port", "{{ mk8s_ui_port }}", "--protocol", "{{ mk8s_ui_protocol }}", "--service", "ui"] + timeout : {{ (maas_check_timeout_override[label] | default(maas_check_timeout) * 1000) }} +{{ get_metadata(label).strip() }} +{# Add extra metadata options with two leading white spaces #} +alarms : + managed_k8s_ui_local_status : + label : managed_k8s_ui_local_status--{{ inventory_hostname }} + notification_plan_id : "{{ maas_notification_plan_override[label] | default(maas_notification_plan) }}" + disabled : {{ (('managed_k8s_ui_local_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }} + criteria : | + :set consecutiveCount={{ maas_alarm_local_consecutive_count }} + if (metric["mk8s_ui_local_status"] != 1) { + return new AlarmStatus(CRITICAL, "Managed K8S UI unavailable"); + } diff --git a/playbooks/templates/rax-maas/mk8s_auth_process_check.yaml.j2 b/playbooks/templates/rax-maas/mk8s_auth_process_check.yaml.j2 new file mode 100644 index 000000000..1967159cd --- /dev/null +++
b/playbooks/templates/rax-maas/mk8s_auth_process_check.yaml.j2 @@ -0,0 +1,24 @@ +{% from "templates/common/macros.jinja" import get_metadata with context %} +{% set label = "mk8s_auth_process_check" %} +{% set check_name = label+'--'+inventory_hostname %} +type : agent.plugin +label : "{{ check_name }}" +period : "{{ maas_check_period_override[label] | default(maas_check_period) }}" +timeout : "{{ maas_check_timeout_override[label] | default(maas_check_timeout) }}" +disabled : "{{ (check_name | match(maas_excluded_checks_regex)) | ternary('true', 'false') }}" +details : + file : run_plugin_in_venv.sh + args : ["{{ maas_plugin_dir }}/process_check_{% if inventory_hostname in groups['all_containers'] | default([]) %}container.py", "-c", "{{ inventory_hostname }}", "{% else %}host.py", "{% endif %}{{ maas_managed_k8s_auth_process_name }}"] + timeout : {{ (maas_check_timeout_override[label] | default(maas_check_timeout) * 1000) }} +{{ get_metadata(label).strip() }} +{# Add extra metadata options with two leading white spaces #} +alarms : + {{ maas_managed_k8s_auth_process_name }}_process_status: + label : {{ maas_managed_k8s_auth_process_name }}_process_status--{{ inventory_hostname }} + notification_plan_id : "{{ maas_notification_plan_override[label] | default(maas_notification_plan) }}" + disabled : {{ ((maas_managed_k8s_auth_process_name+'_process_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }} + criteria : | + :set consecutiveCount={{ maas_alarm_local_consecutive_count }} + if (metric["{{ maas_managed_k8s_auth_process_name }}_process_status"] != 1 ) { + return new AlarmStatus(CRITICAL, "mk8s auth process {{ maas_managed_k8s_auth_process_name }} not running on {{ inventory_hostname }}"); + } diff --git a/playbooks/templates/rax-maas/mk8s_etg_process_check.yaml.j2 b/playbooks/templates/rax-maas/mk8s_etg_process_check.yaml.j2 new file mode 100644 index 000000000..a90ce8756 --- /dev/null +++ 
b/playbooks/templates/rax-maas/mk8s_etg_process_check.yaml.j2 @@ -0,0 +1,24 @@ +{% from "templates/common/macros.jinja" import get_metadata with context %} +{% set label = "mk8s_etg_process_check" %} +{% set check_name = label+'--'+inventory_hostname %} +type : agent.plugin +label : "{{ check_name }}" +period : "{{ maas_check_period_override[label] | default(maas_check_period) }}" +timeout : "{{ maas_check_timeout_override[label] | default(maas_check_timeout) }}" +disabled : "{{ (check_name | match(maas_excluded_checks_regex)) | ternary('true', 'false') }}" +details : + file : run_plugin_in_venv.sh + args : ["{{ maas_plugin_dir }}/process_check_{% if inventory_hostname in groups['all_containers'] | default([]) %}container.py", "-c", "{{ inventory_hostname }}", "{% else %}host.py", "{% endif %}{{ maas_managed_k8s_etg_process_name }}"] + timeout : {{ (maas_check_timeout_override[label] | default(maas_check_timeout) * 1000) }} +{{ get_metadata(label).strip() }} +{# Add extra metadata options with two leading white spaces #} +alarms : + {{ maas_managed_k8s_etg_process_name }}_process_status: + label : {{ maas_managed_k8s_etg_process_name }}_process_status--{{ inventory_hostname }} + notification_plan_id : "{{ maas_notification_plan_override[label] | default(maas_notification_plan) }}" + disabled : {{ ((maas_managed_k8s_etg_process_name+'_process_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }} + criteria : | + :set consecutiveCount={{ maas_alarm_local_consecutive_count }} + if (metric["{{ maas_managed_k8s_etg_process_name }}_process_status"] != 1 ) { + return new AlarmStatus(CRITICAL, "mk8s etg process {{ maas_managed_k8s_etg_process_name }} not running on {{ inventory_hostname }}"); + } diff --git a/playbooks/templates/rax-maas/mk8s_etp_process_check.yaml.j2 b/playbooks/templates/rax-maas/mk8s_etp_process_check.yaml.j2 new file mode 100644 index 000000000..302d188e8 --- /dev/null +++ 
b/playbooks/templates/rax-maas/mk8s_etp_process_check.yaml.j2 @@ -0,0 +1,24 @@ +{% from "templates/common/macros.jinja" import get_metadata with context %} +{% set label = "mk8s_etp_process_check" %} +{% set check_name = label+'--'+inventory_hostname %} +type : agent.plugin +label : "{{ check_name }}" +period : "{{ maas_check_period_override[label] | default(maas_check_period) }}" +timeout : "{{ maas_check_timeout_override[label] | default(maas_check_timeout) }}" +disabled : "{{ (check_name | match(maas_excluded_checks_regex)) | ternary('true', 'false') }}" +details : + file : run_plugin_in_venv.sh + args : ["{{ maas_plugin_dir }}/process_check_{% if inventory_hostname in groups['all_containers'] | default([]) %}container.py", "-c", "{{ inventory_hostname }}", "{% else %}host.py", "{% endif %}{{ maas_managed_k8s_etp_process_name }}"] + timeout : {{ (maas_check_timeout_override[label] | default(maas_check_timeout) * 1000) }} +{{ get_metadata(label).strip() }} +{# Add extra metadata options with two leading white spaces #} +alarms : + {{ maas_managed_k8s_etp_process_name }}_process_status: + label : {{ maas_managed_k8s_etp_process_name }}_process_status--{{ inventory_hostname }} + notification_plan_id : "{{ maas_notification_plan_override[label] | default(maas_notification_plan) }}" + disabled : {{ ((maas_managed_k8s_etp_process_name+'_process_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }} + criteria : | + :set consecutiveCount={{ maas_alarm_local_consecutive_count }} + if (metric["{{ maas_managed_k8s_etp_process_name }}_process_status"] != 1 ) { + return new AlarmStatus(CRITICAL, "mk8s etp process {{ maas_managed_k8s_etp_process_name }} not running on {{ inventory_hostname }}"); + } diff --git a/playbooks/templates/rax-maas/mk8s_ui_process_check.yaml.j2 b/playbooks/templates/rax-maas/mk8s_ui_process_check.yaml.j2 new file mode 100644 index 000000000..73ca362e9 --- /dev/null +++ 
b/playbooks/templates/rax-maas/mk8s_ui_process_check.yaml.j2 @@ -0,0 +1,24 @@ +{% from "templates/common/macros.jinja" import get_metadata with context %} +{% set label = "mk8s_ui_process_check" %} +{% set check_name = label+'--'+inventory_hostname %} +type : agent.plugin +label : "{{ check_name }}" +period : "{{ maas_check_period_override[label] | default(maas_check_period) }}" +timeout : "{{ maas_check_timeout_override[label] | default(maas_check_timeout) }}" +disabled : "{{ (check_name | match(maas_excluded_checks_regex)) | ternary('true', 'false') }}" +details : + file : run_plugin_in_venv.sh + args : ["{{ maas_plugin_dir }}/process_check_{% if inventory_hostname in groups['all_containers'] | default([]) %}container.py", "-c", "{{ inventory_hostname }}", "{% else %}host.py", "{% endif %}{{ maas_managed_k8s_ui_process_name }}"] + timeout : {{ (maas_check_timeout_override[label] | default(maas_check_timeout) * 1000) }} +{{ get_metadata(label).strip() }} +{# Add extra metadata options with two leading white spaces #} +alarms : + {{ maas_managed_k8s_ui_process_name }}_process_status: + label : {{ maas_managed_k8s_ui_process_name }}_process_status--{{ inventory_hostname }} + notification_plan_id : "{{ maas_notification_plan_override[label] | default(maas_notification_plan) }}" + disabled : {{ ((maas_managed_k8s_ui_process_name+'_process_status--'+inventory_hostname) | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }} + criteria : | + :set consecutiveCount={{ maas_alarm_local_consecutive_count }} + if (metric["{{ maas_managed_k8s_ui_process_name }}_process_status"] != 1 ) { + return new AlarmStatus(CRITICAL, "mk8s ui process {{ maas_managed_k8s_ui_process_name }} not running on {{ inventory_hostname }}"); + } diff --git a/playbooks/templates/rax-maas/private_lb_ui_check_mk8s.yaml.j2 b/playbooks/templates/rax-maas/private_lb_ui_check_mk8s.yaml.j2 new file mode 100644 index 000000000..922ace083 --- /dev/null +++ 
b/playbooks/templates/rax-maas/private_lb_ui_check_mk8s.yaml.j2 @@ -0,0 +1,27 @@ +{% from "templates/common/macros.jinja" import get_metadata with context %} +{% set label = "private_lb_ui_check_mk8s" %} +{% set check_name = label+'--'+maas_lb_name %} +type : remote.http +label : "{{ check_name }}" +period : "{{ maas_check_period_override[label] | default(maas_check_period) }}" +timeout : "{{ maas_check_timeout_override[label] | default(maas_check_timeout) }}" +disabled : "{{ (inventory_hostname != groups['mk8s_ui_all'][0] or check_name | match(maas_excluded_checks_regex)) | ternary('true', 'false') }}" +target_resolver : "IPv4" +target_hostname : "{{ maas_external_ip_address }}" +details : + url : "{{ maas_k8s_ui_scheme | default(maas_scheme)}}://{{ maas_external_hostname }}:{{ mk8s_ui_port_lb }}" + method : "HEAD" +monitoring_zones_poll: + - "{{ maas_private_monitoring_zone }}" +{{ get_metadata(label).strip() }} +{# Add extra metadata options with two leading white spaces #} +alarms : + private_lb_ui_alarm_mk8s : + label : private_lb_ui_alarm_mk8s + notification_plan_id: "{{ maas_notification_plan_override[label] | default(maas_notification_plan) }}" + disabled : {{ ('private_lb_ui_alarm_mk8s' | match(maas_excluded_alarms_regex)) | ternary('true', 'false') }} + criteria : | + :set consecutiveCount={{ maas_alarm_local_consecutive_count }} + if (metric['code'] != '200') { + return new AlarmStatus(CRITICAL, 'UI unavailable.'); + } diff --git a/playbooks/vars/maas-managed-k8.yml b/playbooks/vars/maas-managed-k8.yml index f0e3bcd0b..fcfe4aa69 100644 --- a/playbooks/vars/maas-managed-k8.yml +++ b/playbooks/vars/maas-managed-k8.yml @@ -13,9 +13,18 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ +# maas_k8s_ui_scheme: https + maas_managed_k8_kube_config_dir: /root/.k8 maas_managed_k8_kubectl_release: v1.9.1 -maas_managed_k8_auth_url: "http://{{ internal_vip_address }}:8890" +maas_managed_k8_auth_url: "http://{{ internal_vip_address }}:{{ mk8s_auth_port }}" + +# processes +maas_managed_k8s_ui_process_name: controlpanel +maas_managed_k8s_etp_process_name: etp +maas_managed_k8s_etg_process_name: etg +maas_managed_k8s_auth_process_name: auth diff --git a/playbooks/vars/main.yml b/playbooks/vars/main.yml index 76ed8f9cc..e5ee64102 100644 --- a/playbooks/vars/main.yml +++ b/playbooks/vars/main.yml @@ -406,3 +406,44 @@ heat_local_api_port: '8004' # heat_local_api_protocol: 'http' + +# mk8s_auth_port: Port number for the mk8s auth service (local) +# +mk8s_auth_port: '8080' + +# +# mk8s_auth_protocol: Protocol used to contact the mk8s auth service +# +mk8s_auth_protocol: 'http' + +# mk8s_ui_port: Port number for the mk8s ui service (local) +# +mk8s_ui_port: '8080' + +# mk8s_ui_port_lb: Port number for the mk8s ui service (global) +# +mk8s_ui_port_lb: '8891' + + +# +# mk8s_ui_protocol: Protocol used to contact the mk8s ui service +# +mk8s_ui_protocol: 'http' + +# mk8s_etp_port: Port number for the mk8s etp service +# +mk8s_etp_port: '8889' + +# +# mk8s_etp_protocol: Protocol used to contact the mk8s etp service +# +mk8s_etp_protocol: 'http' + +# mk8s_etg_port: Port number for the mk8s etg service +# +mk8s_etg_port: '8888' + +# +# mk8s_etg_protocol: Protocol used to contact the mk8s etg service +# +mk8s_etg_protocol: 'http' \ No newline at end of file diff --git a/releasenotes/notes/K8S-381_monitor_mk8s_containers-1a478428700d9039.yaml b/releasenotes/notes/K8S-381_monitor_mk8s_containers-1a478428700d9039.yaml new file mode 100644 index 000000000..deff8492e --- /dev/null +++ b/releasenotes/notes/K8S-381_monitor_mk8s_containers-1a478428700d9039.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + Managed Kubernetes installs several lxc-containers for various + authentication
related functionality (etp, etg, auth, ui). This + will monitor if the respective processes are up and the API + endpoints are accessible. UI also includes checks for the LB + whereas the other services only have local checks.