---
# Role tasks: list the metrics in the oci_computeagent namespace for one
# compute instance and print the name of each metric.
- name: Collect metrics
  oracle.oci.oci_monitoring_metric_actions:
    compartment_id: "your-compartment-ocid"
    action: list
    namespace: oci_computeagent
    # NOTE(review): OCI documentation appears to spell this dimension
    # `resourceId`; confirm the exact case before relying on the filter.
    dimension_filters:
      resourceID: "your-compute-instance-ocid"
  register: result

- name: Set list of indexes (number of metrics)
  ansible.builtin.set_fact:
    # `| list` materialises the range so the fact is stored as a real list.
    l_index: "{{ range(result.metric | length) | list }}"

- name: Show metrics
  ansible.builtin.debug:
    msg:
      # Address the registered list by key instead of by its position in
      # dict2items output, which depends on the key order of the result.
      - "{{ result.metric[index].name }}"
  loop: "{{ l_index }}"  # loop through list indexes
  loop_control:
    index_var: index
...
# Role tests main.yml is like:
---
- name: Test role
  connection: local
  hosts: localhost
  roles:
    - role: ../../oci-metrics
...
# Run role: ansible-playbook -i inventory test.yml
# Expect ten metrics in result:
#   ok: [localhost] => (item=0) => "CpuUtilization"
#   ok: [localhost] => (item=1) => "DiskBytesRead"
#   ok: [localhost] => (item=2) => "DiskBytesWritten"
#   ok: [localhost] => (item=3) => "DiskIopsRead"
#   ok: [localhost] => (item=4) => "DiskIopsWritten"
#   ok: [localhost] => (item=5) => "LoadAverage"
#   ok: [localhost] => (item=6) => "MemoryAllocationStalls"
#   ok: [localhost] => (item=7) => "MemoryUtilization"
#   ok: [localhost] => (item=8) => "NetworksBytesIn"
#   ok: [localhost] => (item=9) => "NetworksBytesOut"
#!/bin/python3 import socket import argparse import oci import psutil from datetime import datetime # arguments are location (on-prem or OCI region) and partition (ex, /, /boot) parser = argparse.ArgumentParser(description=f"OCI metric disk_usage for {socket.gethostname()}") parser.add_argument("--location", help="Location", required=True) parser.add_argument("--partition", help="Partition", required=True) args = parser.parse_args() location = args.location partition = args.partition comp_ocid = "...your-compartment-ocid..." hostname = socket.gethostname() metric_name = "disk_usage" metric_namespace = "your-team" # ex. custom metrics for your team # Use default config file ~/.oci/config config = oci.config.from_file() # create monitoring service client monitoring_client = \ oci.monitoring.MonitoringClient(config, service_endpoint="https://telemetry-ingestion. |
# Run disk-usage.py at minute 15 of every hour, once per partition.
# Each crontab entry must be on its own line.
15 * * * * disk-usage.py --location IAD --partition /
15 * * * * disk-usage.py --location IAD --partition /boot
#
# the only supported return value for oci metrics is number
# workaround for plugins that return string
#
def check_service_state(service_name):
    """Map the state of a systemd service to a number.

    Runs ``systemctl is-active <service_name>`` and converts the outcome
    into a numeric value.

    Args:
        service_name: Name of the systemd unit to check (for example
            ``"mysqld"``).

    Returns:
        0 when ``systemctl is-active`` exits successfully, 1 otherwise.
    """
    try:
        subprocess.check_output(['systemctl', 'is-active', service_name])
    except subprocess.CalledProcessError:
        # Non-zero exit status: systemctl did not report the unit as active.
        return 1
    except OSError:
        # systemctl is missing or cannot be executed. Report "not active"
        # rather than crashing, so the caller still gets a numeric value.
        return 1
    return 0
# Run service-state.py for the mysqld service at minute 15 of every hour.
15 * * * * service-state.py --location IAD --service mysqld
#!/bin/python3
# Returns aggregated data from query.
import argparse
from datetime import datetime, timezone


def parse_day(text):
    """Convert a ``yyyy-mm-dd`` string into midnight UTC of that day.

    Used as an argparse ``type=`` callback so a malformed date is rejected
    with a usage error before any request is sent.

    Args:
        text: Date string in ``yyyy-mm-dd`` form.

    Returns:
        A timezone-aware ``datetime`` at 00:00:00 UTC on the given day.

    Raises:
        argparse.ArgumentTypeError: If ``text`` is not a valid date.
    """
    try:
        day = datetime.strptime(text, "%Y-%m-%d")
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid date {text!r}, expected yyyy-mm-dd") from None
    return day.replace(tzinfo=timezone.utc)


def build_query(partition):
    """Build the query text: hourly mean of disk_usage for one partition."""
    return f'disk_usage[1h]{{partition = "{partition}"}}.mean()'


def main():
    """Parse the command line, run the summarize query and print the data."""
    parser = argparse.ArgumentParser(description="Disk usage metrics")
    parser.add_argument("--partition", help="Partition", required=True)
    parser.add_argument("--start", help="Start time for metric yyyy-mm-dd",
                        required=True, type=parse_day)
    parser.add_argument("--end", help="End time for metric yyyy-mm-dd",
                        required=True, type=parse_day)
    args = parser.parse_args()
    if args.end < args.start:
        parser.error("--end must not be earlier than --start")

    # Imported here so --help and argument validation work without the SDK.
    import oci

    # Use default config file ~/.oci/config
    config = oci.config.from_file()

    # Initialize service client with default config file
    monitoring_client = oci.monitoring.MonitoringClient(config)

    comp_id = "ocid1.compartment.oc1..your-id"
    namespace = "your-namespace"

    # start_time/end_time are passed as timezone-aware datetime objects
    # instead of hand-built timestamp strings.
    details = oci.monitoring.models.SummarizeMetricsDataDetails(
        namespace=namespace,
        query=build_query(args.partition),
        start_time=args.start,
        end_time=args.end)
    summarize_metrics_data_response = monitoring_client.summarize_metrics_data(
        compartment_id=comp_id,
        summarize_metrics_data_details=details)

    # Get the data from response
    print(summarize_metrics_data_response.data)


if __name__ == "__main__":
    main()
$ python3 summarize-disk-usage.py --partition / --start 2023-06-19 --end 2023-06-21
[{
  "aggregated_datapoints": [
    {
      "timestamp": "2023-06-19T00:00:00+00:00",
      "value": 17.4
    },
    -- shortened --
    {
      "timestamp": "2023-06-21T00:00:00+00:00",
      "value": 17.0
    }
  ],
  "compartment_id": "ocid1.compartment.oc1...a",
  "dimensions": {
    "hostname": "myhostname.domain.com",
    "partition": "/"
  },
  "metadata": {},
  "name": "disk_usage",
  "namespace": "your-namespace",
  "resolution": null,
  "resource_group": null
}]
---
# Look up a single monitoring alarm by its OCID.
- name: Get alarm mysqld-down
  oracle.oci.oci_monitoring_alarm_facts:
    alarm_id: "your-alarm-ocid"
...
# expected result is like:
#   "alarms": [
#       {
#           "body": "Mysqld service is not online. ",
#           "display_name": "mysql-down",
#           "is_enabled": true,
#           "lifecycle_state": "ACTIVE",
#           "namespace": "your-custom-team-namespace",
#           "query": "service_state[1h]{service = \"mysqld\"}.mean() not in (0, 0)",
#           "severity": "CRITICAL",
#           "suppression": null,
#       }