From bc496dc6e17555cd7f658d0fd812013749200fd6 Mon Sep 17 00:00:00 2001 From: Olivier Le Thanh Duong Date: Mon, 29 Apr 2024 16:45:01 +0200 Subject: [PATCH 1/9] CI check system usage endpoint --- .github/workflows/test-on-droplets-matrix.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/test-on-droplets-matrix.yml b/.github/workflows/test-on-droplets-matrix.yml index c67c1688f..f99f30a0b 100644 --- a/.github/workflows/test-on-droplets-matrix.yml +++ b/.github/workflows/test-on-droplets-matrix.yml @@ -238,6 +238,14 @@ jobs: -d '{"persistent_vms": [], "instances": ["${{ matrix.check_vm.item_hash }}"]}' \ "http://${DROPLET_IPV4}:4020/control/allocations" + - name: Get system usage + run: | + export DROPLET_IPV4="$(doctl compute droplet get aleph-vm-ci-${{ matrix.os_config.alias }}-${{ matrix.check_vm.alias }} --output json | ./.github/scripts/extract_droplet_ipv4.py)" + curl -X GET -H "Content-Type: application/json" \ + -H "X-Auth-Signature: test" \ + "http://${DROPLET_IPV4}:4020/about/usage/system" + + - name: Export aleph logs if: always() run: | From e58b9ad8b2b2e10af7cb660527d703d6043d72af Mon Sep 17 00:00:00 2001 From: Olivier Le Thanh Duong Date: Mon, 29 Apr 2024 17:42:46 +0200 Subject: [PATCH 2/9] add unit test for system usage --- tests/supervisor/test_views.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/supervisor/test_views.py b/tests/supervisor/test_views.py index 49a6fa91e..73bcfec45 100644 --- a/tests/supervisor/test_views.py +++ b/tests/supervisor/test_views.py @@ -24,3 +24,16 @@ async def test_allocation_fails_on_invalid_item_hash(aiohttp_client): "type": "value_error.unknownhash", }, ] + + +@pytest.mark.asyncio +async def test_system_usage(aiohttp_client): + """Test that the allocation endpoint fails when an invalid item_hash is provided.""" + client = await aiohttp_client(app) + settings.ALLOCATION_TOKEN_HASH = "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" # = "test" + response: web.Response = await client.get("/about/usage/system") + assert response.status == 200 + # check if it is valid json + resp = await response.json() + assert "cpu" in resp + assert resp["cpu"]["count"] > 0 From 6174c965d04cd947f2ee03769d9ec90f1a702605 Mon Sep 17 00:00:00 2001 From: Olivier Le Thanh Duong Date: Tue, 30 Apr 2024 09:53:13 +0200 Subject: [PATCH 3/9] Set up a fresh web_app for each test as required by aiohttp --- src/aleph/vm/orchestrator/resources.py | 5 +- src/aleph/vm/orchestrator/supervisor.py | 112 ++++++++++++------------ tests/supervisor/test_views.py | 4 +- 3 files changed, 63 insertions(+), 58 deletions(-) diff --git a/src/aleph/vm/orchestrator/resources.py b/src/aleph/vm/orchestrator/resources.py index a40c6ff13..29e819079 100644 --- a/src/aleph/vm/orchestrator/resources.py +++ b/src/aleph/vm/orchestrator/resources.py @@ -87,8 +87,8 @@ def get_machine_properties() -> MachineProperties: cpu_info = cpuinfo.get_cpu_info() # Slow return MachineProperties( cpu=CpuProperties( - architecture=cpu_info["raw_arch_string"], - vendor=cpu_info["vendor_id"], + architecture=cpu_info.get("raw_arch_string"), + vendor=cpu_info.get("vendor_id"), ), ) @@ -118,6 +118,7 @@ async def about_system_usage(_: web.Request): ), properties=get_machine_properties(), ) + return web.json_response(text=usage.json(exclude_none=True)) diff --git a/src/aleph/vm/orchestrator/supervisor.py b/src/aleph/vm/orchestrator/supervisor.py index 4846104ae..892106ba0 100644 --- a/src/aleph/vm/orchestrator/supervisor.py +++ b/src/aleph/vm/orchestrator/supervisor.py @@ -74,62 +74,63 @@ async def http_not_found(request: web.Request): return web.HTTPNotFound() -app = web.Application(middlewares=[server_version_middleware]) -cors = setup( - app, - defaults={ - "*": ResourceOptions( - allow_credentials=True, - expose_headers="*", - allow_headers="*", - ) - }, -) +def setup_webapp(): + app = web.Application(middlewares=[server_version_middleware]) + cors = setup( + app, + defaults={ + "*": ResourceOptions( + allow_credentials=True, + expose_headers="*", + allow_headers="*", + ) + }, + ) -# Routes that need CORS enabled -cors_routes = [ - # /about APIs return information about the VM Orchestrator - web.get("/about/login", about_login), - web.get("/about/executions/list", list_executions), - web.get("/about/executions/details", about_executions), - web.get("/about/executions/records", about_execution_records), - web.get("/about/usage/system", about_system_usage), - web.get("/about/config", about_config), - # /control APIs are used to control the VMs and access their logs - web.post("/control/allocation/notify", notify_allocation), - web.get("/control/machine/{ref}/logs", stream_logs), - web.post("/control/machine/{ref}/expire", operate_expire), - web.post("/control/machine/{ref}/stop", operate_stop), - web.post("/control/machine/{ref}/erase", operate_erase), - web.post("/control/machine/{ref}/reboot", operate_reboot), - # /status APIs are used to check that the VM Orchestrator is running properly - web.get("/status/check/fastapi", status_check_fastapi), - web.get("/status/check/fastapi/legacy", status_check_fastapi_legacy), - web.get("/status/check/host", status_check_host), - web.get("/status/check/version", status_check_version), - web.get("/status/check/ipv6", status_check_ipv6), - web.get("/status/config", status_public_config), -] -routes = app.add_routes(cors_routes) -for route in routes: - cors.add(route) - - -# Routes that don't need CORS enabled -other_routes = [ - # /control APIs are used to control the VMs and access their logs - web.post("/control/allocations", update_allocations), - # Raise an HTTP Error 404 if attempting to access an unknown URL within these paths. - web.get("/about/{suffix:.*}", http_not_found), - web.get("/control/{suffix:.*}", http_not_found), - web.get("/status/{suffix:.*}", http_not_found), - # /static is used to serve static files - web.static("/static", Path(__file__).parent / "views/static"), - # /vm is used to launch VMs on-demand - web.route("*", "/vm/{ref}{suffix:.*}", run_code_from_path), - web.route("*", "/{suffix:.*}", run_code_from_hostname), -] -app.add_routes(other_routes) + # Routes that need CORS enabled + cors_routes = [ + # /about APIs return information about the VM Orchestrator + web.get("/about/login", about_login), + web.get("/about/executions/list", list_executions), + web.get("/about/executions/details", about_executions), + web.get("/about/executions/records", about_execution_records), + web.get("/about/usage/system", about_system_usage), + web.get("/about/config", about_config), + # /control APIs are used to control the VMs and access their logs + web.post("/control/allocation/notify", notify_allocation), + web.get("/control/machine/{ref}/logs", stream_logs), + web.post("/control/machine/{ref}/expire", operate_expire), + web.post("/control/machine/{ref}/stop", operate_stop), + web.post("/control/machine/{ref}/erase", operate_erase), + web.post("/control/machine/{ref}/reboot", operate_reboot), + # /status APIs are used to check that the VM Orchestrator is running properly + web.get("/status/check/fastapi", status_check_fastapi), + web.get("/status/check/fastapi/legacy", status_check_fastapi_legacy), + web.get("/status/check/host", status_check_host), + web.get("/status/check/version", status_check_version), + web.get("/status/check/ipv6", status_check_ipv6), + web.get("/status/config", status_public_config), + ] + routes = app.add_routes(cors_routes) + for route in routes: + cors.add(route) + + # Routes that don't need CORS enabled + other_routes = [ + # /control APIs are used to control the VMs and access their logs + web.post("/control/allocations", update_allocations), + # Raise an HTTP Error 404 if attempting to access an unknown URL within these paths. + web.get("/about/{suffix:.*}", http_not_found), + web.get("/control/{suffix:.*}", http_not_found), + web.get("/status/{suffix:.*}", http_not_found), + # /static is used to serve static files + web.static("/static", Path(__file__).parent / "views/static"), + # /vm is used to launch VMs on-demand + web.route("*", "/vm/{ref}{suffix:.*}", run_code_from_path), + web.route("*", "/{suffix:.*}", run_code_from_hostname), + ] + app.add_routes(other_routes) + return app async def stop_all_vms(app: web.Application): @@ -153,6 +154,7 @@ def run(): # Require a random token to access /about APIs secret_token = token_urlsafe(nbytes=32) + app = setup_webapp() # Store app singletons. Note that app["pubsub"] will also be created. app["secret_token"] = secret_token app["vm_pool"] = pool diff --git a/tests/supervisor/test_views.py b/tests/supervisor/test_views.py index 73bcfec45..58cad0d69 100644 --- a/tests/supervisor/test_views.py +++ b/tests/supervisor/test_views.py @@ -2,12 +2,13 @@ from aiohttp import web from aleph.vm.conf import settings -from aleph.vm.orchestrator.supervisor import app +from aleph.vm.orchestrator.supervisor import setup_webapp @pytest.mark.asyncio async def test_allocation_fails_on_invalid_item_hash(aiohttp_client): """Test that the allocation endpoint fails when an invalid item_hash is provided.""" + app = setup_webapp() client = await aiohttp_client(app) settings.ALLOCATION_TOKEN_HASH = "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" # = "test" response: web.Response = await client.post( @@ -29,6 +30,7 @@ async def test_allocation_fails_on_invalid_item_hash(aiohttp_client): @pytest.mark.asyncio async def test_system_usage(aiohttp_client): """Test that the allocation endpoint fails when an invalid item_hash is provided.""" + app = setup_webapp() client = await aiohttp_client(app) settings.ALLOCATION_TOKEN_HASH = "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" # = "test" response: web.Response = await client.get("/about/usage/system") From f9133980e5388e166bb52a52d2aa01b5c8cf3eba Mon Sep 17 00:00:00 2001 From: Olivier Le Thanh Duong Date: Tue, 30 Apr 2024 11:42:19 +0200 Subject: [PATCH 4/9] revert local compat change --- src/aleph/vm/orchestrator/resources.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aleph/vm/orchestrator/resources.py b/src/aleph/vm/orchestrator/resources.py index 29e819079..1679c0525 100644 --- a/src/aleph/vm/orchestrator/resources.py +++ b/src/aleph/vm/orchestrator/resources.py @@ -87,8 +87,8 @@ def get_machine_properties() -> MachineProperties: cpu_info = cpuinfo.get_cpu_info() # Slow return MachineProperties( cpu=CpuProperties( - architecture=cpu_info.get("raw_arch_string"), - vendor=cpu_info.get("vendor_id"), + architecture=cpu_info["raw_arch_string"], + vendor=cpu_info["vendor_id"], ), ) From 6e84b2600880b381223814d9829c3f1d62230045 Mon Sep 17 00:00:00 2001 From: Olivier Le Thanh Duong Date: Tue, 30 Apr 2024 14:11:52 +0200 Subject: [PATCH 5/9] Apparently CI also don't have matching arch --- src/aleph/vm/orchestrator/resources.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aleph/vm/orchestrator/resources.py b/src/aleph/vm/orchestrator/resources.py index 1679c0525..448a822c5 100644 --- a/src/aleph/vm/orchestrator/resources.py +++ b/src/aleph/vm/orchestrator/resources.py @@ -87,8 +87,8 @@ def get_machine_properties() -> MachineProperties: cpu_info = cpuinfo.get_cpu_info() # Slow return MachineProperties( cpu=CpuProperties( - architecture=cpu_info["raw_arch_string"], - vendor=cpu_info["vendor_id"], + architecture=cpu_info.get("raw_arch_string", cpu_info.get("arch_string_raw")), + vendor=cpu_info.get("vendor_id", cpu_info.get("vendor_id_raw")), ), ) From b80061242900de8590e940e95143a92cff5258a9 Mon Sep 17 00:00:00 2001 From: Olivier Le Thanh Duong Date: Tue, 30 Apr 2024 15:00:09 +0200 Subject: [PATCH 6/9] Fix test description --- tests/supervisor/test_views.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/supervisor/test_views.py b/tests/supervisor/test_views.py index 58cad0d69..abd375be1 100644 --- a/tests/supervisor/test_views.py +++ b/tests/supervisor/test_views.py @@ -29,10 +29,9 @@ async def test_allocation_fails_on_invalid_item_hash(aiohttp_client): @pytest.mark.asyncio async def test_system_usage(aiohttp_client): - """Test that the allocation endpoint fails when an invalid item_hash is provided.""" + """Test that the usage system endpoints responds. No auth needed""" app = setup_webapp() client = await aiohttp_client(app) - settings.ALLOCATION_TOKEN_HASH = "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" # = "test" response: web.Response = await client.get("/about/usage/system") assert response.status == 200 # check if it is valid json From 91b2e9950e753ccfa0fd8c1b189a0fd99b6b3501 Mon Sep 17 00:00:00 2001 From: Olivier Le Thanh Duong Date: Tue, 30 Apr 2024 15:41:43 +0200 Subject: [PATCH 7/9] Better model real usage in Droplet test --- .github/workflows/test-on-droplets-matrix.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test-on-droplets-matrix.yml b/.github/workflows/test-on-droplets-matrix.yml index f99f30a0b..cb47fbc31 100644 --- a/.github/workflows/test-on-droplets-matrix.yml +++ b/.github/workflows/test-on-droplets-matrix.yml @@ -238,11 +238,10 @@ jobs: -d '{"persistent_vms": [], "instances": ["${{ matrix.check_vm.item_hash }}"]}' \ "http://${DROPLET_IPV4}:4020/control/allocations" - - name: Get system usage + - name: Fetch system usage endpoint run: | export DROPLET_IPV4="$(doctl compute droplet get aleph-vm-ci-${{ matrix.os_config.alias }}-${{ matrix.check_vm.alias }} --output json | ./.github/scripts/extract_droplet_ipv4.py)" curl -X GET -H "Content-Type: application/json" \ - -H "X-Auth-Signature: test" \ "http://${DROPLET_IPV4}:4020/about/usage/system" From 0f538d5ff9ae1ad79d0d1cd0f36a4060bc526a20 Mon Sep 17 00:00:00 2001 From: Olivier Le Thanh Duong Date: Thu, 2 May 2024 11:29:03 +0200 Subject: [PATCH 8/9] Add a test with mock --- tests/supervisor/test_views.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/supervisor/test_views.py b/tests/supervisor/test_views.py index abd375be1..15b0c995d 100644 --- a/tests/supervisor/test_views.py +++ b/tests/supervisor/test_views.py @@ -1,3 +1,4 @@ +from unittest import mock import pytest from aiohttp import web @@ -38,3 +39,33 @@ async def test_system_usage(aiohttp_client): resp = await response.json() assert "cpu" in resp assert resp["cpu"]["count"] > 0 + + +@pytest.mark.asyncio +async def test_system_usage_mock(aiohttp_client, mocker): + """Test that the usage system endpoints response value. No auth needed""" + mocker.patch( + "cpuinfo.cpuinfo.get_cpu_info", + { + "arch_string_raw": "x86_64", + "vendor_id_raw": "AuthenticAMD", + }, + ) + mocker.patch( + "psutil.getloadavg", + lambda: [1, 2, 3], + ) + mocker.patch( + "psutil.cpu_count", + lambda: 200, + ) + app = setup_webapp() + client = await aiohttp_client(app) + response: web.Response = await client.get("/about/usage/system") + assert response.status == 200 + # check if it is valid json + resp = await response.json() + assert resp["properties"]["cpu"]["architecture"] == "x86_64" + assert resp["properties"]["cpu"]["vendor"] == "AuthenticAMD" + assert resp["cpu"]["load_average"] == {"load1": 1.0, "load15": 3.0, "load5": 2.0} + assert resp["cpu"]["count"] == 200 From f22c753e526fd8f79fa15a536805a8a985da6776 Mon Sep 17 00:00:00 2001 From: Olivier Le Thanh Duong Date: Thu, 2 May 2024 11:37:03 +0200 Subject: [PATCH 9/9] isort --- tests/supervisor/test_views.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/supervisor/test_views.py b/tests/supervisor/test_views.py index 15b0c995d..60fa9578d 100644 --- a/tests/supervisor/test_views.py +++ b/tests/supervisor/test_views.py @@ -1,4 +1,5 @@ from unittest import mock + import pytest from aiohttp import web