Add offlinepi

astral-sh · Dec 11, 2023 · c4cb290 · c4cb290
1 parent 2ecac4b
commit c4cb290
Show file tree

Hide file tree

Showing 7 changed files with 230 additions and 0 deletions.
diff --git a/scripts/offlinepi/README.md b/scripts/offlinepi/README.md
@@ -0,0 +1,50 @@
+# offlinepi
+
+Utilities for managing an offline version of PyPI.
+
+## Usage
+
+Record PyPI responses during a command:
+
+```
+./offlinepi record <command>
+```
+
+Replay PyPI responses during a command:
+
+```
+./offlinepi replay <command>
+```
+
+### Example
+
+Record server interactions during Puffin's tests:
+
+```
+./offlinepi record cargo test --features pypi -- --test-threads=1
+```
+
+**Note**: Recording tests without parallelism is helpful for reliable replays.
+
+Then, run it again using replayed responses:
+
+```
+./offlinepi replay cargo test --features pypi
+```
+
+## TLS Certificates
+
+In order to record HTTPS requests, the certificate generated by mitmproxy must be installed.
+See [the mitmproxy certificate documentation](https://docs.mitmproxy.org/stable/concepts-certificates/) for details.
+
+## Implementation
+
+[mitmproxy](https://mitmproxy.org/) is used to record and replay responses.
+
+The proxy is started on `localhost:8080` for the duration of the provided command and is stopped once the command finishes.
+
+The command _must_ respect the `HTTP_PROXY` and `HTTPS_PROXY` environment variables.
+
+Response recording is limited to `pypi.org` and `files.pythonhosted.org`.
+
+Responses are written to `responses.dat` in the `offlinepi` project root.
diff --git a/scripts/offlinepi/offlinepi b/scripts/offlinepi/offlinepi
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+#
+# Run a command, recording or replaying interaction with the PyPI server.
+#
+# Usage:
+#
+# offlinepi <record|replay> <command>
+#
+# A proxy is started in the background, <command> is run with HTTP_PROXY and
+# HTTPS_PROXY pointing at it, and the proxy is stopped afterwards.
+#
+# Exit status: the exit status of <command>, or 1 if the mode is missing or
+# invalid, a proxy is already answering the healthcheck, or the proxy fails
+# to start.
+
+projectroot=$(realpath "$(dirname "$0")")
+responsefile=$projectroot/responses.dat
+
+mode=$1
+shift
+
+if [ -z "$mode" ]; then
+    echo 'A mode must be provided e.g. `offlinepi record ...`'
+    exit 1
+fi
+
+# A `case` statement is used rather than an extended glob inside `[[ ]]` so
+# the check does not depend on extglob being enabled in the running bash.
+case "$mode" in
+    record | replay) ;;
+    *)
+        echo "Invalid mode \"$mode\"; expected either \"record\" or \"replay\"."
+        exit 1
+        ;;
+esac
+
+# Refuse to start a second proxy if one already answers the healthcheck.
+if "$projectroot/offlinepi-healthcheck"; then
+    echo "Proxy is already running at localhost:8080"
+    echo "Aborted!"
+    exit 1
+fi
+
+echo "Starting proxy server to $mode responses..."
+# Paths are quoted so a checkout directory containing spaces still works.
+"$projectroot/offlinepi-$mode" "$responsefile" &
+PROXY_PID=$!
+
+if ! "$projectroot/offlinepi-wait" "$PROXY_PID"; then
+    echo "Server failed to start!"
+    echo "Aborted!"
+    "$projectroot/offlinepi-stop" "$PROXY_PID"
+    exit 1
+fi
+
+export HTTP_PROXY=http://localhost:8080
+# NOTE(review): the https:// scheme asks clients to speak TLS to the proxy
+# itself; confirm this is intended rather than http://localhost:8080.
+export HTTPS_PROXY=https://localhost:8080
+
+echo "Running provided command..."
+"$@"
+# Remember the command's status so it is not replaced by the status of the
+# cleanup step below.
+status=$?
+
+echo "Stopping proxy server..."
+"$projectroot/offlinepi-stop" "$PROXY_PID"
+
+exit "$status"
diff --git a/scripts/offlinepi/offlinepi-healthcheck b/scripts/offlinepi/offlinepi-healthcheck
@@ -0,0 +1,12 @@
+#!/usr/bin/env sh
+#
+# Checks if the proxy is running.
+#
+# Usage:
+#
+# offlinepi-healthcheck
+
+exec curl --output /dev/null --silent --head --fail --proxy 127.0.0.1:8080 http://mitm.it
+
+# TODO(zanieb): We could consider looking at the response to determine if a _different_ proxy is being used.
+# TODO(zanieb): This could take a configurable host and port
diff --git a/scripts/offlinepi/offlinepi-record b/scripts/offlinepi/offlinepi-record
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+#
+# Start a proxy that records client server interactions to a file.
+# Any existing file at <path> is removed first. The script then replaces
+# itself with mitmdump, so it runs until the proxy is stopped.
+#
+# Usage:
+#
+# offlinepi-record <path>
+
+path=$1
+shift
+
+if [ -z "$path" ]; then
+    echo 'A recording path must be provided.'
+    exit 1
+fi
+
+if [ -n "$*" ]; then
+    echo "Unexpected extra arguments: $*"
+    exit 1
+fi
+
+# Remove the file before starting.
+# `-f` keeps a missing file from being an error while still reporting real
+# failures; `--` and the quotes protect paths with spaces or a leading dash.
+rm -f -- "$path"
+
+# N.B. Additional options must be added _before_ the filter string
+exec mitmdump \
+    -w "$path" \
+    --set stream_large_bodies=1000m \
+    "~d pypi.org|files.pythonhosted.org|mitm.it"
+
+# stream_large_bodies: must be set to a large value or large responses will not be recorded
+# resulting in unexpected file endings during replays
+# ~d: only interactions with package index domains should be recorded
+# we also allow `mitm.it` so healthchecks succeed when replaying
diff --git a/scripts/offlinepi/offlinepi-replay b/scripts/offlinepi/offlinepi-replay
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Start a proxy that replays server responses from a recording.
+# Unknown responses will result in a 500.
+# Each response can only be replayed once or it will be treated as unknown.
+# The script replaces itself with mitmdump, so it runs until the proxy is
+# stopped.
+#
+# Usage:
+#
+# offlinepi-replay <path>
+
+path=$1
+shift
+
+if [ -z "$path" ]; then
+    echo 'A recording path must be provided.'
+    exit 1
+fi
+
+if [ -n "$*" ]; then
+    echo "Unexpected extra arguments: $*"
+    exit 1
+fi
+
+# The path is quoted so recordings stored under a directory containing spaces
+# are passed to mitmdump as a single argument.
+exec mitmdump --server-replay "$path" \
+    --server-replay-extra 500 \
+    --set connection_strategy=lazy
+
+# server-replay-extra: configures behavior when a response is unknown.
+# connection_strategy: lazy is required to replay offline
diff --git a/scripts/offlinepi/offlinepi-stop b/scripts/offlinepi/offlinepi-stop
@@ -0,0 +1,24 @@
+#!/usr/bin/env sh
+#
+# Stops the proxy at the given PID.
+#
+# Usage:
+#
+# offlinepi-stop <pid>
+
+pid=$1
+shift
+
+if [ -z "$pid" ]; then
+ echo 'A PID must be provided.'
+ exit 1
+fi
+
+if [ ! -z "$*" ]; then
+ echo "Unexpected extra arguments: $*"
+ exit 1
+fi
+
+kill $pid 2> /dev/null
+wait $pid 2> /dev/null
+echo "Done!"
diff --git a/scripts/offlinepi/offlinepi-wait b/scripts/offlinepi/offlinepi-wait
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+#
+# Waits for the proxy to be ready.
+#
+# Polls the healthcheck once per second until it succeeds. Exits 1 if the
+# process with the given PID is no longer running before that happens.
+#
+# Usage:
+#
+# offlinepi-wait <pid>
+
+projectroot=$(realpath "$(dirname "$0")")
+
+pid=$1
+shift
+
+if [ -z "$pid" ]; then
+    echo 'A PID must be provided.'
+    exit 1
+fi
+
+if [ -n "$*" ]; then
+    echo "Unexpected extra arguments: $*"
+    exit 1
+fi
+
+
+# Wait until the server is ready.
+# The healthcheck is run directly rather than inside `$(...)`, so anything it
+# prints can never be executed as a command.
+until "$projectroot/offlinepi-healthcheck"; do
+    # Give up as soon as the proxy process itself has gone away.
+    if ! kill -0 "$pid" 2> /dev/null; then
+        exit 1
+    fi
+    sleep 1
+done