Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BED-4194: PG Migrator Testing #582

Draft
wants to merge 19 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions cmd/api/src/api/tools/PG_MIGRATE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
## Migrating Graph Data from Neo4j to Postgres

### Endpoints
| Endpoint | HTTP Request | Usage | Expected Response |
| --- | --- | --- | --- |
| `/pg-migration/status/` | `GET` | Returns a status indicating whether the migrator is currently running. | **Status:** `200 OK`<br><br><pre>{<br>&nbsp;&nbsp;"state": "idle" \| "migrating" \| "canceling"<br>}</pre> |
| `/pg-migration/start/` | `PUT` | Kicks off the migration process from neo4j to postgres. | **Status:** `202 Accepted` |
| `/pg-migration/cancel/` | `PUT` | Cancels the currently running migration. | **Status:** `202 Accepted` |
| `/graph-db/switch/pg/` | `PUT` | Switches the current graph database driver to postgres. | **Status:** `200 OK` |
| `/graph-db/switch/neo4j/` | `PUT` | Switches the current graph database driver to neo4j. | **Status:** `200 OK` |

### Running a Migration
1. Confirm the migration status is currently "idle" before running a migration with the `/pg-migration/status/` endpoint. The migration will run in the same direction regardless of the currently selected graph driver.
2. Start the migration process using the `/pg-migration/start/` endpoint. Since the migration occurs asynchronously, you will want to monitor the API logs to see information regarding the currently running migration.
- When the migration starts, there should be a log with the message `"Dispatching live migration from Neo4j to PostgreSQL"`
- Upon completion, you should see the message `"Migration to PostgreSQL completed successfully"`
- Any errors that occur during the migration process will also surface here
- You can also poll the `/pg-migration/status/` endpoint and wait for an `"idle"` status to indicate the migration has completed
- An in-progress migration can be cancelled with the `/pg-migration/cancel/` endpoint and run again at any time
3. Once you are ready to switch over to the postgres graph driver, you can use the `/graph-db/switch/pg/` endpoint.
97 changes: 55 additions & 42 deletions cmd/api/src/api/tools/pg.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,28 @@ package tools
import (
"context"
"fmt"
"net/http"
"sync"

"github.com/jackc/pgx/v5"
"github.com/neo4j/neo4j-go-driver/v5/neo4j/dbtype"
"github.com/specterops/bloodhound/dawgs"
"github.com/specterops/bloodhound/dawgs/drivers/neo4j"
"github.com/specterops/bloodhound/dawgs/drivers/pg"
"github.com/specterops/bloodhound/dawgs/drivers/pg/query"
"github.com/specterops/bloodhound/dawgs/graph"
"github.com/specterops/bloodhound/dawgs/util/size"
"github.com/specterops/bloodhound/log"
"github.com/specterops/bloodhound/src/api"
"github.com/specterops/bloodhound/src/config"
"net/http"
"sync"
)

// MigratorState is the string-typed state of the PostgreSQL migrator. It is the
// value written under the "state" key by the migration status handler.
type MigratorState string

// Migrator states: idle (the state a new migrator starts in), migrating, and
// canceling.
const (
stateIdle MigratorState = "idle"
stateMigrating MigratorState = "migrating"
stateCanceling MigratorState = "canceling"
StateIdle MigratorState = "idle"
StateMigrating MigratorState = "migrating"
StateCanceling MigratorState = "canceling"
)

func migrateTypes(ctx context.Context, neoDB, pgDB graph.Database) error {
Expand Down Expand Up @@ -188,21 +191,21 @@ func migrateEdges(ctx context.Context, neoDB, pgDB graph.Database, nodeIDMapping
// PGMigrator bundles what the Neo4j-to-PostgreSQL migration handlers work with:
// the graph schema, the graph database switch, the server context, the cancel
// function of the migration context, the current migrator state, a mutex, and
// the application configuration.
type PGMigrator struct {
graphSchema graph.Schema
graphDBSwitch *graph.DatabaseSwitch
serverCtx context.Context
ServerCtx context.Context
migrationCancelFunc func()
state MigratorState
State MigratorState
lock *sync.Mutex
cfg config.Configuration
Cfg config.Configuration
}

// NewPGMigrator returns a PGMigrator that starts in the idle state with a fresh
// mutex and stores the supplied server context, configuration, graph schema and
// graph database switch.
func NewPGMigrator(serverCtx context.Context, cfg config.Configuration, graphSchema graph.Schema, graphDBSwitch *graph.DatabaseSwitch) *PGMigrator {
return &PGMigrator{
graphSchema: graphSchema,
graphDBSwitch: graphDBSwitch,
serverCtx: serverCtx,
state: stateIdle,
ServerCtx: serverCtx,
State: StateIdle,
lock: &sync.Mutex{},
cfg: cfg,
Cfg: cfg,
}
}

Expand All @@ -213,29 +216,26 @@ func (s *PGMigrator) advanceState(next MigratorState, validTransitions ...Migrat
isValid := false

for _, validTransition := range validTransitions {
if s.state == validTransition {
if s.State == validTransition {
isValid = true
break
}
}

if !isValid {
return fmt.Errorf("migrator state is %s but expected one of: %v", s.state, validTransitions)
return fmt.Errorf("migrator state is %s but expected one of: %v", s.State, validTransitions)
}

s.state = next
s.State = next
return nil
}

func (s *PGMigrator) SwitchPostgreSQL(response http.ResponseWriter, request *http.Request) {
if pgDB, err := dawgs.Open(s.serverCtx, pg.DriverName, dawgs.Config{
GraphQueryMemoryLimit: size.Gibibyte,
DriverCfg: s.cfg.Database.PostgreSQLConnectionString(),
}); err != nil {
if pgDB, err := s.OpenPostgresGraphConnection(); err != nil {
api.WriteJSONResponse(request.Context(), map[string]any{
"error": fmt.Errorf("failed connecting to PostgreSQL: %w", err),
}, http.StatusInternalServerError, response)
} else if err := SetGraphDriver(request.Context(), s.cfg, pg.DriverName); err != nil {
} else if err := SetGraphDriver(request.Context(), s.Cfg, pg.DriverName); err != nil {
api.WriteJSONResponse(request.Context(), map[string]any{
"error": fmt.Errorf("failed updating graph database driver preferences: %w", err),
}, http.StatusInternalServerError, response)
Expand All @@ -248,14 +248,11 @@ func (s *PGMigrator) SwitchPostgreSQL(response http.ResponseWriter, request *htt
}

func (s *PGMigrator) SwitchNeo4j(response http.ResponseWriter, request *http.Request) {
if neo4jDB, err := dawgs.Open(s.serverCtx, neo4j.DriverName, dawgs.Config{
GraphQueryMemoryLimit: size.Gibibyte,
DriverCfg: s.cfg.Neo4J.Neo4jConnectionString(),
}); err != nil {
if neo4jDB, err := s.OpenNeo4jGraphConnection(); err != nil {
api.WriteJSONResponse(request.Context(), map[string]any{
"error": fmt.Errorf("failed connecting to Neo4j: %w", err),
}, http.StatusInternalServerError, response)
} else if err := SetGraphDriver(request.Context(), s.cfg, neo4j.DriverName); err != nil {
} else if err := SetGraphDriver(request.Context(), s.Cfg, neo4j.DriverName); err != nil {
api.WriteJSONResponse(request.Context(), map[string]any{
"error": fmt.Errorf("failed updating graph database driver preferences: %w", err),
}, http.StatusInternalServerError, response)
Expand All @@ -267,31 +264,27 @@ func (s *PGMigrator) SwitchNeo4j(response http.ResponseWriter, request *http.Req
}
}

func (s *PGMigrator) startMigration() error {
if err := s.advanceState(stateMigrating, stateIdle); err != nil {
func (s *PGMigrator) StartMigration() error {
if err := s.advanceState(StateMigrating, StateIdle); err != nil {
return fmt.Errorf("database migration state error: %w", err)
} else if neo4jDB, err := dawgs.Open(s.serverCtx, neo4j.DriverName, dawgs.Config{
GraphQueryMemoryLimit: size.Gibibyte,
DriverCfg: s.cfg.Neo4J.Neo4jConnectionString(),
}); err != nil {
} else if neo4jDB, err := s.OpenNeo4jGraphConnection(); err != nil {
return fmt.Errorf("failed connecting to Neo4j: %w", err)
} else if pgDB, err := dawgs.Open(s.serverCtx, pg.DriverName, dawgs.Config{
GraphQueryMemoryLimit: size.Gibibyte,
DriverCfg: s.cfg.Database.PostgreSQLConnectionString(),
}); err != nil {
} else if pgDB, err := s.OpenPostgresGraphConnection(); err != nil {
return fmt.Errorf("failed connecting to PostgreSQL: %w", err)
} else {
log.Infof("Dispatching live migration from Neo4j to PostgreSQL")

migrationCtx, migrationCancelFunc := context.WithCancel(s.serverCtx)
migrationCtx, migrationCancelFunc := context.WithCancel(s.ServerCtx)
s.migrationCancelFunc = migrationCancelFunc

go func(ctx context.Context) {
defer migrationCancelFunc()

log.Infof("Starting live migration from Neo4j to PostgreSQL")

if err := pgDB.AssertSchema(ctx, s.graphSchema); err != nil {
if err := dropCurrentGraphSchema(ctx, pgDB); err != nil {
log.Errorf("Unable to drop graph schema in PostgreSQL: %v", err)
} else if err := pgDB.AssertSchema(ctx, s.graphSchema); err != nil {
log.Errorf("Unable to assert graph schema in PostgreSQL: %v", err)
} else if err := migrateTypes(ctx, neo4jDB, pgDB); err != nil {
log.Errorf("Unable to migrate Neo4j kinds to PostgreSQL: %v", err)
Expand All @@ -303,7 +296,7 @@ func (s *PGMigrator) startMigration() error {
log.Infof("Migration to PostgreSQL completed successfully")
}

if err := s.advanceState(stateIdle, stateMigrating, stateCanceling); err != nil {
if err := s.advanceState(StateIdle, StateMigrating, StateCanceling); err != nil {
log.Errorf("Database migration state management error: %v", err)
}
}(migrationCtx)
Expand All @@ -313,7 +306,7 @@ func (s *PGMigrator) startMigration() error {
}

func (s *PGMigrator) MigrationStart(response http.ResponseWriter, request *http.Request) {
if err := s.startMigration(); err != nil {
if err := s.StartMigration(); err != nil {
api.WriteJSONResponse(request.Context(), map[string]any{
"error": err.Error(),
}, http.StatusInternalServerError, response)
Expand All @@ -322,8 +315,8 @@ func (s *PGMigrator) MigrationStart(response http.ResponseWriter, request *http.
}
}

func (s *PGMigrator) cancelMigration() error {
if err := s.advanceState(stateCanceling, stateMigrating); err != nil {
func (s *PGMigrator) CancelMigration() error {
if err := s.advanceState(StateCanceling, StateMigrating); err != nil {
return err
}

Expand All @@ -333,7 +326,7 @@ func (s *PGMigrator) cancelMigration() error {
}

func (s *PGMigrator) MigrationCancel(response http.ResponseWriter, request *http.Request) {
if err := s.cancelMigration(); err != nil {
if err := s.CancelMigration(); err != nil {
api.WriteJSONResponse(request.Context(), map[string]any{
"error": err.Error(),
}, http.StatusInternalServerError, response)
Expand All @@ -344,6 +337,26 @@ func (s *PGMigrator) MigrationCancel(response http.ResponseWriter, request *http

// MigrationStatus is an HTTP handler that writes the migrator's current state
// as a JSON object under the "state" key with a 200 OK status.
func (s *PGMigrator) MigrationStatus(response http.ResponseWriter, request *http.Request) {
api.WriteJSONResponse(request.Context(), map[string]any{
"state": s.state,
"state": s.State,
}, http.StatusOK, response)
}

// OpenPostgresGraphConnection opens a graph database through the pg driver,
// using the migrator's server context, the PostgreSQL connection string from
// its configuration, and a graph query memory limit of size.Gibibyte.
func (s *PGMigrator) OpenPostgresGraphConnection() (graph.Database, error) {
	pgConfig := dawgs.Config{
		GraphQueryMemoryLimit: size.Gibibyte,
		DriverCfg:             s.Cfg.Database.PostgreSQLConnectionString(),
	}

	return dawgs.Open(s.ServerCtx, pg.DriverName, pgConfig)
}

// OpenNeo4jGraphConnection opens a graph database through the neo4j driver,
// using the migrator's server context, the Neo4j connection string from its
// configuration, and a graph query memory limit of size.Gibibyte.
func (s *PGMigrator) OpenNeo4jGraphConnection() (graph.Database, error) {
	neo4jConfig := dawgs.Config{
		GraphQueryMemoryLimit: size.Gibibyte,
		DriverCfg:             s.Cfg.Neo4J.Neo4jConnectionString(),
	}

	return dawgs.Open(s.ServerCtx, neo4j.DriverName, neo4jConfig)
}

// dropCurrentGraphSchema calls DropSchema inside a write transaction on db. The
// transaction is opened with the pgx simple-protocol query exec mode option.
// Any error from the transaction is returned unchanged.
func dropCurrentGraphSchema(ctx context.Context, db graph.Database) error {
	dropSchema := func(tx graph.Transaction) error {
		return query.On(tx).DropSchema()
	}

	return db.WriteTransaction(ctx, dropSchema, pg.OptionSetQueryExecMode(pgx.QueryExecModeSimpleProtocol))
}
Loading
Loading