Start
1: createdThree modes. Pick based on how the target should change over time, not on how the current batch happens to look.
Use append when each run only adds rows to the target.
append with validationMode=warnAndFilter and a target-side uniqueness rule.df.write .format("pgstyx") .option("url", "jdbc:postgresql://localhost:5432/warehouse") .option("dbtable", "events") .option("user", "postgres") .option("password", "secret") .option("writeMode", "append") .save()df.write \ .format("pgstyx") \ .option("url", "jdbc:postgresql://localhost:5432/warehouse") \ .option("dbtable", "events") \ .option("user", "postgres") \ .option("password", "secret") \ .option("writeMode", "append") \ .save()CREATE TEMP VIEW incoming_events USING json OPTIONS (path 'path/to/events.json');
CREATE TABLE events_append USING pgstyxOPTIONS ( url 'jdbc:postgresql://localhost:5432/warehouse', dbtable 'events', user 'postgres', password 'secret', writeMode 'append') AS SELECT * FROM incoming_events;Example progression:
Start
1: createdAppend run 1
Incoming: 2: paid
Target after run: 1: created, 2: paid
Append run 2
Incoming: 3: shipped
Target after run: 1: created, 2: paid, 3: shipped
Use upsert when rows should be created or refreshed based on a stable key.
mergeKeys is required.mergeKeys throws IllegalArgumentException('mergekeys required for upsert mode').df.write .format("pgstyx") .option("url", "jdbc:postgresql://localhost:5432/warehouse") .option("dbtable", "users") .option("user", "postgres") .option("password", "secret") .option("writeMode", "upsert") .option("mergeKeys", "user_id") .save()df.write \ .format("pgstyx") \ .option("url", "jdbc:postgresql://localhost:5432/warehouse") \ .option("dbtable", "users") \ .option("user", "postgres") \ .option("password", "secret") \ .option("writeMode", "upsert") \ .option("mergeKeys", "user_id") \ .save()CREATE TEMP VIEW incoming_users USING json OPTIONS (path 'path/to/users.json');
CREATE TABLE users_upsert USING pgstyxOPTIONS ( url 'jdbc:postgresql://localhost:5432/warehouse', dbtable 'users', user 'postgres', password 'secret', writeMode 'upsert', mergeKeys 'user_id') AS SELECT * FROM incoming_users;Example progression:
Start
1: alice@old.exampleUpsert run 1
Upsert run 2
Incoming: 2: bob@example
Target after run: 1: alice@new.example, 2: bob@example
Use overwrite for snapshots, full refreshes, or compacted rebuilds.
df.write .format("pgstyx") .option("url", "jdbc:postgresql://localhost:5432/warehouse") .option("dbtable", "events") .option("user", "postgres") .option("password", "secret") .option("writeMode", "overwrite") .save()df.write \ .format("pgstyx") \ .option("url", "jdbc:postgresql://localhost:5432/warehouse") \ .option("dbtable", "events") \ .option("user", "postgres") \ .option("password", "secret") \ .option("writeMode", "overwrite") \ .save()CREATE TEMP VIEW latest_snapshot USING json OPTIONS (path 'path/to/snapshot.json');
CREATE TABLE events_overwrite USING pgstyxOPTIONS ( url 'jdbc:postgresql://localhost:5432/warehouse', dbtable 'events', user 'postgres', password 'secret', writeMode 'overwrite') AS SELECT * FROM latest_snapshot;Example progression:
Start
Target: jan, feb, mar
Overwrite run 1
Incoming: apr, may
Target after run: apr, may
Overwrite run 2
junjunIf dbtable does not exist when the write starts, PGStyx creates it from the DataFrame schema.
writeMode=upsert uses mergeKeys as the primary key.writeMode=append has no primary key by default.schemaEvolution rules.| Mode | Best fit |
|---|---|
append | Event ingest, raw landing, additive feeds |
upsert | Dimension sync, idempotent pipelines, record-identity updates |
overwrite | Snapshots, full rebuilds, compacted tables |