c# – npgsql: How to select multiple rows (with multiple column values) with npgsql in one command with a collection as a parameter?

I have two tables defined below, supplier_balances and supplier_balance_items (btw there is a 1[supplier_balance]:N[supplier_balance_items] relationship between the two):

CREATE TABLE IF NOT EXISTS sch_brand_payment_data_lake_proxy.supplier_balances (
/* id is here for joining purposes with items table, instead of joining with the 4 columns used for sake
   of making sure a record is deemed as unique */
  id                             bigserial NOT NULL,
  accounting_document            text      NOT NULL,
  accounting_document_type       text      NOT NULL,
  company_code                   text      NOT NULL,
  document_date_year             int4      NOT NULL,
  accounting_doc_created_by_user text,
  accounting_clerk               text,
  assignment_reference           text,
  document_reference_id          text,
  original_reference_document    text,
  payment_terms                  text,
  supplier                       text,
  supplier_name                  text,
  document_date                  timestamp,
  posting_date                   timestamp,
  net_due_date                   timestamp,
  created_on                     timestamp default NULL,
  modified_on                    timestamp default NULL,
  pushed_on                      timestamp default NULL,
  is_modified bool GENERATED ALWAYS AS (modified_on IS NOT NULL AND modified_on > created_on) STORED,
  is_pushed   bool GENERATED ALWAYS AS (pushed_on   IS NOT NULL AND pushed_on > modified_on)  STORED,
  CONSTRAINT supplier_balances_pkey   PRIMARY KEY (id),
  /* accounting_document being the field of the composite unique index -> faster querying */
  CONSTRAINT supplier_balances_unique UNIQUE (
     accounting_document,
     accounting_document_type,
     company_code,
     document_date_year)
);
/* Creating other indexes for querying of those as well */
CREATE INDEX IF NOT EXISTS supplier_balances_accounting_document_type_idx
ON sch_brand_payment_data_lake_proxy.supplier_balances (accounting_document_type);
CREATE INDEX IF NOT EXISTS supplier_balances_company_code_idx
ON sch_brand_payment_data_lake_proxy.supplier_balances (company_code);
CREATE INDEX IF NOT EXISTS supplier_balances_document_date_year_idx
ON sch_brand_payment_data_lake_proxy.supplier_balances (document_date_year);

CREATE TABLE IF NOT EXISTS sch_brand_payment_data_lake_proxy.supplier_balance_items
(
    supplier_balance_id             bigserial NOT NULL,
    posting_view_item               text      NOT NULL,
    posting_key                     text,
    amount_in_company_code_currency numeric,
    amount_in_transaction_currency  numeric,
    cash_discount_1_percent         numeric,
    cash_discount_amount            numeric,
    clearing_accounting_document    text,
    document_item_text              text,
    gl_account                      text,
    is_cleared                      bool,
    clearing_date                   timestamp,
    due_calculation_base_date       timestamp,
    /* uniqueness is basically the posting_view_item for a given supplier balance */
    CONSTRAINT supplier_balance_items_pkey PRIMARY KEY (supplier_balance_id, posting_view_item),
    /* 1(supplier balance):N(supplier balance items) */
    CONSTRAINT supplier_balance_items_fkey FOREIGN KEY (supplier_balance_id)
               REFERENCES sch_brand_payment_data_lake_proxy.supplier_balances (id)
               ON DELETE CASCADE
               ON UPDATE CASCADE
);

Note: I’m just filling up the columns that can’t be NULL for the sake of simplicity.

INSERT INTO 
sch_brand_payment_data_lake_proxy.supplier_balances 
(accounting_document, accounting_document_type, company_code, document_date_year)
VALUES 
('A', 'B', 'C', 0),
('A', 'B', 'C', 1),
('A', 'B', 'C', 2),
('A', 'B', 'C', 3),
('A', 'B', 'C', 4),
('A', 'B', 'C', 5)
RETURNING id;

Output:

INSERT INTO 
sch_brand_payment_data_lake_proxy.supplier_balance_items 
(supplier_balance_id, posting_view_item)
VALUES 
(1, 'A'),
(1, 'B'),
(3, 'A'),
(3, 'B'),
(2, 'A'),
(1, 'C');
SELECT 
    accounting_document, 
    accounting_document_type, 
    company_code, 
    document_date_year
FROM sch_brand_payment_data_lake_proxy.supplier_balances;

Output:

id accounting_document accounting_document_type company_code document_date_year
1 A B C 0
2 A B C 1
3 A B C 2
4 A B C 3
5 A B C 4
6 A B C 5
SELECT 
    supplier_balance_id,
    posting_view_item
FROM sch_brand_payment_data_lake_proxy.supplier_balance_items;

Output:

supplier_balance_id posting_view_item
1 A
1 B
3 A
3 B
2 A
1 C

Now if we want to select multiple values ​​in a JOIN we can perform in a raw SQL:

SELECT 
    id,
    accounting_document, 
    accounting_document_type, 
    company_code, 
    document_date_year, 
    posting_view_item
FROM sch_brand_payment_data_lake_proxy.supplier_balances
LEFT OUTER JOIN sch_brand_payment_data_lake_proxy.supplier_balance_items
ON supplier_balances.id = supplier_balance_items.supplier_balance_id
WHERE (accounting_document, accounting_document_type, company_code, document_date_year)
IN  (('A', 'B', 'C', 1), ('A', 'B', 'C', 2))

Output:

id accounting_document accounting_document_type company_code document_date_year posting_view_item
2 A B C 1 A
3 A B C 2 A

https://github.com/npgsql/npgsql/issues/1199

Now, when using npgsql in C#, reproducing the query just above is an easy feat:

using System.Data;

using Npgsql;

var connectionStringBuilder = new NpgsqlConnectionStringBuilder
{
    Host     = "localhost",
    Port     = 5432,
    Username = "brand_payment_migration",
    Password = "secret",
    Database = "brand_payment"
};
using var connection = new NpgsqlConnection(connectionStringBuilder.ToString());
connection.Open();
using var command = connection.CreateCommand();
command.CommandText = 
"SELECT id, accounting_document, accounting_document_type, company_code, document_date_year, posting_view_item " +
"FROM sch_brand_payment_data_lake_proxy.supplier_balances " +
"LEFT OUTER JOIN sch_brand_payment_data_lake_proxy.supplier_balance_items " +
"ON supplier_balances.id = supplier_balance_items.supplier_balance_id " +
"WHERE (accounting_document, accounting_document_type, company_code, document_date_year) " +
"IN (('A', 'B', 'C', 1), ('A', 'B', 'C', 2));";

using var reader = command.ExecuteReader();
using var dataTable = new DataTable();
dataTable.Load(reader);
var cols = dataTable.Columns.Cast<DataColumn>().ToArray();
Console.WriteLine(string.Join(Environment.NewLine, cols.Select((x, i) => $"Col{i} = {x}")));
Console.WriteLine(string.Join("t", cols.Select((_, i) => $"Col{i}")));
foreach (var dataRow in dataTable.Rows.Cast<DataRow>())
{
    Console.WriteLine(string.Join("t", dataRow.ItemArray));
}

which, as expected outputs:

Col0 = id
Col1 = accounting_document
Col2 = accounting_document_type
Col3 = company_code
Col4 = document_date_year
Col5 = posting_view_item
Col0    Col1    Col2    Col3    Col4    Col5
2       A       B       C       1       A
3       A       B       C       2       A
3       A       B       C       2       B

Now, what I would like to achieve is that, instead of passing a raw string for (('A', 'B', 'C', 1), ('A', 'B', 'C', 2));I would love to use a NpgSqlParameter with a collection of value set (ie for each column)).

So I’ve changed the C# snippet above and added the parameter

// ...
"WHERE (accounting_document, accounting_document_type, company_code, document_date_year) " +
"IN @values;";
var parameter = command.CreateParameter();
parameter.ParameterName = "@values";
parameter.NpgsqlDbType = NpgsqlDbType.Array;
parameter.NpgsqlValue = new object[,]
{
    { "A", "B", "C", 1 }, 
    { "A", "B", "C", 2 }
};
// Note: the same kind of issue arises when using tuples, i.e.
// ( "A", "B", "C", 1 )
// ( "A", "B", "C", 2 )
command.Parameters.Add(parameter);
using var reader = command.ExecuteReader();
// ...

Then I’m getting this exception:

Unhandled exception. System.ArgumentOutOfRangeException: Cannot set NpgsqlDbType to just Array, Binary-Or with the element type (e.g. Array of Box is NpgsqlDbType.Array | Npg
sqlDbType.Box). (Parameter 'value')
   at Npgsql.NpgsqlParameter.set_NpgsqlDbType(NpgsqlDbType value)
   at Program.<Main>$(String[] args) in C:Usersnatalie-perretDesktopPersonalplaygroundcsharpCSharpPlaygroundProgram.cs:line 25

Then I’ve tried to work around that error with:

parameter.NpgsqlDbType = NpgsqlDbType.Array | NpgsqlDbType.Unknown;

but then getting another exception:

Unhandled exception. System.ArgumentException: No array type could be found in the database for element .<unknown>
   at Npgsql.TypeMapping.ConnectorTypeMapper.ResolveByNpgsqlDbType(NpgsqlDbType npgsqlDbType)
   at Npgsql.NpgsqlParameter.ResolveHandler(ConnectorTypeMapper typeMapper)
   at Npgsql.NpgsqlParameterCollection.ValidateAndBind(ConnectorTypeMapper typeMapper)
   at Npgsql.NpgsqlCommand.ExecuteReader(CommandBehavior behavior, Boolean async, CancellationToken cancellationToken)
   at Npgsql.NpgsqlCommand.ExecuteReader(CommandBehavior behavior, Boolean async, CancellationToken cancellationToken)
   at Npgsql.NpgsqlCommand.ExecuteReader(CommandBehavior behavior)
   at Program.<Main>$(String[] args) in C:Usersnatalie-perretDesktopPersonalplaygroundcsharpCSharpPlaygroundProgram.cs:line 32

Seems that the type needs to be registered for some reason, actually if I don’t specify the type:

Unhandled exception. System.NotSupportedException: The CLR type System.Object isn't natively supported by Npgsql or your PostgreSQL. To use it with a PostgreSQL composite
 you need to specify DataTypeName or to map it, please refer to the documentation.
   at Npgsql.TypeMapping.ConnectorTypeMapper.ResolveByClrType(Type type)
   at Npgsql.TypeMapping.ConnectorTypeMapper.ResolveByClrType(Type type)
   at Npgsql.NpgsqlParameter.ResolveHandler(ConnectorTypeMapper typeMapper)
   at Npgsql.NpgsqlParameter.Bind(ConnectorTypeMapper typeMapper)
   at Npgsql.NpgsqlParameterCollection.ValidateAndBind(ConnectorTypeMapper typeMapper)
   at Npgsql.NpgsqlCommand.ExecuteReader(CommandBehavior behavior, Boolean async, CancellationToken cancellationToken)
   at Npgsql.NpgsqlCommand.ExecuteReader(CommandBehavior behavior, Boolean async, CancellationToken cancellationToken)
   at Npgsql.NpgsqlCommand.ExecuteReader(CommandBehavior behavior)
   at Program.<Main>$(String[] args) in C:Usersnatalie-perretDesktopPersonalplaygroundcsharpCSharpPlaygroundProgram.cs:line 31

[EDIT]

The temporary solution I’ve ended up with is to rely on the jsonb support and in particular the jsonb_to_recordset function (see the PostgreSQL documentation section about json functions):

using System.Data;
using System.Text.Json;

using Npgsql;
using NpgsqlTypes;


var connectionStringBuilder = new NpgsqlConnectionStringBuilder
{
    Host     = "localhost",
    Port     = 5432,
    Username = "brand_payment_migration",
    Password = "secret",
    Database = "brand_payment"
};
using var connection = new NpgsqlConnection(connectionStringBuilder.ToString());
connection.Open();
using var command = connection.CreateCommand();
command.CommandText = 
"SELECT id, accounting_document, accounting_document_type, company_code, document_date_year, posting_view_item " +
"FROM sch_brand_payment_data_lake_proxy.supplier_balances " +
"LEFT OUTER JOIN sch_brand_payment_data_lake_proxy.supplier_balance_items " +
"ON supplier_balances.id = supplier_balance_items.supplier_balance_id " +
"WHERE (accounting_document, accounting_document_type, company_code, document_date_year) " +
"IN (SELECT * FROM jsonb_to_recordset(@values) " +
"AS params (accounting_document text, accounting_document_type text, company_code text, document_date_year integer));";
var parameter = command.CreateParameter();
parameter.ParameterName = "@values";
parameter.NpgsqlDbType = NpgsqlDbType.Jsonb;
parameter.NpgsqlValue = JsonSerializer.Serialize(new []
{
    new Params("A", "B", "C", 1), 
    new Params("A", "B", "C", 2)
});
command.Parameters.Add(parameter);
using var reader = command.ExecuteReader();
using var dataTable = new DataTable();
dataTable.Load(reader);
var cols = dataTable.Columns.Cast<DataColumn>().ToArray();
Console.WriteLine(string.Join(Environment.NewLine, cols.Select((x, i) => $"Col{i} = {x}")));
Console.WriteLine(string.Join("t", cols.Select((_, i) => $"Col{i}")));
foreach (var dataRow in dataTable.Rows.Cast<DataRow>())
{
    Console.WriteLine(string.Join("t", dataRow.ItemArray));
}


public Params(
    string accounting_document, 
    string accounting_document_type,
    string company_code,
    int document_date_year);

Output:

Col0 = id
Col1 = accounting_document
Col2 = accounting_document_type
Col3 = company_code
Col4 = document_date_year
Col5 = posting_view_item
Col0    Col1    Col2    Col3    Col4    Col5
2       A       B       C       1       A
3       A       B       C       2       A
3       A       B       C       2       B

But this comes at the cost of adding an additional step of json serialization when passing the parameters. So other than that and building an insanely long string, I’m kinda puzzled at the fact there is no way to pass directly the actual values, without extra steps, to the NpgsqlParameter.NpgsqlValueproperty.

[Edit 2]

Adding a DbFiddle

[Edit 3]

The same jsonb “trick” can be used for feeding the data (albeit, same hiccupes I’ve already mentioned above):

INSERT INTO sch_brand_payment_data_lake_proxy.supplier_balances
    (accounting_document, accounting_document_type, company_code, document_date_year)
SELECT * FROM jsonb_to_recordset(
    '[{"accounting_document":"E","accounting_document_type":"B","company_code":"C","document_date_year":1},
      {"accounting_document":"E","accounting_document_type":"B","company_code":"C","document_date_year":2}]'::jsonb)
       AS params (accounting_document text, accounting_document_type text, company_code text, document_date_year integer)
RETURNING id;

Leave a Comment