1
votes

With Azure Data Factory V2, through the portal

https://adf.azure.com

I created a pipeline that incrementally copies data from multiple tables in one Azure SQL database to another Azure SQL database.

To create it, I adapted the following example to my needs: "Incrementally load data from multiple tables".

The following is the JSON definition of the pipeline I created:

{
"name": "IncrementalCopyPipeline",
"properties": {
    "activities": [
        {
            "name": "IterateSQLTables",
            "type": "ForEach",
            "typeProperties": {
                "items": {
                    "value": "@pipeline().parameters.tableList",
                    "type": "Expression"
                },
                "activities": [
                    {
                        "name": "LookupOldWaterMarkActivity",
                        "type": "Lookup",
                        "policy": {
                            "timeout": "7.00:00:00",
                            "retry": 0,
                            "retryIntervalInSeconds": 30,
                            "secureOutput": false,
                            "secureInput": false
                        },
                        "typeProperties": {
                            "source": {
                                "type": "SqlSource",
                                "sqlReaderQuery": {
                                    "value": "select * \nfrom watermarktable \nwhere TableName  =  '@{item().TABLE_NAME}'",
                                    "type": "Expression"
                                }
                            },
                            "dataset": {
                                "referenceName": "WatermarkDataset",
                                "type": "DatasetReference"
                            }
                        }
                    },
                    {
                        "name": "LookupNewWaterMarkActivity",
                        "type": "Lookup",
                        "policy": {
                            "timeout": "7.00:00:00",
                            "retry": 0,
                            "retryIntervalInSeconds": 30,
                            "secureOutput": false,
                            "secureInput": false
                        },
                        "typeProperties": {
                            "source": {
                                "type": "SqlSource",
                                "sqlReaderQuery": {
                                    "value": "select MAX(@{item().WaterMark_Column}) as NewWatermarkvalue \nfrom @{item().TABLE_NAME}",
                                    "type": "Expression"
                                }
                            },
                            "dataset": {
                                "referenceName": "SourceDataset",
                                "type": "DatasetReference"
                            }
                        }
                    },
                    {
                        "name": "IncrementalCopyActivity",
                        "type": "Copy",
                        "dependsOn": [
                            {
                                "activity": "LookupNewWaterMarkActivity",
                                "dependencyConditions": [
                                    "Succeeded"
                                ]
                            },
                            {
                                "activity": "LookupOldWaterMarkActivity",
                                "dependencyConditions": [
                                    "Succeeded"
                                ]
                            }
                        ],
                        "policy": {
                            "timeout": "7.00:00:00",
                            "retry": 0,
                            "retryIntervalInSeconds": 30,
                            "secureOutput": false,
                            "secureInput": false
                        },
                        "typeProperties": {
                            "source": {
                                "type": "SqlSource",
                                "sqlReaderQuery": {
                                    "value": "select * from @{item().TABLE_NAME} \nwhere @{item().WaterMark_Column} > '@{activity('LookupOldWaterMarkActivity').output.firstRow.WatermarkValue}' and @{item().WaterMark_Column} <= '@{activity('LookupNewWaterMarkActivity').output.firstRow.NewWatermarkvalue}'",
                                    "type": "Expression"
                                }
                            },
                            "sink": {
                                "type": "SqlSink",
                                "writeBatchSize": 10000,
                                "sqlWriterStoredProcedureName": {
                                    "value": "@{item().StoredProcedureNameForMergeOperation}",
                                    "type": "Expression"
                                },
                                "sqlWriterTableType": {
                                    "value": "@{item().TableType}",
                                    "type": "Expression"
                                }
                            },
                            "enableStaging": false,
                            "dataIntegrationUnits": 0
                        },
                        "inputs": [
                            {
                                "referenceName": "SourceDataset",
                                "type": "DatasetReference"
                            }
                        ],
                        "outputs": [
                            {
                                "referenceName": "SinkDataset",
                                "type": "DatasetReference",
                                "parameters": {
                                    "SinkTableName": "@{item().TABLE_NAME}"
                                }
                            }
                        ]
                    },
                    {
                        "name": "StoredProceduretoWriteWatermarkActivity",
                        "type": "SqlServerStoredProcedure",
                        "dependsOn": [
                            {
                                "activity": "IncrementalCopyActivity",
                                "dependencyConditions": [
                                    "Succeeded"
                                ]
                            }
                        ],
                        "policy": {
                            "timeout": "7.00:00:00",
                            "retry": 0,
                            "retryIntervalInSeconds": 30,
                            "secureOutput": false,
                            "secureInput": false
                        },
                        "typeProperties": {
                            "storedProcedureName": "[dbo].[sp_write_watermark]",
                            "storedProcedureParameters": {
                                "LastModifiedtime": {
                                    "value": {
                                        "value": "@{activity('LookupNewWaterMarkActivity').output.firstRow.NewWatermarkvalue}",
                                        "type": "Expression"
                                    },
                                    "type": "DateTime"
                                },
                                "TableName": {
                                    "value": {
                                        "value": "@{activity('LookupOldWaterMarkActivity').output.firstRow.TableName}",
                                        "type": "Expression"
                                    },
                                    "type": "String"
                                }
                            }
                        },
                        "linkedServiceName": {
                            "referenceName": "SqlServerLinkedService_dest",
                            "type": "LinkedServiceReference"
                        }
                    }
                ]
            }
        }
    ],
    "parameters": {
        "tableList": {
            "type": "Object",
            "defaultValue": [
                {
                    "TABLE_NAME": "customer_table",
                    "WaterMark_Column": "LastModifytime",
                    "TableType": "DataTypeforCustomerTable",
                    "StoredProcedureNameForMergeOperation": "sp_upsert_customer_table"
                },
                {
                    "TABLE_NAME": "project_table",
                    "WaterMark_Column": "Creationtime",
                    "TableType": "DataTypeforProjectTable",
                    "StoredProcedureNameForMergeOperation": "sp_upsert_project_table"
                }
            ]
        }
    }
}
}

My table has a column that distinguishes between different companies, so I would like to add another parameter to this pipeline. The table looks like this:

NAME    LASTMODIFY                 COMPANY
John    2015-01-01 00:00:00.000    1
Mike    2016-02-02 01:23:00.000    2
Andy    2017-03-04 05:16:00.000    3
Annie   2018-09-08 00:00:00.000    1

Does anyone know how to add a parameter to the pipeline in order to specify which company to copy and which one not to copy?

Any suggestions? Thanks in advance to everyone!

1

1 Answer

0
votes

Not exactly clear on what you're asking, so apologies if I am missing the mark, but:

The Copy activity's SQL sink can invoke a stored procedure, which you could potentially use to solve your problem. Take a look at this example: https://docs.microsoft.com/en-us/azure/data-factory/connector-sql-server#invoking-stored-procedure-for-sql-sink

It uses a stored procedure that runs a MERGE, performing an UPDATE or an INSERT depending on whether the JOIN finds a match. It also allows parameters to be passed to the stored procedure.

So if you are trying to copy only certain rows based on a parameter, the join condition of the MERGE may help.