Batching Data Changes

Batching data chances is encouraged when doing large UPDATE and DELETE statements which affect many rows.

Overview

The idea is that you

  1. Create #full_list and copy the primary ID into this table from prod_table
  2. Define the batch_size, example 1000 and other variables such as the number_of_batches
  3. Loop WHILE @rows_exist = 1
  4. Insert into #temp_batch by selecting TOP(@batch_size)
  5. Perform the UPDATE or DELETE
    1. on prod_table by joining to #temp_batch
    2. DELETE FROM #temp_full_list
  6. Set @rows_exist as infinite loops are frowned upon. Apparently.

Roll Backs

A simple way to roll the data back is to create a new dbo.temp_random_data and insert the at very least the id so you can join back to it. This is really for an UPDATE as DELETE should trigger auditing. You do audit deletes in your database right? :)

SQL

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
USE hoe_database;
SET NOCOUNT ON;

BEGIN
SET LOCK_TIMEOUT 1000;

IF (OBJECT_ID('[dbo].[temp_random_data]') IS NULL)
BEGIN
PRINT('CREATING temp_random_data')
CREATE TABLE dbo.temp_random_data (
[id] [int] IDENTITY(1,1) NOT NULL,
[insert_date] [datetime] DEFAULT GETDATE(),
[id_random_data] [int] NOT NULL,
[done] [int] DEFAULT 0 );

ALTER TABLE dbo.temp_random_data ADD CONSTRAINT PK_temp_random_data_id PRIMARY KEY(id);
END
ELSE
PRINT 'ALREADY EXISTS temp_random_data'

INSERT INTO dbo.temp_random_data
(id_random_data, done)
SELECT id, 0 as done
FROM random_data (NOLOCK)
WHERE some_bit = 1

SET LOCK_TIMEOUT -1
END

DECLARE @batch_size INT = 1000
DECLARE @rows_to_update INT = (SELECT COUNT(1) FROM dbo.temp_random_data WITH (NOLOCK))
DECLARE @number_of_batches INT = (@rows_to_update / @batch_size) + 1
DECLARE @current_batch INT = 1
DECLARE @message VARCHAR(255)
DECLARE @rows_exist INT = 1

DROP TABLE IF EXISTS #temp_batch;
CREATE TABLE #temp_batch( id_random_data INT );

WHILE @rows_exist = 1
BEGIN
BEGIN TRY

INSERT #temp_batch
SELECT TOP (@batch_size) id_random_data
FROM dbo.temp_random_data (NOLOCK)
WHERE done = 0
ORDER BY id_random_data ASC;

--- This is the prod table
UPDATE random_data WITH (ROWLOCK)
SET some_bit = 0
FROM random_data AS rd
INNER JOIN #temp_batch
ON rd.id = #temp_batch.id_random_data;

--- This temp_ table can be deleted once done and you are happy with the change
UPDATE dbo.temp_random_data
SET done = 1
FROM dbo.temp_random_data AS trd
INNER JOIN #temp_batch
ON trd.id_random_data = #temp_batch.id_random_data;

TRUNCATE TABLE #temp_batch;

SET @message = 'Finished batch ' + CAST(@current_batch AS VARCHAR) + ' of ' + CAST(@number_of_batches AS VARCHAR) + '. ' + CAST(CAST(GETDATE() AS SMALLDATETIME) AS VARCHAR)
RAISERROR(@message, 10, 1);

SET @current_batch = @current_batch + 1;

END TRY
BEGIN CATCH
PRINT ERROR_MESSAGE();
BREAK;
END CATCH

IF NOT EXISTS (SELECT TOP 1 1 FROM dbo.temp_random_data WHERE done = 0)
SET @rows_exist = 0;

END

DROP TABLE IF EXISTS #temp_batch;