SQLServerで大量のReplaceを実行する

はじめに

SQLServer上のテーブルに対して大量のReplaceを実行する必要があったので、その方法を調査しました。

テストデータ作成

以下のようなテストデータを作成します。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
CREATE TABLE SampleTable (
ID INT PRIMARY KEY IDENTITY(1,1),
名前 NVARCHAR(50),
年齢 INT,
性別 NVARCHAR(10),
住所 NVARCHAR(100),
電話番号 NVARCHAR(20),
メールアドレス NVARCHAR(100),
登録日 DATE,
更新日 DATE,
会社名 NVARCHAR(100),
部署 NVARCHAR(50),
役職 NVARCHAR(50),
給与 DECIMAL(10, 2),
契約期間 NVARCHAR(50),
メモ NVARCHAR(500),
プロジェクト NVARCHAR(100),
ステータス NVARCHAR(20),
コメント NVARCHAR(500),
承認者 NVARCHAR(50),
承認日 DATE
);

テストデータ挿入

以下のようなテストデータを挿入します。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
WITH Numbers AS (
SELECT TOP (1000000) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS n
FROM sys.all_columns a CROSS JOIN sys.all_columns b
),
RandomChars AS (
SELECT
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS id,
NCHAR(0x3040 + ABS(CHECKSUM(NEWID())) % 85) AS Char1,
NCHAR(0x30A0 + ABS(CHECKSUM(NEWID())) % 90) AS Char2,
NCHAR(0xD83C) + NCHAR(0xDF00 + ABS(CHECKSUM(NEWID())) % 0x100) AS Char3,
NCHAR(0x4E00 + ABS(CHECKSUM(NEWID())) % 20902) AS Char4
FROM sys.all_columns a CROSS JOIN sys.all_columns b
)

INSERT INTO SampleTable (名前, 年齢, 性別, 住所, 電話番号, メールアドレス, 登録日, 更新日, 会社名, 部署, 役職, 給与, 契約期間, メモ, プロジェクト, ステータス, コメント, 承認者, 承認日)
SELECT TOP (1000000)
(SELECT TOP (2 + ABS(CHECKSUM(NEWID())) % 9)
RC.Char1 + RC.Char2 + RC.Char3 + RC.Char4
FROM RandomChars AS RC
WHERE RC.id = N.n
ORDER BY NEWID()
FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)') AS 名前,
ABS(CHECKSUM(NEWID())) % 100 AS 年齢,
CASE ABS(CHECKSUM(NEWID())) % 2 WHEN 0 THEN N'男性' ELSE N'女性' END AS 性別,
N'ランダム住所' + CAST(n AS NVARCHAR(10)) AS 住所,
N'090-' + RIGHT('0000' + CAST(ABS(CHECKSUM(NEWID())) % 10000 AS NVARCHAR(4)), 4) + '-' + RIGHT('0000' + CAST(ABS(CHECKSUM(NEWID())) % 10000 AS NVARCHAR(4)), 4) AS 電話番号,
N'random' + CAST(n AS NVARCHAR(10)) + N'@example.com' AS メールアドレス,
DATEADD(DAY, ABS(CHECKSUM(NEWID())) % 3650, '2000-01-01') AS 登録日,
DATEADD(DAY, ABS(CHECKSUM(NEWID())) % 3650, '2010-01-01') AS 更新日,
N'会社名' + CAST(n AS NVARCHAR(10)) AS 会社名,
N'部署' + CAST(n AS NVARCHAR(10)) AS 部署,
N'役職' + CAST(n AS NVARCHAR(10)) AS 役職,
CAST(ABS(CHECKSUM(NEWID())) % 100000 AS DECIMAL(10, 2)) AS 給与,
N'契約期間' + CAST(n AS NVARCHAR(10)) AS 契約期間,
N'メモ' + CAST(n AS NVARCHAR(10)) AS メモ,
N'プロジェクト' + CAST(n AS NVARCHAR(10)) AS プロジェクト,
CASE ABS(CHECKSUM(NEWID())) % 3 WHEN 0 THEN N'進行中' WHEN 1 THEN N'完了' ELSE N'保留' END AS ステータス,
N'コメント' + CAST(n AS NVARCHAR(10)) AS コメント,
N'承認者' + CAST(n AS NVARCHAR(10)) AS 承認者,
DATEADD(DAY, ABS(CHECKSUM(NEWID())) % 3650, '2020-01-01') AS 承認日
FROM Numbers AS N
CROSS APPLY (
SELECT TOP (2 + ABS(CHECKSUM(NEWID())) % 9)
NCHAR(0x3040 + ABS(CHECKSUM(NEWID())) % 85) +
NCHAR(0x30A0 + ABS(CHECKSUM(NEWID())) % 90) +
NCHAR(0xD83C) + NCHAR(0xDF00 + ABS(CHECKSUM(NEWID())) % 0x100) +
NCHAR(0x4E00 + ABS(CHECKSUM(NEWID())) % 20902) as x1
FROM RandomChars
) AS NameXML(x1);
****
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
DECLARE @RowCount INT = 1;
DECLARE @TotalRows INT = 100000;
DECLARE @BatchSize INT = 2000;

WHILE @RowCount <= @TotalRows
BEGIN
WITH Numbers AS (
SELECT
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS n
FROM sys.all_columns a CROSS JOIN sys.all_columns b
),
RandomChars AS (
SELECT
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS id,
NCHAR(0x3040 + ABS(CHECKSUM(NEWID())) % 85) AS Char1,
NCHAR(0x30A0 + ABS(CHECKSUM(NEWID())) % 90) AS Char2,
NCHAR(0xD83C) + NCHAR(0xDF00 + ABS(CHECKSUM(NEWID())) % 0x100) AS Char3,
NCHAR(0x4E00 + ABS(CHECKSUM(NEWID())) % 20902) AS Char4
FROM sys.all_columns a CROSS JOIN sys.all_columns b
),
CTE AS (
SELECT
N.n,
(SELECT TOP (2 + ABS(CHECKSUM(NEWID())) % 9)
RC.Char1 + RC.Char2 + RC.Char3 + RC.Char4
FROM RandomChars AS RC
WHERE RC.id = N.n
ORDER BY NEWID()
FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)') AS 名前,
ABS(CHECKSUM(NEWID())) % 100 AS 年齢,
CASE ABS(CHECKSUM(NEWID())) % 2 WHEN 0 THEN N'男性' ELSE N'女性' END AS 性別,
N'ランダム住所' + CAST(n AS NVARCHAR(10)) AS 住所,
N'090-' + RIGHT('0000' + CAST(ABS(CHECKSUM(NEWID())) % 10000 AS NVARCHAR(4)), 4) + '-' + RIGHT('0000' + CAST(ABS(CHECKSUM(NEWID())) % 10000 AS NVARCHAR(4)), 4) AS 電話番号,
N'random' + CAST(n AS NVARCHAR(10)) + N'@example.com' AS メールアドレス,
DATEADD(DAY, ABS(CHECKSUM(NEWID())) % 3650, '2000-01-01') AS 登録日,
DATEADD(DAY, ABS(CHECKSUM(NEWID())) % 3650, '2010-01-01') AS 更新日,
N'会社名' + CAST(n AS NVARCHAR(10)) AS 会社名,
N'部署' + CAST(n AS NVARCHAR(10)) AS 部署,
N'役職' + CAST(n AS NVARCHAR(10)) AS 役職,
CAST(ABS(CHECKSUM(NEWID())) % 100000 AS DECIMAL(10, 2)) AS 給与,
N'契約期間' + CAST(n AS NVARCHAR(10)) AS 契約期間,
N'メモ' + CAST(n AS NVARCHAR(10)) AS メモ,
N'プロジェクト' + CAST(n AS NVARCHAR(10)) AS プロジェクト,
CASE ABS(CHECKSUM(NEWID())) % 3 WHEN 0 THEN N'進行中' WHEN 1 THEN N'完了' ELSE N'保留' END AS ステータス,
N'コメント' + CAST(n AS NVARCHAR(10)) AS コメント,
N'承認者' + CAST(n AS NVARCHAR(10)) AS 承認者,
DATEADD(DAY, ABS(CHECKSUM(NEWID())) % 3650, '2020-01-01') AS 承認日,
ROW_NUMBER() OVER (ORDER BY N.n) AS RowNumber
FROM Numbers AS N
)

INSERT INTO SampleTable (名前, 年齢, 性別, 住所, 電話番号, メールアドレス, 登録日, 更新日, 会社名, 部署, 役職, 給与, 契約期間, メモ, プロジェクト, ステータス, コメント, 承認者, 承認日)
SELECT
名前, 年齢, 性別, 住所, 電話番号, メールアドレス, 登録日, 更新日, 会社名, 部署, 役職, 給与, 契約期間, メモ, プロジェクト, ステータス, コメント, 承認者, 承認日
FROM CTE
WHERE RowNumber BETWEEN @RowCount AND @RowCount + @BatchSize - 1;

SET @RowCount += @BatchSize;

PRINT 'Inserted ' + CAST(@RowCount - 1 AS NVARCHAR(10)) + ' rows';
END;

Replace実行

以下のようなReplaceを実行します。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
DECLARE @RowCount INT = 1;
DECLARE @TotalRows INT = (SELECT COUNT(*) FROM SampleTable);
DECLARE @BatchSize INT = 2000;

WHILE @RowCount <= @TotalRows
BEGIN
BEGIN TRAN;

WITH CTE AS (
SELECT
*,
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS RowNumber
FROM SampleTable
)
UPDATE CTE
SET 名前 = REPLACE(名前, 'あ', '')
WHERE 名前 LIKE '%あ%'
AND RowNumber BETWEEN @RowCount AND @RowCount + @BatchSize - 1;

UPDATE CTE
SET 名前 = REPLACE(名前, 'い', '')
WHERE 名前 LIKE '%い%'
AND RowNumber BETWEEN @RowCount AND @RowCount + @BatchSize - 1;

UPDATE CTE
SET 名前 = REPLACE(名前, 'う', '')
WHERE 名前 LIKE '%う%'
AND RowNumber BETWEEN @RowCount AND @RowCount + @BatchSize - 1;

ROLLBACK;

SET @RowCount += @BatchSize;
END;
You forgot to set the business or currency_code for Paypal. Please set it in _config.yml.

コメント

You forgot to set the shortname for Disqus. Please set it in _config.yml.
You need to set client_id and slot_id to show this AD unit. Please set it in _config.yml.