Tuesday, August 4, 2015
SQL Server-- Row Logic Manipulation
In SQL sometime we are asked to compare data from two or more rows and manipulate data to meet certain business requirements.
Here I am going to show one common business requirement we see in different industries such as sales and marketing, healthcare, etc
Let's look at this transcational data in our sample table
Here you see that for ID 1, there are three rows, for ID 2, there are 3 rows and for ID 3, there is 1 rows.
Now say that your business requirement ask you that get the first Visit_Date and last Purchase_Date as one row. Whenever there is more than 3 day gap, treat them as new data set.
So your final result should look like this
Let's see how we can achieve this
--- Sample Data
-- Create following table to illustrate date tranformation
--Drop table dbo.sample_Data
Create Table dbo.sample_Data
( ID int
, Visit_Date Date
, Purchase_Date Date)
---INSERT sample Data
INSERT INTO dbo.sample_Data (ID, Visit_Date, Purchase_Date) VALUES (1, '2015-06-01','2015-06-02')
INSERT INTO dbo.sample_Data (ID, Visit_Date, Purchase_Date) VALUES (1, '2015-06-02','2015-06-03')
INSERT INTO dbo.sample_Data (ID, Visit_Date, Purchase_Date) VALUES (1, '2015-06-03','2015-06-04')
INSERT INTO dbo.sample_Data (ID, Visit_Date, Purchase_Date) VALUES (2, '2015-06-01','2015-06-02')
INSERT INTO dbo.sample_Data (ID, Visit_Date, Purchase_Date) VALUES (2, '2015-06-06','2015-06-07')
INSERT INTO dbo.sample_Data (ID, Visit_Date, Purchase_Date) VALUES (2, '2015-06-07','2015-06-08')
INSERT INTO dbo.sample_Data (ID, Visit_Date, Purchase_Date) VALUES (3, '2015-06-01','2015-06-02')
-- Check the data from table
SELECT * FROM dbo.sample_Data
- Now let's look data and what we are trying to do.
--Condition 1. In this case where ID = 1, we want one row of data with original Visit_Date and Last Purchase_Date here it is '2015-06-01' and 2015-06-04' as one row
-- Condition 2 if there are multiple different visit and purchase date, you want to get first visit_date and Last purchase_Date. If there is more then 3 day gap, you
--- want to have another row showing next visit_Date and Purchase_Date
-- ********************************
-- ==> STEP1: Create temp tables.
-- ********************************
-- Create the temp processing table.
-- Rows will be deleted from this table during merge process.
SELECT ROW_NUMBER () OVER (ORDER BY ID, Visit_Date) AS RowID, *
INTO #temp_Sample_Data
FROM dbo.sample_Data
-- Create table used for merging dates.
-- Initially, rows are associated with itself.
SELECT ID
, RowID AS RowID1
, Visit_Date As Visit_Date1
, Purchase_Date as Purchase_Date1
,RowID AS RowID2
, Visit_Date As Visit_Date2
, Purchase_Date as Purchase_Date2
INTO #tem_Sample_Date_Merge
FROM #temp_Sample_Data
-- *********************************************************
-- ==> STEP 2: Merge the records.
-- *********************************************************
DECLARE @MaxDayDifference INT =3
DECLARE @Continue INT = 1
BEGIN TRAN
WHILE (@Continue >0)
BEGIN
DELETE A
FROM #tem_Sample_Date_Merge AS A
INNER JOIN #tem_Sample_Date_Merge AS B
ON A.RowID2 = B.RowID2
AND A.RowID1 > B.RowID1
UPDATE A
SET RowID2 = B.RowID2
, Visit_Date2 = B.Visit_Date2
, Purchase_Date2 =B.Purchase_Date2
FROM #tem_Sample_Date_Merge AS A
INNER JOIN #tem_Sample_Date_Merge AS B
ON A.ID = B.ID
AND DATEDIFF(DAY, A.Purchase_Date2, B.Visit_Date1) <=@MaxDayDifference
AND B.RowID1 = (SELECT MIN(C.RowID1) FROM #tem_Sample_Date_Merge AS C
Where C.ID = B.ID
AND C.RowID1> A.RowID1)
SET @Continue = @@rowcount
END
COMMIT TRAN
SELECT A.ID
, A.Visit_Date
, C.Purchase_Date
FROM #temp_Sample_Data As A
INNER JOIN #tem_Sample_Date_Merge AS B
ON A.RowID = B.RowID1
INNER JOIN #temp_Sample_Data AS C
ON B.RowID2 = C.RowID
ORDER BY A.ID, A.Visit_Date
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment