Wednesday, January 16, 2008

Mining Fact tables in PPS databases

The following is a simple script that was developed by a team member to help dig into PPS fact tables. When data is validation necessary, or when enhancing PPS with real-time extensions, it is often necessary to see/use data in a PPS fact table prior to joining dimensions. The attached view takes the fact table name (automatically prefixes the MG_) and generates the SELECT with appropriate JOINs to the dimensions. This eliminates the repetitive and error prone SQL generation from fact view building.
“As-is” code, but it has been tested and provides some depth:



/****** Object: StoredProcedure [dbo].[spUTIL_CreateFactView] Script Date: 01/16/2008 04:56:27 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE PROCEDURE [dbo].[spUTIL_CreateFactView] (@TableNameID nvarchar(10), @JoinType nvarchar(10) = 'INNER', @debug int = 0)
AS
-- Create view to PPS Fact table procedure, for diagnostic and real time extension use
-- Jim Lacenski, Solver Inc.
-- supply fact table name (no MG_) as parameter
-- customize / tailor result set SQL as needed
BEGIN
DECLARE @ViewName varchar(30), @FactTable varchar(80), @CrLf char(2)
DECLARE @sSQL varchar(3000), @sSQLRow varchar(100)
If OBJECT_ID('tempdb..#ColumnList') is not null DROP TABLE #ColumnList
If OBJECT_ID('tempdb..#CompleteSQL') is not null DROP TABLE #CompleteSQL

SET NOCOUNT ON
CREATE TABLE #ColumnList
( [ID] [int] IDENTITY(1,1),
SrcCol varchar(40),
ColType varchar(2),
DimTable varchar(40),
DimLabel varchar(40),
AliasName varchar(3),
JOINclause varchar (140)
)

CREATE TABLE #CompleteSQL
(
[ID] [int] IDENTITY(1,1),
sSQL varchar(200)
)
-- get fact table name
SELECT @FactTable = TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME LIKE 'MG_' + @TableNameID + '%'
PRINT @FactTable
SET @sSQL = 'SELECT TOP 5000'
SET @CrLf = CHAR(13)
INSERT INTO #CompleteSQL (sSQL) VALUES (@sSQL)

-- get dimensions
INSERT INTO #ColumnList (SrcCol, ColType, DimTable)
SELECT LEFT(COLUMN_NAME,CHARINDEX('_MemberId',COLUMN_NAME)-1), 'D',
'D_' + LEFT(COLUMN_NAME,CHARINDEX('_MemberId',COLUMN_NAME)-1)
FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = @FactTable
AND COLUMN_NAME LIKE '%_MemberID'

-- add time dimension
INSERT INTO #ColumnList (SrcCol, ColType)
SELECT COLUMN_NAME,'T'
FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = @FactTable
AND COLUMN_NAME LIKE 'Time_%' AND NOT COLUMN_NAME LIKE '%_MemberId'

-- create alias names, column names and JOIN clauses
UPDATE #ColumnList SET AliasName = 'D' + cast([ID] as nvarchar(2))
UPDATE #ColumnList SET DimLabel = AliasName + '.Label [' + SrcCol + ']'
WHERE ColType = 'D'
UPDATE #ColumnList SET JOINclause = @JoinType + ' JOIN '+ DimTable + ' ' + AliasName + ' ON Fct.[' + SrcCol + '_MemberId] = ' + AliasName + '.MemberId'
WHERE ColType = 'D'
UPDATE #ColumnList SET DimLabel = SrcCol
WHERE ColType = 'T'

IF @Debug = 1 SELECT * from #ColumnList

-- iterate through and build sql
DECLARE Col_cursor CURSOR FOR SELECT DimLabel FROM #ColumnList
OPEN Col_cursor
FETCH NEXT FROM Col_cursor INTO @sSQLRow
WHILE @@FETCH_STATUS = 0
BEGIN
SET @sSQL = @sSQLRow + ', ' + @CrLf
INSERT INTO #CompleteSQL (sSQL) VALUES(@sSQL)
FETCH NEXT FROM Col_cursor INTO @sSQLRow
END
CLOSE Col_cursor

-- trim last row
UPDATE #CompleteSQL SET sSQL = LEFT(sSQL,LEN(sSQL)-3)
WHERE [ID] = (SELECT MAX([ID]) FROM #CompleteSQL)

INSERT INTO #CompleteSQL (sSQL) VALUES(' FROM [' + @FactTable + '] Fct ')

-- Insert JOIN clauses
DECLARE JOIN_cursor CURSOR FOR SELECT JOINclause FROM #ColumnList
OPEN JOIN_cursor
FETCH NEXT FROM JOIN_cursor INTO @sSQLRow
WHILE @@FETCH_STATUS = 0
BEGIN
SET @sSQL = @sSQLRow + @CrLf
INSERT INTO #CompleteSQL (sSQL) VALUES(@sSQL)
FETCH NEXT FROM JOIN_cursor INTO @sSQLRow
END
CLOSE JOIN_cursor

-- preseed WHERE
INSERT INTO #CompleteSQL (sSQL) VALUES('WHERE 1 = 1')
-- show results
IF @Debug = 0 SELECT sSQL FROM #CompleteSQL WHERE sSQL is not NULL

END

No comments: