User Tools

Site Tools


notes:sql:techniques_treeanalysis

Tree Analysis

We present two techniques of analyzing tree-like data structures:

  • using a non-recursive procedure
  • using a recursive procedure

The first technique uses a non-recursive stored procedure to find the path and the level of each leaf in a tree-like table. Copy and execute the script:

-- Create a tree structure
IF OBJECT_ID('DataTree') IS NOT NULL DROP TABLE DataTree
GO
CREATE TABLE DataTree
(
    NodeID INT NOT NULL PRIMARY KEY,
    ParentID INT NOT NULL DEFAULT 0,
    [Name] VARCHAR(1000) NOT NULL DEFAULT ''
)
GO
 
-- Insert sample data
INSERT INTO DataTree VALUES(1,0,'Canada')
INSERT INTO DataTree VALUES(2,0,'Poland')
INSERT INTO DataTree VALUES(3,1,'Ontario')
INSERT INTO DataTree VALUES(4,3,'Toronto')
INSERT INTO DataTree VALUES(5,3,'Mississauga')
INSERT INTO DataTree VALUES(6,2,'Warszawa')
INSERT INTO DataTree VALUES(7,2,'Lublin')
INSERT INTO DataTree VALUES(8,1,'Quebec')
INSERT INTO DataTree VALUES(9,8,'Montreal')
INSERT INTO DataTree VALUES(10,4,'North York')
INSERT INTO DataTree VALUES(11,4,'Etobicoke')
INSERT INTO DataTree VALUES(12,6,'Ursus')
INSERT INTO DataTree VALUES(13,6,'Bielany')
 
 
IF OBJECT_ID('dbo.sp_ProcessTree') IS NOT NULL DROP PROC dbo.sp_ProcessTree
GO
 
CREATE PROC dbo.sp_ProcessTree
    @NodeID INT -- root node
AS
    SET NOCOUNT ON
 
    -- Create a flat structure to hold level numbers and paths
    CREATE TABLE #data
    (
        NodeID INT NOT NULL PRIMARY KEY,
        ParentID INT NOT NULL DEFAULT 0,
        [Name] VARCHAR(1000) NOT NULL DEFAULT '',
        [Path] VARCHAR(2000) NOT NULL DEFAULT '',
        [Level] INT NOT NULL DEFAULT 0
    )
 
    DECLARE @ParentID INT, @Level INT, @Name VARCHAR(1000), @Path VARCHAR(2000)
    DECLARE @LevelCounter INT, @RowCounter INT
 
    INSERT INTO #data SELECT NodeID, ParentID, [Name], [Name], 1 FROM DataTree WHERE ParentID = @NodeID
 
    SET @LevelCounter = 1
    SET @RowCounter = 1
 
    WHILE @RowCounter > 0
    BEGIN
        SET @RowCounter = 0
 
        DECLARE cur CURSOR FOR SELECT NodeID, ParentID, [Name], [Path] 
                               FROM #data WHERE [Level] = @LevelCounter
        OPEN cur FETCH NEXT FROM cur INTO @NodeID, @ParentID, @Name, @Path
 
        WHILE @@FETCH_STATUS=0
        BEGIN
            INSERT INTO #data SELECT NodeID, ParentID, [Name], @Path + '/' + [Name], @LevelCounter + 1
                              FROM DataTree WHERE ParentID = @NodeID
 
            SET @RowCounter = @RowCounter + @@ROWCOUNT
            FETCH NEXT FROM cur INTO @NodeID, @ParentID, @Name, @Path
        END
 
        CLOSE cur
        DEALLOCATE cur
 
        SET @LevelCounter = @LevelCounter + 1
    END
 
    -- Show results
    SELECT * FROM #data
 
    -- Clean-up
    DROP TABLE #data
 
    SET NOCOUNT OFF
GO

Show results:

EXEC dbo.sp_ProcessTree 1 -- root node = Canada
NodeID  ParentID  Name          Path                        Level 
------  --------  ------------  --------------------------  -----
3	1         Ontario	Ontario                     1
4	3         Toronto	Ontario/Toronto             2
5	3         Mississauga	Ontario/Mississauga         2
8	1         Quebec	Quebec                      1
9	8         Montreal	Quebec/Montreal             2
10	4         North York	Ontario/Toronto/North York  3
11	4         Etobicoke	Ontario/Toronto/Etobicoke   3
EXEC dbo.sp_ProcessTree 2 -- root node = Poland
NodeID  ParentID  Name          Path                        Level 
------  --------  ------------  --------------------------  -----
6	2         Warszawa      Warszawa                    1
7	2         Lublin        Lublin                      1
12	6         Ursus         Warszawa/Ursus              2
13	6         Bielany       Warszawa/Bielany            2

Another stored procedure determines levels in a tree-like table using recursion. “Root messages” (i.e. messages that do not have any parent) have ID = 0. All other messages have exactly one parent: a root message or another message.

Create our tree-like table:

IF OBJECT_ID('Message') IS NOT NULL DROP TABLE [Message]
GO
 
CREATE TABLE [Message]
(
    Id INT NOT NULL PRIMARY KEY,
    ParentId INT NOT NULL DEFAULT 0,
    MessageText NVARCHAR(200) NOT NULL DEFAULT '',
    Level INT NOT NULL DEFAULT 0,
 
    --  a self-reference foreign key
    CONSTRAINT FK_Message FOREIGN KEY (Id) REFERENCES [Message](Id)
)
GO

Create the recursive stored procedure sp_UpdateMessageLevel:

IF OBJECT_ID('sp_UpdateMessageLevel') IS NOT NULL DROP PROC sp_UpdateMessageLevel
GO
CREATE PROC sp_UpdateMessageLevel AS DECLARE @Dummy INT
GO
 
ALTER PROC sp_UpdateMessageLevel
    @MessageId INT = NULL,
    @ParentId INT = NULL,
    @Level INT = 1
AS
    SET NOCOUNT ON
 
    IF @MessageId IS NULL 
        SELECT TOP 1 @MessageId=Id, @ParentId=ParentId FROM [Message] WHERE Level=0
 
    IF @MessageId IS NOT NULL
    BEGIN   
        IF @ParentID=0
        BEGIN
            UPDATE [Message] SET Level=@Level WHERE Id=@MessageId
            SELECT @MessageId=NULL, @Level=1
        END
        ELSE
        BEGIN
            SET @Level=@Level+1
            SELECT @ParentId=ParentId FROM [Message] WHERE Id=@ParentId         
        END
 
        EXEC sp_UpdateMessageLevel @MessageId, @ParentId, @Level
    END
 
    SET NOCOUNT OFF
GO

Insert some sample data to the Message table:

INSERT INTO [Message] VALUES (1, 0, 'Message 1', 0) 
INSERT INTO [Message] VALUES (2, 1, 'Message 1.1', 0) 
INSERT INTO [Message] VALUES (3, 1, 'Message 1.2', 0) 
INSERT INTO [Message] VALUES (4, 2, 'Message 1.1.1', 0) 
INSERT INTO [Message] VALUES (5, 2, 'Message 1.1.2', 0) 
INSERT INTO [Message] VALUES (6, 3, 'Message 1.2.1', 0) 
INSERT INTO [Message] VALUES (7, 1, 'Message 1.3', 0) 
INSERT INTO [Message] VALUES (8, 0, 'Message 2', 0) 
INSERT INTO [Message] VALUES (9, 8, 'Message 2.1', 0)

Execute the stored procedure and show the results:

EXEC sp_UpdateMessageLevel
SELECT * FROM [Message]
Id  ParentId  MessageText    Level
--  --------  -------------  -----
1         0   Message 1          1
2         1   Message 1.1        2
3         1   Message 1.2        2
4         2   Message 1.1.1      3
5         2   Message 1.1.2      3
6         3   Message 1.2.1      3
7         1   Message 1.3        2
8         0   Message 2          1
9         8   Message 2.1        2

As an additional test, let's use an unsorted set of messages:

DELETE FROM [Message]
INSERT INTO [Message] VALUES (10, 18, 'Message 2.1', 0) 
INSERT INTO [Message] VALUES (13, 23, 'Message 1.2.1', 0) 
INSERT INTO [Message] VALUES (12, 81, 'Message 1.1', 0) 
INSERT INTO [Message] VALUES (18, 0,  'Message 2', 0) 
INSERT INTO [Message] VALUES (23, 81, 'Message 1.2', 0) 
INSERT INTO [Message] VALUES (51, 12, 'Message 1.1.2', 0) 
INSERT INTO [Message] VALUES (75, 81, 'Message 1.3', 0) 
INSERT INTO [Message] VALUES (81, 0,  'Message 1', 0) 
INSERT INTO [Message] VALUES (92, 12, 'Message 1.1.1', 0) 

The stored procedure produces the same results (albeit unsorted):

Id  ParentId   MessageText    Level
--  --------   -------------  -----
10        18   Message 2.1        2
12        81   Message 1.1        2
13        23   Message 1.2.1      3
18         0   Message 2          1
23        81   Message 1.2        2
51        12   Message 1.1.2      3
75        81   Message 1.3        2
81         0   Message 1          1
92        12   Message 1.1.1      3
notes/sql/techniques_treeanalysis.txt · Last modified: 2015/06/24 by admin