Thursday, January 2, 2014

Estimating the size of a table in SQL Server 2005 and 2008

I have read this (http://msdn.microsoft.com/en-us/library/ms178085.aspx) article and decided to provide us all a great script that can be easily executed on a specific database and extract estimated table sizes , based on NumberOfRows you will provide.
The final table size will be displayed in the Clustered_Index_Size_Display.

What's on the roadmap? Provide ability for Estimating non clustered index size ,partition tables and variable-length(max) columns data types and more.


1. How to execute the stored procedure :
    EXEC stp_GetTableSizeEstimates 1000000--Number of rows possible - reflects all tables found

    SELECT *
    fROM vew_tableSizeEstimate
    order by Clustered_Index_Size desc


2. How to setup the stored procedure? Please copy paste the script below:




IF NOT EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[tbl_tableSizeEstimate]') AND type in (N'U'))
create table tbl_tableSizeEstimate(
iobject_id bigint
,table_name varchar(255)
,Fixed_Data_Size FLOAT  default(0)
,Null_Bitmap FLOAT   default(0)
,Num_Cols FLOAT  default(0)
,Num_Variable_Cols bigint  default(0)
,Max_Var_Size float  default(0)
,Row_Size float   default(0)
,Rows_Per_Page FLOAT  default(0)
,Free_Rows_Per_Page FLOAT  default(0)
,Num_Leaf_Pages FLOAT  default(0)
,Leaf_space_used FLOAT  default(0)
,Num_Variable_Key_Cols bigint  default(0)
,Max_Var_Key_Size FLOAT  default(0)
,Num_Key_Cols bigint  default(0)
,Index_Null_Bitmap bigint default(0)
,Variable_Key_Size FLOAT  default(0)
,Index_Row_Size FLOAT
,Index_Rows_Per_Page bigint
,Non_Leaf_Levels bigint default(1)
,Num_Index_Pages bigint default(0)
,Index_Space_Used FLOAT  default(0)
,Clustered_Index_Size FLOAT  default(0)
,Summand float  default(0)
)



IF NOT  EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[stp_GetTableSizeEstimates]') AND type in (N'P', N'PC'))
exec sp_executesql N'CREATE PROCEDURE [dbo].[stp_GetTableSizeEstimates] as select 1'
GO


ALTER procedure stp_GetTableSizeEstimates
@Num_Rows int = 1000
as
truncate table tbl_tableSizeEstimate
insert into tbl_tableSizeEstimate(iobject_id,table_name,Fixed_Data_Size,Num_Cols,Num_Variable_Cols,Max_Var_Size)
select t.object_id, t.name
,(select CEILING(ISNULL(SUM(s.max_length), 0))
from sys.columns s
inner join sys.types st
on st.user_type_id = s.user_type_id
where t.object_id = s.object_id
and st.name not like N'%var%'
) as Fixed_Data_Size
--------------------
,(select count(1)+ case when i.is_unique=0 then 1 else 0 end from sys.columns s where t.object_id = s.object_id) as Num_Cols
,(select count(1)+ case when i.is_unique=0 and COUNT(1)>0 then 1 else 0 end from sys.columns s
inner join sys.types st
on st.user_type_id = s.user_type_id
where t.object_id = s.object_id
and st.name like N'%var%'
) as Num_Variable_Cols
,(select (CEILING(ISNULL(SUM(s.max_length), 0)) ) + 2 + case when i.is_unique=0 then 4 else 0 end
from sys.columns s
inner join sys.types st
on st.user_type_id = s.user_type_id
where t.object_id = s.object_id
and st.name like N'%var%'
) as Max_Var_Size
From sys.tables t
inner join sys.indexes i
on t.object_id = i.object_id
where i.type=1
and i.index_id=1
order by t.name


--------------------
update T set Null_Bitmap = CEILING(2 + ((T.Num_Cols + 7) / 8))
From tbl_tableSizeEstimate T
inner join sys.indexes i
on T.iobject_id = i.object_id
where i.type=1
and i.index_id=1

update x set Row_Size = CEILING(Fixed_Data_Size + case when Num_Variable_Cols >0 then 2 + (Num_Variable_Cols * 2) + Max_Var_Size else 0 end  + Null_Bitmap + 4 )
From tbl_tableSizeEstimate x

update x set Rows_Per_Page = case when FLOOR(8096 / (Row_Size + 2)) = 0 then 1 else FLOOR(8096 / (Row_Size + 2)) end
From tbl_tableSizeEstimate x

update x set Free_Rows_Per_Page = FLOOR(8096 * ((100 - case when i.fill_factor=0 then 100 else i.fill_factor end) / 100) / (x.Row_Size + 2) )
From tbl_tableSizeEstimate x
inner join sys.indexes i
on x.iobject_id = i.object_id
where i.type=1
and i.index_id=1

update x set x.Num_Leaf_Pages  = CEILING(@Num_Rows / (Rows_Per_Page - Free_Rows_Per_Page) )
From tbl_tableSizeEstimate x

update x set x.Leaf_space_used  = 8192 * Num_Leaf_Pages
From tbl_tableSizeEstimate x

-- Index Information
update x set x.Num_Variable_Key_Cols  =  (
select count(1)
from sys.index_columns ic
inner join sys.columns c
on c.object_id = ic.object_id
and c.column_id = ic.column_id
inner join sys.types st
on st.user_type_id = c.user_type_id
where ic.index_id = i.index_id
and ic.object_id = i.object_id
and st.name like N'%var%'
)
From tbl_tableSizeEstimate x
inner join sys.indexes i
on i.object_id = x.iobject_id
where i.type=1
and i.index_id=1

update x set x.Num_Variable_Key_Cols  +=1
From tbl_tableSizeEstimate x
inner join sys.indexes i
on i.object_id = x.iobject_id
where i.type=1
and i.index_id=1
and i.is_unique = 0
and x.Num_Variable_Key_Cols >0

update x set x.Max_Var_Key_Size  = (select ISNULL(SUM(c.max_length), 0)
from sys.indexes i
inner join sys.index_columns ic
on ic.object_id = i.object_id
and ic.index_id = i.index_id
inner join sys.columns c
on c.object_id = ic.object_id
and c.column_id = ic.column_id
inner join sys.types st
on st.user_type_id = c.user_type_id
where x.iobject_id = i.object_id
and st.name like N'%var%'
and i.type=1
and i.index_id=1
)
From tbl_tableSizeEstimate x

update x set x.Max_Var_Key_Size  +=4
From tbl_tableSizeEstimate x
inner join sys.indexes i
on i.object_id = x.iobject_id
where i.type=1
and i.index_id=1
and i.is_unique = 0
and x.Max_Var_Key_Size >0

update x set x.Num_Key_Cols  =
 (
select count(1)
from sys.index_columns ic
inner join sys.columns c
on c.object_id = ic.object_id
and c.column_id = ic.column_id
where ic.index_id = i.index_id
and ic.object_id = i.object_id
)
From tbl_tableSizeEstimate x
inner join sys.indexes i
on i.object_id = x.iobject_id
where i.type=1
and i.index_id=1


update x set x.Num_Key_Cols  +=1
From tbl_tableSizeEstimate x
inner join sys.indexes i
on i.object_id = x.iobject_id
where i.type=1
and i.index_id=1
and i.is_unique = 0

/*
The bytes added to Max_Var_Key_Size are for tracking each variable-length column. This formula assumes that all variable-length columns are 100 percent full.
If you anticipate that a smaller percentage of the variable-length column storage space will be used, you can adjust the Max_Var_Key_Size value by that percentage to yield a more accurate estimate of the overall table size.
*/
update x set x.Variable_Key_Size = 2 + (Num_Variable_Key_Cols * 2) + Max_Var_Key_Size
From tbl_tableSizeEstimate x
where Max_Var_Key_Size > 0


update x set x.Index_Null_Bitmap  = 2 + ((Num_Key_Cols + 7) / 8)
From tbl_tableSizeEstimate x
inner join sys.indexes i
on x.iobject_id = i.object_id
inner join sys.index_columns ic
on ic.object_id = i.object_id
and ic.index_id = i.index_id
inner join sys.columns c
on c.object_id = ic.object_id
and c.column_id = ic.column_id
inner join sys.types st
on st.user_type_id = c.user_type_id
where i.type=1
and i.index_id=1
and c.is_nullable = 1

/*
 (for row header overhead of an index row) + 6 (for the child page ID pointer)
*/
update x set x.Index_Row_Size  = (select ISNULL(SUM(c.max_length), 0)
from sys.indexes i
inner join sys.index_columns ic
on ic.object_id = i.object_id
and ic.index_id = i.index_id
inner join sys.columns c
on c.object_id = ic.object_id
and c.column_id = ic.column_id
inner join sys.types st
on st.user_type_id = c.user_type_id
where x.iobject_id = i.object_id
and st.name not like N'%var%'
and i.type=1
and i.index_id=1
) + Variable_Key_Size + Index_Null_Bitmap + 1 + 6
From tbl_tableSizeEstimate x

update x set x.Index_Rows_Per_Page = FLOOR(8096 / (Index_Row_Size + 2))
From tbl_tableSizeEstimate x

/*
Round each summand up to the nearest whole number.
As a simple example, consider an index where Num_Leaf_Pages = 1000 and Index_Rows_Per_Page = 25.
The first index level above the leaf level stores 1000 index rows,
which is one index row per leaf page, and 25 index rows can fit per page.
This means that 40 pages are required to store those 1000 index rows.
The next level of the index has to store 40 rows. This means it requires 2 pages.
The final level of the index has to store 2 rows. This means it requires 1 page.
This gives 43 non-leaf index pages. When these numbers are used in the previous formulas,
the outcome is as follows:
Non-leaf_Levels = 1 + log25 (1000 / 25) = 3
*/

declare @rc table(cCount bit) insert @rc select 1

WHILE @@ROWCOUNT>0
update x set x.Non_Leaf_Levels +=1
From tbl_tableSizeEstimate x
where (Num_Leaf_Pages / Index_Rows_Per_Page) > POWER(Index_Rows_Per_Page,Non_Leaf_Levels)
update x set x.Non_Leaf_Levels +=1
From tbl_tableSizeEstimate x

update x set x.Summand = Non_Leaf_Levels
From tbl_tableSizeEstimate x

insert @rc select 1
WHILE @@ROWCOUNT>0
begin
update x  SET Num_Index_Pages += CEILING(Num_Leaf_Pages/POWER(Index_Rows_Per_Page, Summand)) ,Summand -= 1
From tbl_tableSizeEstimate x
where Summand>0

end
update x set x.Index_Space_Used = 8192 * Num_Index_Pages
From tbl_tableSizeEstimate x
update x set x.Clustered_Index_Size = Leaf_Space_Used + Index_Space_used
From tbl_tableSizeEstimate x
GO


/****** Object:  View [dbo].[vew_tableSizeEstimate]    Script Date: 01/02/2014 10:10:57 ******/
IF   EXISTS (SELECT * FROM sys.views WHERE object_id = OBJECT_ID(N'[dbo].[vew_tableSizeEstimate]'))
drop VIEW [dbo].[vew_tableSizeEstimate]
GO
create  view vew_tableSizeEstimate
as
select TABLE_NAME,iobject_id,Fixed_Data_Size,
  CASE
WHEN Fixed_Data_Size < 1024 THEN CAST(CAST(ROUND(Fixed_Data_Size, 3) AS NUMERIC(16,0)) AS VARCHAR(16)) + ' B'
WHEN Fixed_Data_Size < 1024*1024 THEN CAST(CAST(ROUND(Fixed_Data_Size/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' KB'
WHEN Fixed_Data_Size < 1024*1024*1024 THEN CAST(CAST(ROUND(Fixed_Data_Size/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' MB'
ELSE CAST(CAST(ROUND(Fixed_Data_Size/1024/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' GB'
END AS Fixed_Data_Size_Display
,Null_Bitmap
,Num_Cols
,Num_Variable_Cols
, CASE
WHEN Max_Var_Size < 1024 THEN CAST(CAST(ROUND(Max_Var_Size, 3) AS NUMERIC(16,0)) AS VARCHAR(16)) + ' B'
WHEN Max_Var_Size < 1024*1024 THEN CAST(CAST(ROUND(Max_Var_Size/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' KB'
WHEN Max_Var_Size < 1024*1024*1024 THEN CAST(CAST(ROUND(Max_Var_Size/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' MB'
ELSE CAST(CAST(ROUND(Max_Var_Size/1024/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' GB'
END AS Max_Var_Size
, CASE
WHEN Row_Size < 1024 THEN CAST(CAST(ROUND(Row_Size, 3) AS NUMERIC(16,0)) AS VARCHAR(16)) + ' B'
WHEN Row_Size < 1024*1024 THEN CAST(CAST(ROUND(Row_Size/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' KB'
WHEN Row_Size < 1024*1024*1024 THEN CAST(CAST(ROUND(Row_Size/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' MB'
ELSE CAST(CAST(ROUND(Row_Size/1024/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' GB'
END AS Row_Size
,Rows_Per_Page
,Free_Rows_Per_Page
,Num_Leaf_Pages
,Leaf_space_used
,  CASE
WHEN Leaf_space_used < 1024 THEN CAST(CAST(ROUND(Leaf_space_used, 3) AS NUMERIC(16,0)) AS VARCHAR(16)) + ' B'
WHEN Leaf_space_used < 1024*1024 THEN CAST(CAST(ROUND(Leaf_space_used/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' KB'
WHEN Leaf_space_used < 1024*1024*1024 THEN CAST(CAST(ROUND(Leaf_space_used/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' MB'
ELSE CAST(CAST(ROUND(Leaf_space_used/1024/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' GB'
END AS Leaf_space_used_display
,Num_Variable_Key_Cols
, CASE
WHEN Max_Var_Key_Size < 1024 THEN CAST(CAST(ROUND(Max_Var_Key_Size, 3) AS NUMERIC(16,0)) AS VARCHAR(16)) + ' B'
WHEN Max_Var_Key_Size < 1024*1024 THEN CAST(CAST(ROUND(Max_Var_Key_Size/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' KB'
WHEN Max_Var_Key_Size < 1024*1024*1024 THEN CAST(CAST(ROUND(Max_Var_Key_Size/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' MB'
ELSE CAST(CAST(ROUND(Max_Var_Key_Size/1024/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' GB'
END AS Max_Var_Key_Size
,Num_Key_Cols
,Index_Null_Bitmap
, CASE
WHEN Variable_Key_Size < 1024 THEN CAST(CAST(ROUND(Variable_Key_Size, 3) AS NUMERIC(16,0)) AS VARCHAR(16)) + ' B'
WHEN Variable_Key_Size < 1024*1024 THEN CAST(CAST(ROUND(Variable_Key_Size/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' KB'
WHEN Variable_Key_Size < 1024*1024*1024 THEN CAST(CAST(ROUND(Variable_Key_Size/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' MB'
ELSE CAST(CAST(ROUND(Variable_Key_Size/1024/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' GB'
END AS Variable_Key_Size
, CASE
WHEN Index_Row_Size < 1024 THEN CAST(CAST(ROUND(Index_Row_Size, 3) AS NUMERIC(16,0)) AS VARCHAR(16)) + ' B'
WHEN Index_Row_Size < 1024*1024 THEN CAST(CAST(ROUND(Index_Row_Size/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' KB'
WHEN Index_Row_Size < 1024*1024*1024 THEN CAST(CAST(ROUND(Index_Row_Size/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' MB'
ELSE CAST(CAST(ROUND(Index_Row_Size/1024/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' GB'
END AS Index_Row_Size
,Index_Rows_Per_Page
,Non_Leaf_Levels
,Num_Index_Pages
, Index_Space_Used, CASE
WHEN Index_Space_Used < 1024 THEN CAST(CAST(ROUND(Index_Space_Used, 3) AS NUMERIC(16,0)) AS VARCHAR(16)) + ' B'
WHEN Index_Space_Used < 1024*1024 THEN CAST(CAST(ROUND(Index_Space_Used/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' KB'
WHEN Index_Space_Used < 1024*1024*1024 THEN CAST(CAST(ROUND(Index_Space_Used/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' MB'
ELSE CAST(CAST(ROUND(Index_Space_Used/1024/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' GB'
END AS Index_Space_Used_Display
,Clustered_Index_Size, CASE
WHEN Clustered_Index_Size < 1024 THEN CAST(CAST(ROUND(Clustered_Index_Size, 3) AS NUMERIC(16,0)) AS VARCHAR(16)) + ' B'
WHEN Clustered_Index_Size < 1024*1024 THEN CAST(CAST(ROUND(Clustered_Index_Size/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' KB'
WHEN Clustered_Index_Size < 1024*1024*1024 THEN CAST(CAST(ROUND(Clustered_Index_Size/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' MB'
ELSE CAST(CAST(ROUND(Clustered_Index_Size/1024/1024/1024, 3) AS NUMERIC(16,3)) AS VARCHAR(16)) + ' GB'
END AS Clustered_Index_Size_Display
from tbl_tableSizeEstimate
GO







No comments:

Post a Comment