Hassle free locating / reporting or deletion of duplicate rows in a SQL table based on the uniqueness of a column or columns that the user provides with a few nifty hands off features. Keywords: delete unique column record records(This code may also be viewed at [url]http://www.planet-source-code.com/vb/scripts/ShowCode.asp?txtCodeId=576&lngWId=5[/url] /* Description: UTILITY - Locate in MASTER Syntax: EXEC sp_RemoveDups TableName, DupQualifierFieldNameList, DeleteDups, UniqueColName, CreateIdentColIfNeeded, StoredProcedureResult Only the first two arguments are required For HELP, enter the stored procedures name without any arguments or see the PRINT statements below NO DELETION WILL OCCUR by default - only duplicate recordset returned. To delete records, pass in a 0 for the DeleteDups argument. Example: EXEC sp_RemoveDups 'MyTable','LastName,FirstName,HomePhone' Purpose: Allow removal of duplicate rows where 1. We define what fields qualify the duplicate 2. We select the unique rowid or it is detected automatically else no action takes place Method: Delete by RowID all duplicate rows except the highest RowID (in alpha-sort sequence) of each group of duplicates. DATE BY CHANGE 09-23-2002 Frank Original v1.0 09-23-2002 Frank Changed the name from sp_RemoveDupsByRowID to sp_RemoveDups 10-8-2002 Sean P. O. MacCath-Moran Made @UniqueColName optional Added logic to auto-detect RowGUID and Identity columns Added logic to check for unique value column if no RowGUID or Identity column exists Added logic to create a temporary ID field as a last resort if no unique column could be located Added HELP */CREATE PROCEDURE sp_RemoveDups@TableName as varchar(50) = NULL,@DupQualifierFieldNameList as varchar(200) = NULL,@DeleteDups as bit = NULL,@UniqueColName as varchar(50) = NULL,@CreateIdentColIfNeeded as bit = NULL,@StoredProcedureResult int = NULL OUTPUTASSET NOCOUNT ONDECLARE @SQL nvarchar(2000)DECLARE @SQL_DetermineUniqueTemplate nvarchar(2000)DECLARE @TempIdentColName varchar(20)DECLARE @HostName varchar(50)DECLARE @ActionText varchar(10)DECLARE @SUM intDECLARE @COUNT intDECLARE @NextColumn varchar(50)/*==================================================================================*//*========================VARIABLE INITIALIZATION AND SETUP========================*//*=================================================================================*//*If no unique column is located then a temporary Identity column will be created useing the name specified in this TempIdentColName string*/SET @TempIdentColName = 'TempIdentityColXY123'SET @SQL_DetermineUniqueTemplate = 'SELECT @COUNT = COUNT(Count), @SUM = sum(Count) from 'SET @SQL_DetermineUniqueTemplate = @SQL_DetermineUniqueTemplate + CHAR(13) + '(SELECT TOP 100 PERCENT <COLUMN_NAME>, COUNT(*) as Count FROM ' + @TableNameSET @SQL_DetermineUniqueTemplate = @SQL_DetermineUniqueTemplate + CHAR(13) + ' GROUP BY <COLUMN_NAME> ORDER BY <COLUMN_NAME>) a'/*Retrieve the Host Name. This will be used later in this SP as a test to determine if the user is making this call from within SQL Query Analyzer*/SELECT @HostName = hostname FROM master..sysprocesses WHERE spid = @@SPID AND program_name = 'SQL Query Analyzer'/*Set ActionText to be used in message output*/IF (@DeleteDups IS NULL) OR (@DeleteDups = 1) SET @ActionText = 'Selection' ELSE SET @ActionText = 'Deletion' /*If a value is specified for use by UniqueColName it cannot exist in the columns from the DupQualifierFieldNameList*/IF CHARINDEX(@UniqueColName, @DupQualifierFieldNameList) > 0 BEGIN /*The value in UniqueColName was detected in DupQualifierFieldNameList.*/ IF NOT (@HostName IS NULL) PRINT 'The UniqueColName provided (' + @UniqueColName + ') must not exist in DupQualifierFieldNameList (' + @DupQualifierFieldNameList + '). Other solutions will be sought automatically.' SET @UniqueColName = NULL END/*If UniqueColName is provided then perform check to ensure that all the values in that column are, in fact, unique.*/IF NOT (@UniqueColName IS NULL) BEGIN SET @SQL = REPLACE(@SQL_DetermineUniqueTemplate,'<COLUMN_NAME>', @UniqueColName) /*Perform a check of this column to determine if all of it's values are unique*/ EXEC sp_executesql @SQL, N'@SUM as int OUTPUT,@COUNT as int OUTPUT',@SUM OUTPUT,@COUNT OUTPUT /*Test to determine if this column contains unique values*/ If @SUM <> @COUNT BEGIN /*The column specified by UniqueColName does not contain unique values.*/ IF NOT (@HostName IS NULL) PRINT 'The UniqueColName provided (' + @UniqueColName + ') does not contain unique values. Other solutions will be sought automatically.' SET @UniqueColName = NULL END END/*==============================================================*//*======================HELP OUTPUT TEXT======================*//*==============================================================*/IF (@TableName IS NULL) OR (@TableName = '/?') OR (@TableName = '?') OR (@DupQualifierFieldNameList IS NULL) OR (@DupQualifierFieldNameList = '/?') OR (@DupQualifierFieldNameList = '?') BEGIN IF NOT (@HostName IS NULL) BEGIN PRINT 'sp_RemoveDups ''TableName'', ''DupQualifierFieldNameList'', [''DeleteDups''], [''UniqueColName''], [''CreateIdentColIfNeeded''], <''StoredProcedureResult''>' PRINT '=====================================================================================================================================================================' PRINT 'TableName: Required - String - Name of the table to detect duplicate records in.' PRINT 'DupQualifierFieldNameList: Required - String - Comma seperated list of columns that make up the unique record within TableName.' PRINT 'DeleteDups: Optional - Bit, Set to 0 to delete duplicate records. A value of NULL or 1 will return the duplicate records to be deleted.' PRINT 'UniqueColName: Optional - Bit - A table must have a unique column value in it to perform the deletion logic. If no UniqueColName is provided then an attemp will be made to locate the RowGUID column. If that fails then an attempt will be made to locate the Identity column. If that fails then all of the columns of the table will be examined and the first one with all unique values will be selected.' PRINT 'CreateIdentColIfNeeded: Optional - Bit - By default this SP will create an identity column if no unique column can be located. Pass in a 1 here to run this feature off.' PRINT 'StoredProcedureResult: Optional - OUTPUT - Int - Returns a 3 if an error occured, otherwise returns a 0.' END SET @StoredProcedureResult = 3 RETURN END/*========================================================================*//*======================DETECT USABLE UniqueColName======================*//*========================================================================*/IF @UniqueColName IS NULL BEGIN /*Check for a RowGUID or Identity column in this table. If one exists, then utilze it as the unique value for the purposes of this deletion*/ IF EXISTS(SELECT * FROM SysColumns WHERE ID = Object_ID(@TableName) and ColumnProperty(ID,Name,'IsRowGUIDCol') = 1) SET @UniqueColName = 'RowGUIDCol' IF EXISTS(SELECT * FROM SysColumns WHERE ID = Object_ID(@TableName) and ColumnProperty(ID,Name,'IsIdentity') = 1) SET @UniqueColName = 'IdentityCol' IF @UniqueColName IS NULL /*If no RowGUID or Identity column was found then check all of the columns in this table to see if one of them can be utilized as a unique value column*/ BEGIN /*Select all of the columns from the table in question...*/ DECLARE MyCursor CURSOR LOCAL SCROLL STATIC FOR SELECT name FROM syscolumns WHERE OBJECT_ID(@TableName)=ID OPEN MyCursor FETCH NEXT FROM MyCursor INTO @NextColumn WHILE @@fetch_status = 0 BEGIN /*Create SQL string with correct column name in place.*/ SET @SQL = REPLACE(@SQL_DetermineUniqueTemplate,'<COLUMN_NAME>', @NextColumn) /*Perform a check of this column to determine if all of it's values are unique*/ EXEC sp_executesql @SQL, N'@SUM as int OUTPUT,@COUNT as int OUTPUT',@SUM OUTPUT,@COUNT OUTPUT /*Test to determine if this column contains unique values*/ If @SUM = @COUNT BEGIN /*A unique values column is detected. Use it and break out of the loop UNLESS column is specified in DupQualifierFieldNameList*/ IF CHARINDEX(@NextColumn, @DupQualifierFieldNameList) = 0 BEGIN /*NextColumn was NOT detected in DupQualifierFieldNameList, so this is the column we will use.*/ SET @UniqueColName = @NextColumn BREAK END END ELSE FETCH NEXT FROM MyCursor INTO @NextColumn END CLOSE MyCursor DEALLOCATE MyCursor END END/*If no UniqueColName has been found then create one UNLESS @CreateIdentColIfNeeded = 1*/IF (@UniqueColName IS NULL) AND ( (@CreateIdentColIfNeeded IS NULL) OR (@CreateIdentColIfNeeded = 0) ) BEGIN /*Add a sequence column to the table...*/ IF NOT (@HostName IS NULL) PRINT 'Creating temporary identity column in the ' + @TableName + ' table named ' + @TempIdentColName + ' for use in this ' + LOWER(@ActionText) + ' process...' EXEC('ALTER TABLE ' + @TableName + ' ADD ' + @TempIdentColName + ' [int] IDENTITY (1, 1)') SET @UniqueColName = @TempIdentColName END/*============================================================================*//*======================EXECUTE DELETION OR SELECTION======================*//*===========================================================================*/IF @UniqueColName IS NULL BEGIN /*No UniqueColName was provided by the user and none were detected by the script. This deletion algorythm cannot run.*/ IF NOT (@HostName IS NULL) PRINT 'Could not perform ' + LOWER(@ActionText) + ' process. No unique columns were located and the UniqueColName flag is set to 1 (False).' SET @StoredProcedureResult = 3 RETURN ENDELSE BEGIN IF NOT (@HostName IS NULL) PRINT 'Performing ' + LOWER(@ActionText) + ' utilizing the unique values in the ' + @UniqueColName + ' column as a reference...' /* Create and execute an SQL statement in the form of: SELECT * (or DELETE) FROM TableName WHERE UniqueColName IN ( SELECT UniqueColName FROM TableName WHERE UniqueColName NOT IN ( SELECT MAX(Cast(UniqueColName AS varchar(36))) FROM TableName GROUP BY DupQualifierFieldNameList, DupQualifierFieldNameList, etc ) ) */ /*Delete all duplicate records useing @UniqueColName as a unique ID column */ IF (@DeleteDups IS NULL) OR (@DeleteDups = 1) SET @SQL = 'SELECT * ' ELSE SET @SQL = 'DELETE ' SET @SQL = @SQL + 'FROM ' + @TableName + ' WHERE ' + @UniqueColName + ' IN ' SET @SQL = @SQL + CHAR(13) + CHAR(9) + '(' + CHAR(13) + CHAR(9) SET @SQL = @SQL + 'SELECT ' + @UniqueColName + ' FROM ' + @TableName + ' WHERE ' + @UniqueColName + ' NOT IN ' SET @SQL = @SQL + CHAR(13) + CHAR(9) + CHAR(9) + '(' + CHAR(13) + CHAR(9)+CHAR(9) SET @SQL = @SQL + 'SELECT MAX(Cast(' + @UniqueColName + ' AS varchar(36))) FROM ' SET @SQL = @SQL + @TableName + ' GROUP BY ' + @DupQualifierFieldNameList SET @SQL = @SQL + CHAR(13) + CHAR(9) + CHAR(9) + ')' + CHAR(13) + CHAR(9) + ')' EXEC (@SQL) IF @@ERROR <> 0 BEGIN IF NOT (@HostName IS NULL) PRINT @ActionText + ' process failed.' SET @StoredProcedureResult = 3 END ELSE BEGIN IF NOT (@HostName IS NULL) PRINT @ActionText + ' completed successfully with this SQL: ' + CHAR(13) + @SQL SET @StoredProcedureResult = 0 END ENDIF (@UniqueColName = @TempIdentColName) AND ( (@CreateIdentColIfNeeded IS NULL) OR (@CreateIdentColIfNeeded = 0) ) BEGIN /*Remove the sequence column from the table...*/ IF NOT (@HostName IS NULL) PRINT 'Removing temporary identity column named ' + @TempIdentColName + ' from the ' + @TableName + ' table...' EXEC('ALTER TABLE ' + @TableName + ' DROP COLUMN ' + @TempIdentColName) ENDGO
Edited by - emanaton on 10/30/2002 11:28:34