Monday, May 7, 2012

Remove duplicate rows

Sometimes you have tables with duplicate rows and you need to get rid of the duplicates. Here you can see a trick to accomplish this task by using Common Tables Expression (CTE)

-- Create a example table with duplicate rows
IF (object_id( 'tempdb..#MyTable' ) IS NOT NULL) DROP TABLE ..#MyTable ;

CREATE TABLE #MyTable(
      [State] varchar(2)  not null
      ,City  varchar(50) not null
) ;

INSERT INTO #MyTable ( [State], City )
values
       ( 'NY' , 'New York' )
      ,( 'CA' , 'Long Beach' )
      ,( 'IL' , 'Chicago' )
      ,( 'FL' , 'Miami' )
      ,( 'MI' , 'Detroit' )
      ,( 'NY' , 'New York' )
      ,( 'IL' , 'Chicago' )
      ,( 'MI' , 'Detroit' )
      ,( 'CA' , 'Long Beach' )
      ,( 'IL' , 'Chicago' )
      ,( 'NY' , 'New York' )
      ,( 'IL' , 'Chicago' )
      ,( 'MI' , 'Detroit' )

SELECT * FROM #MyTable



SELECT COUNT(*) as [Row_Count],* FROM #MyTable
GROUP BY [State],City

-- Delete the duplicate rows
; WITH CTE  as
(
      SELECT ROW_NUMBER() OVER (PARTITION BY [State]  
      ORDER BY ( SELECT 0 ) ) RN                                       
      FROM   #MyTable
) 
DELETE
FROM CTE 
WHERE  RN > 1

-- Table with the unquie rows
SELECT * FROM #MyTable