'This function lists files in a folder and sub-folders
'and mark the files as duplicates where name and size are same
'Note: The code does not delete any file
Public Sub FindDuplicateFiles()
'Variable Declaration
Dim objFSO As Object
Dim objFile As Object
Dim objFolder As Object
Dim vFile As Variant
Dim strPath As String
Dim iCurRow As Integer
'Remove filter (if already applied)
Sheet1.AutoFilterMode = False
'Clear old data
Sheet1.Range("B9:E1000").ClearContents
'Set the path of the folder
strPath = Sheet1.Range("B4").Value
'Validate if the given folder path is valid
If Dir(strPath, vbDirectory) = "" Then
MsgBox "Invalid Folder path", vbInformation
Exit Sub
End If
'Initialize file system objects
Set objFSO = CreateObject("Scripting.FileSystemObject")
Set objFolder = objFSO.getFolder(strPath)
'Set Directory to folder path
ChDir objFolder.Path
vFile = Dir(objFolder.Path & "\*.*") 'Change or add formats to get specific file types
iCurRow = 9
Do While vFile <> "" 'LOOP until all files in folder strPath have been looped through
Sheet1.Cells(iCurRow, 2).Value = vFile
Sheet1.Cells(iCurRow, 3).Value = objFolder.Path
Set objFile = objFSO.Getfile(objFolder.Path & "\" & vFile) 'Set the object to file
Sheet1.Cells(iCurRow, 4).Value = Round(objFile.Size / 1024, 0) 'Divide the size by 1024 to convert to KB
vFile = Dir
iCurRow = iCurRow + 1
Loop
'Call the function to list files in sub-folders
Call ListFilesInSubFolder(objFolder)
'Add formula to find duplicate file based on file name and file size
Sheet1.Range("E9").Value = "=IF(COUNTIFS(B:B,B9,D:D,D9)>1,""Duplicate"","""")"
'Find the row number where formula needs to be copied
iCurRow = Sheet1.Range("B" & Rows.Count).End(xlUp).Row
'Copy the formula to all records
Sheet1.Range("E9").Copy Sheet1.Range(Sheet1.Range("E9"), Sheet1.Range("E" & iCurRow))
'Calculate sheet
Sheet1.Calculate
'Sort the data based on File Name and Size
Sheet1.Sort.SortFields.Clear 'First Clear old sort field (if any)
'Add sort field on column B (File Name)
Sheet1.Sort.SortFields.Add Key:=Sheet1.Range("B8:B" & iCurRow), SortOn:=xlSortOnValues, Order:=xlAscending, DataOption:=xlSortNormal
'Add sort field on column D (Size (KB))
Sheet1.Sort.SortFields.Add Key:=Sheet1.Range("D8:D" & iCurRow), SortOn:=xlSortOnValues, Order:=xlAscending, DataOption:=xlSortNormal
With Sheet1.Sort
.SetRange Range("B8:E" & iCurRow)
.Header = xlYes
.MatchCase = False
.Orientation = xlTopToBottom
.SortMethod = xlPinYin
.Apply
End With
'Add filter in column E (Is Duplicate?)
Sheet1.Range("B8:E" & iCurRow).AutoFilter Field:=4, Criteria1:="Duplicate"
MsgBox "Done"
End Sub
'This function lists files in the sub-folder
Public Sub ListFilesInSubFolder(objFolder As Object)
Dim vFile As Variant
Dim iCurRow As Integer
Dim objSubFolder As Object
Dim objsubfld As Object
Dim objFSO As Object
Dim objFile As Object
'Initialize file system object
Set objFSO = CreateObject("Scripting.FileSystemObject")
'Find the row number where data needs to be entered
iCurRow = Sheet1.Range("B" & Rows.Count).End(xlUp).Row + 1
'Run the loop for each sub-folder
For Each objSubFolder In objFolder.SubFolders
'Set Directory to folder path
ChDir objSubFolder.Path
vFile = Dir(objSubFolder.Path & "\*.*")
Do While vFile <> "" 'LOOP until all files in folder strPath have been looped through
Sheet1.Cells(iCurRow, 2).Value = vFile
Sheet1.Cells(iCurRow, 3).Value = objSubFolder.Path
Set objFile = objFSO.Getfile(objSubFolder.Path & "\" & vFile) 'Set the object to file
Sheet1.Cells(iCurRow, 4).Value = Round(objFile.Size / 1024, 0) 'Divide the size by 1024 to convert to KB
vFile = Dir
iCurRow = iCurRow + 1
Loop
Next
'If the sub-folder contains more sub-folders then call the same function
For Each objsubfld In objFolder.SubFolders
Call ListFilesInSubFolder(objsubfld)
Next
End Sub