@@ -31,7 +31,7 @@ SOFTWARE.
3131#include " PMurHash.h"
3232
3333#define BUFFERSIZE (64 *1024 *1024 )
34- #define VERSION " 0.2 "
34+ #define VERSION " 0.3 "
3535
3636using namespace std ;
3737
@@ -46,7 +46,7 @@ class oneFile
4646map<uint64_t ,list<oneFile>> g_files; // Maps a uint64_t key (the file size, per the original note) to the list of oneFile objects sharing it; this type has no per-hash level
4747void RecurseFilePath (wstring path);
4848uint32_t CalculateFileHash (const wstring& newPath, uint64_t fileSize);
49- void checkDuplicates (bool deleteFiles,bool showDuplicates);
49+ void checkDuplicates (bool deleteFiles,bool showDuplicates, bool linkFiles );
5050bool AreDuplicates (const wstring& file1, const wstring& file2, uint64_t fileSize);
5151vector<BYTE> g_buffer;
5252vector<BYTE> g_buffer2;
@@ -58,6 +58,7 @@ int wmain(int argc, wchar_t* argv[])
5858 DWORD timeTaken=GetTickCount ();
5959 bool deleteFiles=false ;
6060 bool showDuplicates=false ;
61+ bool linkFiles=false ;
6162 printf (" Dedup v%s (c) 2015 Logicore Software\n " ,VERSION);
6263 printf (" www.logicore.se\n " );
6364 printf (" The software is provided as is. Use at your own risk.\n " );
@@ -67,6 +68,7 @@ int wmain(int argc, wchar_t* argv[])
6768 printf (" This program will find duplicate files within a folder\n " );
6869 printf (" Options:\n " );
6970 printf (" -D - delete duplicates, keeping the file with the oldest creation date\n " );
71+ printf (" -L - create links for duplicates, keeping the file with the oldest creation date\n " );
7072 printf (" -S - show each duplicate\n " );
7173 return 0 ;
7274 }
@@ -78,23 +80,32 @@ int wmain(int argc, wchar_t* argv[])
7880 deleteFiles=true ;
7981 if (!wcscmp (argv[i],L" -S" ))
8082 showDuplicates=true ;
83+ if (!wcscmp (argv[i],L" -L" ))
84+ linkFiles=true ;
85+
8186 }
8287 }
88+
89+ if (linkFiles && deleteFiles)
90+ {
91+ printf (" You cannot both link and delete files!\n " );
92+ return 0 ;
93+ }
8394 g_buffer.resize (BUFFERSIZE);
8495 g_buffer2.resize (BUFFERSIZE);
8596
8697 wstring path=argv[1 ];
8798 printf (" Scanning files...\n " );
8899 RecurseFilePath (path);
89100 printf (" %I64d Files found. Performing comparisons\n " ,g_filesProcessed);
90- checkDuplicates (deleteFiles,showDuplicates);
101+ checkDuplicates (deleteFiles,showDuplicates,linkFiles );
91102
92103 timeTaken=GetTickCount ()-timeTaken;
93104 printf (" Time taken: %d seconds\n " ,timeTaken/1000 );
94105 return 0 ;
95106}
96107
97- void checkDuplicates (bool deleteFiles,bool showDuplicates)
108+ void checkDuplicates (bool deleteFiles,bool showDuplicates, bool linkFiles )
98109{
99110 uint64_t duplicates=0 ;
100111 uint64_t bytesSaved=0 ;
@@ -139,6 +150,25 @@ void checkDuplicates(bool deleteFiles,bool showDuplicates)
139150 if (!DeleteFileW (fileName2.c_str ()));
140151 wprintf (L" Could not delete %s\n " ,fileName2.c_str ());
141152 }
153+ else if (linkFiles)
154+ {
155+ wchar_t fname[MAX_PATH];
156+ // Not really pleased with the MAX_PATH limitation here...
157+ UINT res=GetTempFileNameW (o3->path .c_str (),L" DED" ,0 ,fname); // We will use a temp file first in case CreateHardLink fails
158+ if (res)
159+ {
160+ DeleteFileW (fname); // Delete the new empty file, we just wanted a file name to use in CreateHardLink
161+ if (CreateHardLinkW (fname,fileName1.c_str (),NULL ))
162+ {
163+ DeleteFileW (fileName2.c_str ()); // Delete the original duplicate
164+ _wrename (fname,fileName2.c_str ()); // rename temp file to original duplicate
165+ }
166+ else
167+ wprintf (L" Could not link %s to %s\n " ,fileName2.c_str (),fileName1.c_str ());
168+ }
169+ else
170+ wprintf (L" Could not link %s to %s\n " ,fileName2.c_str (),fileName1.c_str ());
171+ }
142172 stillOK=true ;
143173 o3=o2.second .erase (o3);
144174 duplicates++;
0 commit comments