Deleting unused images

The XHTML WYSIWYG Editor For Desktop & Web Applications

Deleting unused images

Overview

While editing a document using a WYSIWYG editor, content authors can use the editor to upload images to the server. The images are physically stored on the server and the WYSIWYG editor creates <img> elements that link to the uploaded images. However, when content authors delete images in the WYSIWYG editor, only the links to the images are removed. The images themselves remain on the server, leaving a large number of redundant, unused images.

In order to physically remove unused images from the server, the CMS must parse the content generated by the WYSIWYG editor and delete any images that are not being linked to.

The following examples assume that images (and attachments) for each document in the CMS are stored on the server in a folder named with the ID of the document. Therefore, the src attribute in the <img> element will contain the document ID. In the following example, "1000" is the ID of a document:

  1. <img src="/resources/1000/headshot.jpg" alt="John Smith" />

C# example

  1. using System;
  2. using System.Text;
  3. using System.IO;
  4. using System.Collections;
  5. using System.Xml;
  6. using System.Web;
  7. ...
  /// <summary>
  /// Deletes every file in the document's resource folder that the supplied
  /// editor content no longer links to (via &lt;img src&gt; or &lt;a href&gt;).
  /// </summary>
  /// <param name="content">
  /// XHTML fragment produced by the WYSIWYG editor. It must be well-formed XML
  /// once wrapped in a root element.
  /// </param>
  /// <param name="documentID">
  /// ID of the document, which is also the name of its resource folder.
  /// </param>
  /// <exception cref="ArgumentException">
  /// <paramref name="documentID"/> is empty or is not a plain folder name.
  /// </exception>
  /// <exception cref="XmlException">
  /// <paramref name="content"/> is not well-formed. The content is parsed
  /// before the folder is touched, so no file is deleted in that case.
  /// </exception>
  void RemoveUnusedFiles(string content, string documentID)
  {
      // An empty ID would make every URL containing "/" match and would point
      // the clean-up at the resources root; separators would let it escape the root.
      char[] pathChars = { '/', '\\', ':' };
      if (string.IsNullOrEmpty(documentID)
          || documentID == "."
          || documentID == ".."
          || documentID.IndexOfAny(pathChars) >= 0)
      {
          throw new ArgumentException("documentID must be a plain folder name.", "documentID");
      }

      // Case-insensitive set of the file names that are still linked to
      // (only the keys are used).
      Hashtable referencedFiles = new Hashtable(StringComparer.OrdinalIgnoreCase);

      //Content is an XML fragment, so you need to add a root element.
      XmlDocument doc = new XmlDocument();
      doc.LoadXml("<root>" + content + "</root>");

      //Get a list of images and attachments
      string folderMarker = documentID + "/";
      char[] urlSuffixStart = { '?', '#' };
      foreach (XmlNode attribute in doc.SelectNodes("//img/@src | //a/@href"))
      {
          string url = attribute.Value;
          if (!url.Contains(folderMarker))
          {
              continue;
          }

          // A query string or fragment is not part of the file name.
          int suffix = url.IndexOfAny(urlSuffixStart);
          if (suffix >= 0)
          {
              url = url.Substring(0, suffix);
          }

          string encodedName = url.Substring(url.LastIndexOf("/") + 1);

          // Decode first and compare case-insensitively afterwards: lower-casing
          // the encoded text would leave escapes such as %4A decoding to upper case.
          // Both decodings are recorded because UrlDecode turns "+" into a space
          // while a "+" in a URL path is a literal plus sign; keeping an extra
          // name can only prevent a deletion, never cause one.
          referencedFiles[System.Web.HttpUtility.UrlDecode(encodedName)] = null;
          referencedFiles[Uri.UnescapeDataString(encodedName)] = null;
      }

      //Delete unused files
      // ***IMPORTANT*** Change the path to where document folders are located on the hard drive
      string folder = @"C:\Web Site\resources\" + documentID;
      if (!Directory.Exists(folder))
      {
          // Nothing was ever uploaded for this document.
          return;
      }

      foreach (string path in Directory.GetFiles(folder))
      {
          if (!referencedFiles.ContainsKey(Path.GetFileName(path)))
          {
              File.Delete(path);
          }
      }
  }

VBScript example

  ' Deletes every file in the document's resource folder that sContent no
  ' longer links to (via <img src> or <a href>).
  '   sContent    - XHTML fragment from the WYSIWYG editor; it must be
  '                 well-formed XML once wrapped in a root element.
  '   sDocumentID - ID of the document, which is also the name of its
  '                 resource folder.
  ' Raises an error, and deletes nothing, when sDocumentID is empty or
  ' sContent cannot be parsed.
  Sub RemoveUnusedFiles(ByVal sContent, ByVal sDocumentID)
      Dim objDoc, objAttribute, dictFilesList, objFS, objFile, strFile, strFolder

      ' An empty ID would match every URL containing "/" and would point the
      ' clean-up at the resources root itself.
      If Len(sDocumentID & "") = 0 Then
          Err.Raise vbObjectError + 1, "RemoveUnusedFiles", "Document ID must not be empty."
      End If

      Set objDoc = CreateObject("MSXML2.DOMDocument.4.0")
      objDoc.async = False
      Set dictFilesList = CreateObject("Scripting.Dictionary")
      Set objFS = CreateObject("Scripting.FileSystemObject")

      'Content is an XML fragment, so you need to add a root element.
      ' loadXML reports malformed XML through its return value rather than by
      ' raising. Carrying on with the resulting empty document would find no
      ' links and therefore delete every file in the folder.
      If Not objDoc.loadXML("<root>" & sContent & "</root>") Then
          Err.Raise vbObjectError + 2, "RemoveUnusedFiles", _
              "Content is not well-formed XML: " & objDoc.parseError.reason
      End If

      'Get a list of images and attachments
      For Each objAttribute In objDoc.selectNodes("//img/@src | //a/@href")
          If InStr(1, objAttribute.Text, sDocumentID & "/") > 0 Then
              strFile = UrlDecode(Mid(objAttribute.Text, InStrRev(objAttribute.Text, "/") + 1))
              dictFilesList(LCase(strFile)) = ""
          End If
      Next

      'Delete unused files
      ' ***IMPORTANT*** Change the path to where document folders are located on the hard drive
      strFolder = "C:\Web Site\resources\" & sDocumentID
      If objFS.FolderExists(strFolder) Then
          For Each objFile In objFS.getFolder(strFolder).Files
              If Not dictFilesList.Exists(LCase(objFile.Name)) Then
                  objFile.Delete
              End If
          Next
      End If

      Set objDoc = Nothing
      Set objAttribute = Nothing
      Set dictFilesList = Nothing
      Set objFS = Nothing
      Set objFile = Nothing
  End Sub
  42.  
  ' Decodes a URL-encoded string: "+" becomes a space and every %XX escape
  ' becomes the character whose code is the hexadecimal number XX.
  ' Each escape is decoded on its own with Chr, so multi-byte sequences
  ' (for example UTF-8) are not combined into a single character.
  ' A "%" that is not followed by two hexadecimal digits is kept unchanged.
  Function UrlDecode(ByVal sText)
      Const HEX_DIGITS = "0123456789ABCDEF"
      Dim varParts, strHex, i

      ' Translate "+" before decoding the escapes; doing it afterwards would
      ' also turn a "+" that came from %2B into a space.
      varParts = Split(Replace(sText, "+", " "), "%")

      ' Element 0 is the text before the first "%"; every later element
      ' starts with the two characters that followed a "%".
      For i = 1 To UBound(varParts)
          strHex = Left(varParts(i), 2)
          If Len(strHex) = 2 _
              And InStr(HEX_DIGITS, UCase(Left(strHex, 1))) > 0 _
              And InStr(HEX_DIGITS, UCase(Right(strHex, 1))) > 0 Then
              varParts(i) = Chr(CInt("&H" & strHex)) & Mid(varParts(i), 3)
          Else
              ' Not a valid escape: put back the "%" that Split removed.
              varParts(i) = "%" & varParts(i)
          End If
      Next

      UrlDecode = Join(varParts, "")
  End Function

PHP example

  /**
   * Deletes every file in a document's resource folder that $content no
   * longer links to through a src="..." or href="..." attribute.
   *
   * Nothing is deleted when $document_id is empty or is not a plain folder
   * name, when $content contains an attribute with no closing quote, or when
   * the document folder does not exist.
   *
   * @param string $content        XHTML produced by the WYSIWYG editor.
   * @param string $document_id    ID of the document, which is also the name
   *                               of its resource folder.
   * @param string $resources_path Folder that contains the per-document folders.
   *                               ***IMPORTANT*** Change the default to where
   *                               document folders are located on the hard drive.
   * @return void
   */
  function remove_unused_files($content, $document_id, $resources_path = "C:/Web Site/resources/") {
      $document_id = (string) $document_id;

      // An empty ID would match every URL containing "/" and would point the
      // clean-up at the resources root; separators would let it escape the root.
      if ($document_id === '' || $document_id === '.' || $document_id === '..'
              || strpbrk($document_id, '/\\:') !== false) {
          return;
      }

      // Lower-cased names of the files that are still linked to, stored as keys.
      $used = array();

      //Get a list of images and attachments
      foreach (array(' src="', ' href="') as $marker) {
          $marker_length = strlen($marker);
          $offset = 0;
          while (($start = strpos($content, $marker, $offset)) !== false) {
              $value_start = $start + $marker_length;
              $finish = strpos($content, '"', $value_start);
              if ($finish === false) {
                  // Unterminated attribute: the content is malformed, so the
                  // link list cannot be trusted. Refuse to delete anything
                  // (continuing with $offset = false would also loop forever).
                  return;
              }

              $url = substr($content, $value_start, $finish - $value_start);
              if (strpos($url, $document_id . '/') !== false) {
                  // A query string or fragment is not part of the file name.
                  $url = (string) substr($url, 0, strcspn($url, '?#'));
                  $slash = strrpos($url, '/');
                  $name = ($slash === false) ? $url : (string) substr($url, $slash + 1);

                  // Record both decodings: urldecode() turns "+" into a space,
                  // while a "+" in a URL path is a literal plus sign. Keeping an
                  // extra name can only prevent a deletion, never cause one.
                  $used[strtolower(urldecode($name))] = true;
                  $used[strtolower(rawurldecode($name))] = true;
              }
              $offset = $finish;
          }
      }

      $path = rtrim($resources_path, '/\\') . '/' . $document_id;
      if (!is_dir($path)) {
          // Nothing was ever uploaded for this document.
          return;
      }

      // Read the whole listing before deleting anything.
      $files = @scandir($path);
      if ($files === false) {
          return;
      }

      //Delete unused files
      foreach ($files as $file) {
          $full_path = $path . '/' . $file;
          // is_file() also skips ".", ".." and any sub-folders.
          if (is_file($full_path) && !isset($used[strtolower($file)])) {
              @unlink($full_path);
          }
      }
  }