Deleting unused images
Overview
While editing a document using a WYSIWYG editor, content authors can use the editor to upload images to the server. The images are physically stored on the server and the WYSIWYG editor creates <img> elements that link to the uploaded images. However, when content authors delete images in the WYSIWYG editor, only the links to the images are removed. The images themselves remain on the server, leaving a large number of redundant, unused images.
In order to physically remove unused images from the server, the CMS must parse the content generated by the WYSIWYG editor and delete any images that are not being linked to.
The following examples assume that images (and attachments) for each document in the CMS are stored on the server in a folder named with the ID of the document. Therefore, the src attribute in the <img> element will contain the document ID. In the following example, "1000" is the ID of a document:
<img src="/resources/1000/headshot.jpg" alt="John Smith" />
C# example
using System;using System.Text;using System.IO;using System.Collections;using System.Xml;using System.Web;- ...
/// <summary>
/// Deletes every file in a document's resource folder that the document's
/// content no longer links to through an &lt;img src&gt; or &lt;a href&gt;.
/// </summary>
/// <param name="content">
/// Markup produced by the WYSIWYG editor. It is parsed as XML, so it must be a
/// well-formed fragment; otherwise <c>LoadXml</c> throws and nothing is deleted.
/// </param>
/// <param name="documentID">ID of the document, which is also the name of its folder.</param>
void RemoveUnusedFiles(string content, string documentID)
{
    // ***IMPORTANT*** Change the path to where document folders are located on the hard drive
    RemoveUnusedFiles(content, documentID, @"C:\Web Site\resources\");
}

/// <summary>
/// Same as the two-argument overload, but with the folder that holds the
/// per-document folders passed in explicitly.
/// </summary>
/// <param name="content">Well-formed XML fragment produced by the WYSIWYG editor.</param>
/// <param name="documentID">ID of the document, which is also the name of its folder.</param>
/// <param name="resourcesRoot">Folder on disk that contains one sub-folder per document ID.</param>
/// <exception cref="ArgumentException"><paramref name="documentID"/> is null or empty.</exception>
void RemoveUnusedFiles(string content, string documentID, string resourcesRoot)
{
    if (string.IsNullOrEmpty(documentID))
    {
        // An empty ID would make the target folder the resources root itself,
        // and every file directly inside it would be treated as unused.
        throw new ArgumentException("A document ID is required.", "documentID");
    }

    // File names are compared case-insensitively and culture-independently.
    // The comparison happens after URL decoding, so an encoded upper-case
    // character such as "%48" still matches the file on disk.
    System.Collections.Generic.HashSet<string> referencedFiles =
        new System.Collections.Generic.HashSet<string>(StringComparer.OrdinalIgnoreCase);

    // Content is an XML fragment, so you need to add a root element.
    XmlDocument doc = new XmlDocument();
    doc.LoadXml("<root>" + content + "</root>");

    // Collect the files referenced by images (src) and attachments (href).
    foreach (XmlNode attribute in doc.SelectNodes("//img/@src | //a/@href"))
    {
        string url = attribute.Value;
        if (!url.Contains(documentID + "/"))
        {
            continue;
        }

        // A query string or fragment is not part of the file name.
        int cut = url.IndexOfAny(new char[] { '?', '#' });
        if (cut >= 0)
        {
            url = url.Substring(0, cut);
        }

        string encodedName = url.Substring(url.LastIndexOf('/') + 1);
        if (encodedName.Length == 0)
        {
            continue;
        }

        // Keep both readings of the name: "+" is a literal plus in a URL path,
        // but some editors write it for a space. Keeping a stray file is
        // harmless; deleting one that is still in use is not.
        referencedFiles.Add(Uri.UnescapeDataString(encodedName));
        referencedFiles.Add(System.Web.HttpUtility.UrlDecode(encodedName));
    }

    // A document that never had an upload has no folder; nothing to clean.
    string folder = Path.Combine(resourcesRoot, documentID);
    if (!Directory.Exists(folder))
    {
        return;
    }

    // Delete unused files
    foreach (string path in Directory.GetFiles(folder))
    {
        if (!referencedFiles.Contains(Path.GetFileName(path)))
        {
            File.Delete(path);
        }
    }
}
VBScript example
' Deletes every file in a document's resource folder that the document's
' content no longer links to through an <img src> or <a href>.
'
'   sContent    - markup produced by the WYSIWYG editor; it is parsed as XML,
'                 so it must be a well-formed fragment
'   sDocumentID - ID of the document, which is also the name of its folder
'
' Raises an error, and deletes nothing, when the document ID is empty or the
' content cannot be parsed.
Sub RemoveUnusedFiles(ByVal sContent, ByVal sDocumentID)
    Dim objDoc, objAttribute, dictFilesList, objFS, objFile
    Dim strUrl, strName, strFile, strFolder, lngCut

    ' An empty ID would make the target folder the resources root itself, and
    ' every file directly inside it would be treated as unused.
    If Len(sDocumentID & "") = 0 Then
        Err.Raise 5, "RemoveUnusedFiles", "A document ID is required."
    End If

    ' MSXML 4.0 is no longer supported; 6.0 ships with current Windows versions.
    Set objDoc = CreateObject("MSXML2.DOMDocument.6.0")
    objDoc.async = False
    Set dictFilesList = CreateObject("Scripting.Dictionary")
    Set objFS = CreateObject("Scripting.FileSystemObject")

    ' Content is an XML fragment, so you need to add a root element.
    ' loadXML reports failure through its return value instead of raising an
    ' error. Without this check a parse failure leaves an empty document, no
    ' file counts as referenced, and the whole folder is emptied.
    If Not objDoc.loadXML("<root>" & sContent & "</root>") Then
        Err.Raise vbObjectError + 1, "RemoveUnusedFiles", _
            "Content is not well-formed XML: " & objDoc.parseError.reason
    End If

    ' Collect the files referenced by images (src) and attachments (href).
    For Each objAttribute In objDoc.selectNodes("//img/@src | //a/@href")
        strUrl = objAttribute.Text
        If InStr(1, strUrl, sDocumentID & "/") > 0 Then
            ' A query string or fragment is not part of the file name.
            lngCut = InStr(1, strUrl, "?")
            If lngCut > 0 Then strUrl = Left(strUrl, lngCut - 1)
            lngCut = InStr(1, strUrl, "#")
            If lngCut > 0 Then strUrl = Left(strUrl, lngCut - 1)

            strName = Mid(strUrl, InStrRev(strUrl, "/") + 1)
            If Len(strName) > 0 Then
                ' Keep both readings of the name: "+" is a literal plus in a
                ' URL path, but some editors write it for a space. Keeping a
                ' stray file is harmless; deleting a used one is not.
                strFile = UrlDecode(strName)
                dictFilesList(LCase(strFile)) = ""
                strFile = UrlDecode(Replace(strName, "+", "%2B"))
                dictFilesList(LCase(strFile)) = ""
            End If
        End If
    Next

    ' Delete unused files
    ' ***IMPORTANT*** Change the path to where document folders are located on the hard drive
    strFolder = "C:\Web Site\resources\" & sDocumentID
    ' A document that never had an upload has no folder; nothing to clean.
    If objFS.FolderExists(strFolder) Then
        For Each objFile In objFS.GetFolder(strFolder).Files
            If Not dictFilesList.Exists(LCase(objFile.Name)) Then
                objFile.Delete
            End If
        Next
    End If

    Set objDoc = Nothing
    Set objAttribute = Nothing
    Set dictFilesList = Nothing
    Set objFS = Nothing
    Set objFile = Nothing
End Sub
' Decodes a URL-encoded string.
'
'   - A literal "+" becomes a space. This is done before percent-decoding so
'     that an encoded plus ("%2B") is returned as "+".
'   - "%XX" escapes that form a valid UTF-8 sequence are decoded to the
'     character they encode, so non-ASCII file names round-trip.
'   - Any other "%XX" escape is decoded as a single byte with Chr.
'   - A "%" that is not followed by two hex digits is copied through unchanged
'     instead of raising a type-mismatch error.
Function UrlDecode(ByVal sText)
    Dim strIn, strOut, lngLen, lngPos, lngByte, lngNext
    Dim lngExtra, lngCode, blnValid, i

    strIn = Replace(sText, "+", " ")
    lngLen = Len(strIn)
    strOut = ""
    lngPos = 1

    Do While lngPos <= lngLen
        lngByte = UrlDecodeHexByte(strIn, lngPos)

        If lngByte < 0 Then
            ' Ordinary character, or a "%" that does not start a valid escape.
            strOut = strOut & Mid(strIn, lngPos, 1)
            lngPos = lngPos + 1
        Else
            ' Work out how many UTF-8 continuation bytes the lead byte announces.
            lngExtra = 0
            lngCode = 0
            If lngByte >= &HC2 And lngByte <= &HDF Then
                lngExtra = 1
                lngCode = lngByte And &H1F
            ElseIf lngByte >= &HE0 And lngByte <= &HEF Then
                lngExtra = 2
                lngCode = lngByte And &H0F
            ElseIf lngByte >= &HF0 And lngByte <= &HF4 Then
                lngExtra = 3
                lngCode = lngByte And &H07
            End If

            blnValid = (lngExtra > 0)
            For i = 1 To lngExtra
                lngNext = UrlDecodeHexByte(strIn, lngPos + 3 * i)
                ' A missing escape returns -1, which also fails the mask test.
                If (lngNext And &HC0) <> &H80 Then
                    blnValid = False
                    Exit For
                End If
                lngCode = lngCode * 64 + (lngNext And &H3F)
            Next

            ' Reject overlong forms, surrogate code points and out-of-range values.
            ' Decimal constants are used because &HFFFF is -1 in VBScript.
            If blnValid Then
                If lngExtra = 2 And lngCode < 2048 Then blnValid = False
                If lngExtra = 3 And lngCode < 65536 Then blnValid = False
                If lngCode >= 55296 And lngCode <= 57343 Then blnValid = False
                If lngCode > 1114111 Then blnValid = False
            End If

            If blnValid Then
                If lngCode > 65535 Then
                    ' Outside the BMP: emit a UTF-16 surrogate pair.
                    lngCode = lngCode - 65536
                    strOut = strOut & ChrW(55296 + (lngCode \ 1024)) & ChrW(56320 + (lngCode And 1023))
                Else
                    strOut = strOut & ChrW(lngCode)
                End If
                lngPos = lngPos + 3 * (lngExtra + 1)
            Else
                ' Plain ASCII, or a byte that is not part of valid UTF-8.
                strOut = strOut & Chr(lngByte)
                lngPos = lngPos + 3
            End If
        End If
    Loop

    UrlDecode = strOut
End Function

' Returns the value (0-255) of the "%XX" escape that starts at position lPos
' in sText, or -1 when there is no well-formed escape at that position.
Function UrlDecodeHexByte(ByVal sText, ByVal lPos)
    Dim strHex

    UrlDecodeHexByte = -1
    If Mid(sText, lPos, 1) <> "%" Then Exit Function

    strHex = UCase(Mid(sText, lPos + 1, 2))
    If Len(strHex) <> 2 Then Exit Function
    If InStr(1, "0123456789ABCDEF", Left(strHex, 1), vbBinaryCompare) = 0 Then Exit Function
    If InStr(1, "0123456789ABCDEF", Right(strHex, 1), vbBinaryCompare) = 0 Then Exit Function

    UrlDecodeHexByte = CLng("&H" & strHex)
End Function
PHP example
/**
 * Deletes every file in a document's resource folder that the document's
 * content no longer links to through a src or href attribute.
 *
 * @param string     $content        Markup produced by the WYSIWYG editor.
 * @param string|int $document_id    ID of the document, which is also the name of its folder.
 * @param string     $resources_root ***IMPORTANT*** Folder on the hard drive that contains
 *                                   the document folders. Change the default or pass it in.
 * @return void
 */
function remove_unused_files($content, $document_id, $resources_root = "C:/Web Site/resources/") {
    // An empty ID would make the target folder the resources root itself, and
    // every file directly inside it would be treated as unused.
    if ((string) $document_id === '') {
        return;
    }

    // Lower-cased names of referenced files, stored as array keys for lookup.
    $referenced = array();

    // Get a list of images (src) and attachments (href).
    // One pass over every src/href attribute, in either quote style. Unlike a
    // manual strpos scan, an attribute with no closing quote simply does not
    // match, so it cannot stall the loop.
    $pattern = '/\s(?:src|href)\s*=\s*(?:"([^"]*)"|\'([^\']*)\')/i';
    if (preg_match_all($pattern, $content, $matches, PREG_SET_ORDER)) {
        foreach ($matches as $match) {
            // Group 1 holds a double-quoted value, group 2 a single-quoted one.
            $url = $match[1] !== '' ? $match[1] : (isset($match[2]) ? $match[2] : '');

            // Attribute values are HTML-escaped ("&amp;" stands for "&").
            $url = htmlspecialchars_decode($url, ENT_QUOTES);

            if (strpos($url, $document_id . '/') === false) {
                continue;
            }

            // A query string or fragment is not part of the file name.
            $url = preg_replace('/[?#].*$/s', '', $url);

            $slash = strrpos($url, '/');
            $name = ($slash === false) ? $url : substr($url, $slash + 1);
            if ($name === '' || $name === false) {
                continue;
            }

            // Keep both readings of the name: "+" is a literal plus in a URL
            // path, but some editors write it for a space. Keeping a stray
            // file is harmless; deleting one that is still in use is not.
            $referenced[strtolower(rawurldecode($name))] = true;
            $referenced[strtolower(urldecode($name))] = true;
        }
    }

    $path = rtrim($resources_root, '/\\') . '/' . $document_id;

    // Read the whole listing first so files are not removed while iterating.
    $files = array();
    if ($handle = @opendir($path)) {
        while (false !== ($file = @readdir($handle))) {
            if ($file != "." && $file != "..") {
                $files[] = $file;
            }
        }
        closedir($handle);
    }

    // Delete unused files
    foreach ($files as $file) {
        if (!isset($referenced[strtolower($file)]) && is_file($path . "/" . $file)) {
            // Best effort: a locked or protected file is left in place.
            @unlink($path . "/" . $file);
        }
    }
}