Deleting unused images
Overview
While editing a document in a WYSIWYG editor, content authors can use the editor to upload images to the server. The images are physically stored on the server, and the WYSIWYG editor creates <img> elements that link to the uploaded images. However, when content authors delete images in the WYSIWYG editor, only the links to the images are removed. The images themselves remain on the server, so over time a large number of redundant, unused images can accumulate.
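To physically remove unused images from the server, the CMS must parse the content generated by the WYSIWYG editor and delete any images that are no longer linked to. Note that the C# and VBScript examples below load the content with an XML parser, so the content must be well-formed XHTML.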
The following examples assume that images (and attachments) for each document in the CMS are stored on the server in a folder named with the ID of the document. Therefore, the src attribute of each <img> element contains the document ID. In the following example, "1000" is the ID of the document:
<img src="/resources/1000/headshot.jpg" alt="John Smith" />
C# example
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Web;
using System.Xml;
- ...
/// <summary>
/// Deletes every file in a document's resource folder that the document's
/// content no longer links to through an img/@src or a/@href attribute.
/// </summary>
/// <param name="content">XHTML fragment produced by the WYSIWYG editor. It must be well-formed XML.</param>
/// <param name="documentID">ID of the document; it is also the name of the document's resource folder.</param>
/// <exception cref="ArgumentException"><paramref name="documentID"/> is empty or is not a plain folder name.</exception>
/// <exception cref="XmlException"><paramref name="content"/> is not well-formed XML; nothing is deleted in that case.</exception>
void RemoveUnusedFiles(string content, string documentID)
{
    // An empty ID would make every URL match and would point the clean-up at the
    // shared resources root; separators or ".." would let it escape that root.
    if (string.IsNullOrEmpty(documentID)
        || documentID == "." || documentID == ".."
        || documentID.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
    {
        throw new ArgumentException("documentID must be a plain, non-empty folder name.", "documentID");
    }

    // Case-insensitive set: file names on disk and in URLs may differ in case,
    // and an ordinal comparison avoids culture-specific lower-casing surprises.
    HashSet<string> referencedFiles = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    XmlDocument doc = new XmlDocument();

    //Content is an XML fragment, so you need to add a root element.
    doc.LoadXml("<root>" + content + "</root>");

    //Get a list of images and attachments
    string folderMarker = documentID + "/";
    foreach (XmlNode attribute in doc.SelectNodes("//img/@src | //a/@href"))
    {
        string url = attribute.Value;
        if (!url.Contains(folderMarker))
        {
            continue;
        }

        string fileName = url.Substring(url.LastIndexOf('/') + 1);

        // A query string or fragment is not part of the file name on disk.
        int suffixStart = fileName.IndexOfAny(new char[] { '?', '#' });
        if (suffixStart >= 0)
        {
            fileName = fileName.Substring(0, suffixStart);
        }

        // Decode first and compare case-insensitively afterwards; lower-casing the
        // encoded form would leave decoded upper-case characters unmatched.
        referencedFiles.Add(System.Web.HttpUtility.UrlDecode(fileName));
    }

    //Delete unused files
    // ***IMPORTANT*** Change the path to where document folders are located on the hard drive
    string folder = Path.Combine(@"C:\Web Site\resources\", documentID);
    if (!Directory.Exists(folder))
    {
        // Nothing was ever uploaded for this document.
        return;
    }

    foreach (string path in Directory.GetFiles(folder))
    {
        if (!referencedFiles.Contains(Path.GetFileName(path)))
        {
            File.Delete(path);
        }
    }
}
VBScript example
' Deletes every file in a document's resource folder that sContent no longer
' links to through an img/@src or a/@href attribute.
'   sContent    - XHTML fragment produced by the WYSIWYG editor (must be well-formed XML)
'   sDocumentID - ID of the document; it is also the name of the document's resource folder
' Raises a run-time error, and deletes nothing, when sDocumentID is not a plain
' folder name or when sContent cannot be parsed.
Sub RemoveUnusedFiles(ByVal sContent, ByVal sDocumentID)
    Dim objDoc, objAttribute, dictFilesList, objFS, objFile
    Dim strID, strFile, strFolder, lngCut

    ' An empty ID would make every URL match and would point the clean-up at the
    ' shared resources root; separators or ".." would let it escape that root.
    strID = sDocumentID & ""
    If Len(strID) = 0 Or strID = "." Or strID = ".." _
            Or InStr(1, strID, "\") > 0 Or InStr(1, strID, "/") > 0 Then
        Err.Raise 5, "RemoveUnusedFiles", "sDocumentID must be a plain, non-empty folder name."
    End If

    Set objDoc = CreateObject("MSXML2.DOMDocument.6.0")
    objDoc.async = False
    Set dictFilesList = CreateObject("Scripting.Dictionary")
    Set objFS = CreateObject("Scripting.FileSystemObject")

    'Content is an XML fragment, so you need to add a root element.
    ' loadXML signals a parse failure through its return value. Without this check
    ' malformed content would look like "no links at all" and every file would be deleted.
    If Not objDoc.loadXML("<root>" & sContent & "</root>") Then
        Err.Raise vbObjectError + 1, "RemoveUnusedFiles", _
            "Content is not well-formed XML: " & objDoc.parseError.reason
    End If

    'Get a list of images and attachments
    For Each objAttribute In objDoc.selectNodes("//img/@src | //a/@href")
        If InStr(1, objAttribute.Text, strID & "/") > 0 Then
            strFile = Mid(objAttribute.Text, InStrRev(objAttribute.Text, "/") + 1)

            ' A query string or fragment is not part of the file name on disk
            lngCut = InStr(1, strFile, "?")
            If lngCut > 0 Then strFile = Left(strFile, lngCut - 1)
            lngCut = InStr(1, strFile, "#")
            If lngCut > 0 Then strFile = Left(strFile, lngCut - 1)

            dictFilesList(LCase(UrlDecode(strFile))) = ""
        End If
    Next

    'Delete unused files
    ' ***IMPORTANT*** Change the path to where document folders are located on the hard drive
    strFolder = "C:\Web Site\resources\" & strID
    If objFS.FolderExists(strFolder) Then
        For Each objFile In objFS.GetFolder(strFolder).Files
            If Not dictFilesList.Exists(LCase(objFile.Name)) Then
                objFile.Delete
            End If
        Next
    End If

    Set objDoc = Nothing
    Set objAttribute = Nothing
    Set dictFilesList = Nothing
    Set objFS = Nothing
    Set objFile = Nothing
End Sub
-
' Decodes a URL-encoded string.
'   "+" becomes a space, "%XX" escapes become the character they encode, and a
'   run of escapes that forms a valid UTF-8 sequence becomes one Unicode character.
'   A "%" that is not followed by two hex digits is kept as a literal "%".
'   Escaped bytes that are not part of a valid UTF-8 sequence are converted
'   one by one with Chr.
Function UrlDecode(ByVal sText)
    Dim strOut, lngPos, lngLen, lngByte, lngCode, lngNeeded, lngSeen, lngCont

    ' Translate "+" before decoding so that an encoded plus sign (%2B) survives as "+"
    sText = Replace(sText & "", "+", " ")
    lngLen = Len(sText)
    lngPos = 1
    strOut = ""

    Do While lngPos <= lngLen
        lngByte = UrlDecodeByteAt(sText, lngPos)
        If lngByte < 0 Then
            ' Ordinary character, or a "%" that does not start a valid escape
            strOut = strOut & Mid(sText, lngPos, 1)
            lngPos = lngPos + 1
        Else
            ' A UTF-8 lead byte announces how many continuation bytes follow
            lngCode = 0
            If lngByte >= 194 And lngByte <= 223 Then
                lngNeeded = 1
                lngCode = lngByte And 31
            ElseIf lngByte >= 224 And lngByte <= 239 Then
                lngNeeded = 2
                lngCode = lngByte And 15
            ElseIf lngByte >= 240 And lngByte <= 244 Then
                lngNeeded = 3
                lngCode = lngByte And 7
            Else
                lngNeeded = 0
            End If

            ' Each continuation byte must be an escape in the range 128..191
            lngSeen = 0
            Do While lngSeen < lngNeeded
                lngCont = UrlDecodeByteAt(sText, lngPos + 3 * (lngSeen + 1))
                If lngCont < 128 Or lngCont > 191 Then Exit Do
                lngCode = lngCode * 64 + (lngCont And 63)
                lngSeen = lngSeen + 1
            Loop

            If lngNeeded > 0 And lngSeen = lngNeeded And lngCode <= 1114111 Then
                If lngCode < 65536 Then
                    strOut = strOut & ChrW(lngCode)
                Else
                    ' Characters beyond the Basic Multilingual Plane need a surrogate pair
                    lngCode = lngCode - 65536
                    strOut = strOut & ChrW(55296 + (lngCode \ 1024)) & ChrW(56320 + (lngCode Mod 1024))
                End If
                lngPos = lngPos + 3 * (lngNeeded + 1)
            Else
                ' Not UTF-8: decode just this one byte
                strOut = strOut & Chr(lngByte)
                lngPos = lngPos + 3
            End If
        End If
    Loop

    UrlDecode = strOut
End Function

' Returns the value (0-255) of the "%XX" escape starting at lngPos in sText,
' or -1 when there is no valid escape at that position.
Function UrlDecodeByteAt(ByVal sText, ByVal lngPos)
    Dim strHex
    UrlDecodeByteAt = -1
    If Mid(sText, lngPos, 1) <> "%" Then Exit Function
    strHex = UCase(Mid(sText, lngPos + 1, 2))
    If Len(strHex) <> 2 Then Exit Function
    If InStr(1, "0123456789ABCDEF", Left(strHex, 1), vbBinaryCompare) = 0 Then Exit Function
    If InStr(1, "0123456789ABCDEF", Right(strHex, 1), vbBinaryCompare) = 0 Then Exit Function
    UrlDecodeByteAt = CLng("&H" & strHex)
End Function
PHP example
/**
 * Deletes every file in a document's resource folder that the content no
 * longer links to through a src or href attribute.
 *
 * @param string $content     HTML produced by the WYSIWYG editor.
 * @param string $document_id ID of the document; it is also the name of the
 *                            document's resource folder.
 * @throws InvalidArgumentException if $document_id is empty or is not a plain folder name.
 * @throws RuntimeException if the content could not be scanned; nothing is deleted in that case.
 */
function remove_unused_files($content, $document_id) {
    // An empty ID would make every URL match and would point the clean-up at the
    // shared resources root; separators or ".." would let it escape that root.
    $document_id = (string) $document_id;
    if ($document_id === '' || $document_id === '.' || $document_id === '..'
            || strpbrk($document_id, '/\\') !== false) {
        throw new InvalidArgumentException('$document_id must be a plain, non-empty folder name.');
    }

    //Get a list of images and attachments
    // Matches src/href attributes quoted with either " or ', in any letter case.
    // A regular expression also cannot loop forever on an unterminated attribute.
    $matched = preg_match_all(
        '/\s(?:src|href)\s*=\s*(["\'])(.*?)\1/is',
        (string) $content,
        $matches
    );
    if ($matched === false) {
        // Treating a failed scan as "no links" would delete every file
        throw new RuntimeException('Unable to scan the content for links.');
    }

    // File names are stored as array keys so the lookup below is a hash lookup
    $file_list = array();
    foreach ($matches[2] as $url) {
        if (strpos($url, $document_id . '/') === false) {
            continue;
        }
        $slash = strrpos($url, '/');
        $file_name = substr($url, $slash + 1);
        // A query string or fragment is not part of the file name on disk
        $file_name = preg_replace('/[?#].*$/s', '', $file_name);
        $file_list[strtolower(urldecode($file_name))] = true;
    }

    // ***IMPORTANT*** Change the path to where document folders are located on the hard drive
    $path = "C:/Web Site/resources/" . $document_id;
    if (!is_dir($path)) {
        // Nothing was ever uploaded for this document
        return;
    }
    $files = scandir($path);
    if ($files === false) {
        return;
    }

    //Delete unused files
    foreach ($files as $file) {
        $full_path = $path . "/" . $file;
        // Skips ".", ".." and any sub-directories
        if (!is_file($full_path)) {
            continue;
        }
        if (!isset($file_list[strtolower($file)])) {
            // Best effort, as before: a file that cannot be removed is left in place
            @unlink($full_path);
        }
    }
}