I want to clean up the content of many text columns in the database which contain HTML code, generated by different wysiwyg editors.
In this case, I want to remove the align
attribute and append its value to the style
attribute as style='text-align: left|right|center'
in every paragraph. I also want to be sure I don't delete the existing values of the style
attribute.
For this reason, I decided to use DOMDocument.
I didn't find a way to simply append a new value to an existing attribute.
Let me know if there is a better or more performant way of doing it without a third-party library (for example, with SQL queries or regular expressions).
$DOMDoc = new DOMDocument('1.0', 'UTF-8');

// Non-ASCII characters are handed to the HTML parser as numeric entities so
// that it does not have to guess the encoding of $text. This replaces
// mb_convert_encoding($text, 'HTML-ENTITIES', 'UTF-8'), whose 'HTML-ENTITIES'
// pseudo-encoding is deprecated as of PHP 8.2; the parsed DOM is the same.
$DOMDoc->loadHTML(
    mb_encode_numericentity($text, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'),
    LIBXML_HTML_NODEFDTD | LIBXML_COMPACT | LIBXML_NOERROR | LIBXML_NOWARNING
);

// Only attributes are changed inside the loop (no nodes are added or removed),
// so iterating the node list while modifying the elements is safe here.
/** @var DOMElement $paragraph */
foreach ($DOMDoc->getElementsByTagName('p') as $paragraph) {
    moveAttributeToStyle($paragraph, 'align', 'text-align');
}
/**
 * Moves a presentational HTML attribute of an element into its inline style.
 *
 * The HTML attribute is always removed. Its value is appended to the `style`
 * attribute as a `<cssAttrName>: <value>;` declaration, unless the value is
 * empty or the inline style already declares that CSS property. In the latter
 * case the inline declaration already takes precedence over the presentational
 * attribute, so appending another declaration after it would change how the
 * element renders.
 *
 * The existing style text is kept as-is apart from trailing semicolons and
 * whitespace; it is never split on `;`, because values such as
 * `url(data:image/png;base64,...)` legitimately contain semicolons.
 *
 * @param DOMElement $paragraph    Element to modify in place.
 * @param string     $htmlAttrName Name of the HTML attribute to remove, e.g. "align".
 * @param string     $cssAttrName  Name of the CSS property to write, e.g. "text-align".
 */
function moveAttributeToStyle($paragraph, $htmlAttrName, $cssAttrName)
{
    if (!$paragraph->hasAttribute($htmlAttrName)) {
        return;
    }

    $htmlAttrValue = trim($paragraph->getAttribute($htmlAttrName));
    $paragraph->removeAttribute($htmlAttrName);

    // An empty value would yield an invalid declaration such as "text-align: ;".
    if ($htmlAttrValue === '') {
        return;
    }

    // getAttribute() returns '' when the attribute is absent, so both the
    // "no style" and the "empty style" cases end up as an empty string here.
    $style = rtrim(trim($paragraph->getAttribute('style')), "; \t\n\r");

    // A declaration starts either at the beginning of the style or after a ';'.
    $alreadyDeclared = '~(?:^|;)\s*' . preg_quote($cssAttrName, '~') . '\s*:~i';
    if ($style !== '' && preg_match($alreadyDeclared, $style) === 1) {
        return;
    }

    $declaration = sprintf('%s: %s;', $cssAttrName, $htmlAttrValue);
    $paragraph->setAttribute('style', $style === '' ? $declaration : $style . '; ' . $declaration);
}
// Strip the doctype and the <html>/<head>/<body> opening and closing tags from
// the serialised document so that only the fragment markup is written back to
// $text. The lookahead requires the tag name to end right after
// html/head/body; without it the pattern would also delete tags such as
// <header> and </header> that merely start with one of those names.
$text = preg_replace(
    '~<(?:!DOCTYPE|/?(?:html|head|body)(?=[\s/>]))[^>]*>\s*~i',
    '',
    $DOMDoc->saveHTML()
);