File: /var/www/vhosts/uyarreklam.com.tr/httpdocs/pelago.tar
emogrifier/LICENSE 0000644 00000002054 15154122651 0007704 0 ustar 00 MIT License
Copyright (c) 2008-2018 Pelago
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
emogrifier/src/Caching/SimpleStringCache.php 0000644 00000004031 15154122651 0015074 0 ustar 00 <?php
declare(strict_types=1);
namespace Pelago\Emogrifier\Caching;
/**
* This cache caches string values with string keys. It is not PSR-6-compliant.
*
* Usage:
*
* ```php
* $cache = new SimpleStringCache();
* $cache->set($key, $value);
* …
* if ($cache->has($key) {
* $cachedValue = $cache->get($value);
* }
* ```
*
* @internal
*/
class SimpleStringCache
{
/**
* @var array<string, string>
*/
private $values = [];
/**
* Checks whether there is an entry stored for the given key.
*
* @param string $key the key to check; must not be empty
*
* @throws \InvalidArgumentException
*/
public function has(string $key): bool
{
$this->assertNotEmptyKey($key);
return isset($this->values[$key]);
}
/**
* Returns the entry stored for the given key, and throws an exception if the value does not exist
* (which helps keep the return type simple).
*
* @param string $key the key to of the item to retrieve; must not be empty
*
* @return string the retrieved value; may be empty
*
* @throws \BadMethodCallException
*/
public function get(string $key): string
{
if (!$this->has($key)) {
throw new \BadMethodCallException('You can only call `get` with a key for an existing value.', 1625996246);
}
return $this->values[$key];
}
/**
* Sets or overwrites an entry.
*
* @param string $key the key to of the item to set; must not be empty
* @param string $value the value to set; can be empty
*
* @throws \BadMethodCallException
*/
public function set(string $key, string $value): void
{
$this->assertNotEmptyKey($key);
$this->values[$key] = $value;
}
/**
* @throws \InvalidArgumentException
*/
private function assertNotEmptyKey(string $key): void
{
if ($key === '') {
throw new \InvalidArgumentException('Please provide a non-empty key.', 1625995840);
}
}
}
emogrifier/src/Css/CssDocument.php 0000644 00000014747 15154122651 0013172 0 ustar 00 <?php
declare(strict_types=1);
namespace Pelago\Emogrifier\Css;
use Sabberworm\CSS\CSSList\AtRuleBlockList as CssAtRuleBlockList;
use Sabberworm\CSS\CSSList\Document as SabberwormCssDocument;
use Sabberworm\CSS\Parser as CssParser;
use Sabberworm\CSS\Property\AtRule as CssAtRule;
use Sabberworm\CSS\Property\Charset as CssCharset;
use Sabberworm\CSS\Property\Import as CssImport;
use Sabberworm\CSS\Renderable as CssRenderable;
use Sabberworm\CSS\RuleSet\DeclarationBlock as CssDeclarationBlock;
use Sabberworm\CSS\RuleSet\RuleSet as CssRuleSet;
/**
* Parses and stores a CSS document from a string of CSS, and provides methods to obtain the CSS in parts or as data
* structures.
*
* @internal
*/
class CssDocument
{
/**
* @var SabberwormCssDocument
*/
private $sabberwormCssDocument;
/**
* `@import` rules must precede all other types of rules, except `@charset` rules. This property is used while
* rendering at-rules to enforce that.
*
* @var bool
*/
private $isImportRuleAllowed = true;
/**
* @param string $css
*/
public function __construct(string $css)
{
$cssParser = new CssParser($css);
/** @var SabberwormCssDocument $sabberwormCssDocument */
$sabberwormCssDocument = $cssParser->parse();
$this->sabberwormCssDocument = $sabberwormCssDocument;
}
/**
* Collates the media query, selectors and declarations for individual rules from the parsed CSS, in order.
*
* @param array<array-key, string> $allowedMediaTypes
*
* @return array<int, StyleRule>
*/
public function getStyleRulesData(array $allowedMediaTypes): array
{
$ruleMatches = [];
/** @var CssRenderable $rule */
foreach ($this->sabberwormCssDocument->getContents() as $rule) {
if ($rule instanceof CssAtRuleBlockList) {
$containingAtRule = $this->getFilteredAtIdentifierAndRule($rule, $allowedMediaTypes);
if (\is_string($containingAtRule)) {
/** @var CssRenderable $nestedRule */
foreach ($rule->getContents() as $nestedRule) {
if ($nestedRule instanceof CssDeclarationBlock) {
$ruleMatches[] = new StyleRule($nestedRule, $containingAtRule);
}
}
}
} elseif ($rule instanceof CssDeclarationBlock) {
$ruleMatches[] = new StyleRule($rule);
}
}
return $ruleMatches;
}
/**
* Renders at-rules from the parsed CSS that are valid and not conditional group rules (i.e. not rules such as
* `@media` which contain style rules whose data is returned by {@see getStyleRulesData}). Also does not render
* `@charset` rules; these are discarded (only UTF-8 is supported).
*
* @return string
*/
public function renderNonConditionalAtRules(): string
{
$this->isImportRuleAllowed = true;
/** @var array<int, CssRenderable> $cssContents */
$cssContents = $this->sabberwormCssDocument->getContents();
$atRules = \array_filter($cssContents, [$this, 'isValidAtRuleToRender']);
if ($atRules === []) {
return '';
}
$atRulesDocument = new SabberwormCssDocument();
$atRulesDocument->setContents($atRules);
/** @var string $renderedRules */
$renderedRules = $atRulesDocument->render();
return $renderedRules;
}
/**
* @param CssAtRuleBlockList $rule
* @param array<array-key, string> $allowedMediaTypes
*
* @return ?string
* If the nested at-rule is supported, it's opening declaration (e.g. "@media (max-width: 768px)") is
* returned; otherwise the return value is null.
*/
private function getFilteredAtIdentifierAndRule(CssAtRuleBlockList $rule, array $allowedMediaTypes): ?string
{
$result = null;
if ($rule->atRuleName() === 'media') {
/** @var string $mediaQueryList */
$mediaQueryList = $rule->atRuleArgs();
[$mediaType] = \explode('(', $mediaQueryList, 2);
if (\trim($mediaType) !== '') {
$escapedAllowedMediaTypes = \array_map(
static function (string $allowedMediaType): string {
return \preg_quote($allowedMediaType, '/');
},
$allowedMediaTypes
);
$mediaTypesMatcher = \implode('|', $escapedAllowedMediaTypes);
$isAllowed = \preg_match('/^\\s*+(?:only\\s++)?+(?:' . $mediaTypesMatcher . ')/i', $mediaType) > 0;
} else {
$isAllowed = true;
}
if ($isAllowed) {
$result = '@media ' . $mediaQueryList;
}
}
return $result;
}
/**
* Tests if a CSS rule is an at-rule that should be passed though and copied to a `<style>` element unmodified:
* - `@charset` rules are discarded - only UTF-8 is supported - `false` is returned;
* - `@import` rules are passed through only if they satisfy the specification ("user agents must ignore any
* '@import' rule that occurs inside a block or after any non-ignored statement other than an '@charset' or an
* '@import' rule");
* - `@media` rules are processed separately to see if their nested rules apply - `false` is returned;
* - `@font-face` rules are checked for validity - they must contain both a `src` and `font-family` property;
* - other at-rules are assumed to be valid and treated as a black box - `true` is returned.
*
* @param CssRenderable $rule
*
* @return bool
*/
private function isValidAtRuleToRender(CssRenderable $rule): bool
{
if ($rule instanceof CssCharset) {
return false;
}
if ($rule instanceof CssImport) {
return $this->isImportRuleAllowed;
}
$this->isImportRuleAllowed = false;
if (!$rule instanceof CssAtRule) {
return false;
}
switch ($rule->atRuleName()) {
case 'media':
$result = false;
break;
case 'font-face':
$result = $rule instanceof CssRuleSet
&& $rule->getRules('font-family') !== []
&& $rule->getRules('src') !== [];
break;
default:
$result = true;
}
return $result;
}
}
emogrifier/src/Css/StyleRule.php 0000644 00000004174 15154122651 0012664 0 ustar 00 <?php
declare(strict_types=1);
namespace Pelago\Emogrifier\Css;
use Sabberworm\CSS\Property\Selector;
use Sabberworm\CSS\RuleSet\DeclarationBlock;
/**
* This class represents a CSS style rule, including selectors, a declaration block, and an optional containing at-rule.
*
* @internal
*/
class StyleRule
{
/**
* @var DeclarationBlock
*/
private $declarationBlock;
/**
* @var string
*/
private $containingAtRule;
/**
* @param DeclarationBlock $declarationBlock
* @param string $containingAtRule e.g. `@media screen and (max-width: 480px)`
*/
public function __construct(DeclarationBlock $declarationBlock, string $containingAtRule = '')
{
$this->declarationBlock = $declarationBlock;
$this->containingAtRule = \trim($containingAtRule);
}
/**
* @return array<int, string> the selectors, e.g. `["h1", "p"]`
*/
public function getSelectors(): array
{
/** @var array<int, Selector> $selectors */
$selectors = $this->declarationBlock->getSelectors();
return \array_map(
static function (Selector $selector): string {
return (string)$selector;
},
$selectors
);
}
/**
* @return string the CSS declarations, separated and followed by a semicolon, e.g., `color: red; height: 4px;`
*/
public function getDeclarationAsText(): string
{
return \implode(' ', $this->declarationBlock->getRules());
}
/**
* Checks whether the declaration block has at least one declaration.
*/
public function hasAtLeastOneDeclaration(): bool
{
return $this->declarationBlock->getRules() !== [];
}
/**
* @returns string e.g. `@media screen and (max-width: 480px)`, or an empty string
*/
public function getContainingAtRule(): string
{
return $this->containingAtRule;
}
/**
* Checks whether the containing at-rule is non-empty and has any non-whitespace characters.
*/
public function hasContainingAtRule(): bool
{
return $this->getContainingAtRule() !== '';
}
}
emogrifier/src/CssInliner.php 0000644 00000123627 15154122651 0012262 0 ustar 00 <?php
declare(strict_types=1);
namespace Pelago\Emogrifier;
use Pelago\Emogrifier\Css\CssDocument;
use Pelago\Emogrifier\HtmlProcessor\AbstractHtmlProcessor;
use Pelago\Emogrifier\Utilities\CssConcatenator;
use Symfony\Component\CssSelector\CssSelectorConverter;
use Symfony\Component\CssSelector\Exception\ParseException;
/**
* This class provides functions for converting CSS styles into inline style attributes in your HTML code.
*/
class CssInliner extends AbstractHtmlProcessor
{
/**
* @var int
*/
private const CACHE_KEY_SELECTOR = 0;
/**
* @var int
*/
private const CACHE_KEY_CSS_DECLARATIONS_BLOCK = 1;
/**
* @var int
*/
private const CACHE_KEY_COMBINED_STYLES = 2;
/**
* Regular expression component matching a static pseudo class in a selector, without the preceding ":",
* for which the applicable elements can be determined (by converting the selector to an XPath expression).
* (Contains alternation without a group and is intended to be placed within a capturing, non-capturing or lookahead
* group, as appropriate for the usage context.)
*
* @var string
*/
private const PSEUDO_CLASS_MATCHER
= 'empty|(?:first|last|nth(?:-last)?+|only)-(?:child|of-type)|not\\([[:ascii:]]*\\)';
/**
* This regular expression componenet matches an `...of-type` pseudo class name, without the preceding ":". These
* pseudo-classes can currently online be inlined if they have an associated type in the selector expression.
*
* @var string
*/
private const OF_TYPE_PSEUDO_CLASS_MATCHER = '(?:first|last|nth(?:-last)?+|only)-of-type';
/**
* regular expression component to match a selector combinator
*
* @var string
*/
private const COMBINATOR_MATCHER = '(?:\\s++|\\s*+[>+~]\\s*+)(?=[[:alpha:]_\\-.#*:\\[])';
/**
* @var array<string, bool>
*/
private $excludedSelectors = [];
/**
* @var array<string, bool>
*/
private $allowedMediaTypes = ['all' => true, 'screen' => true, 'print' => true];
/**
* @var array{
* 0: array<string, int>,
* 1: array<string, array<string, string>>,
* 2: array<string, string>
* }
*/
private $caches = [
self::CACHE_KEY_SELECTOR => [],
self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [],
self::CACHE_KEY_COMBINED_STYLES => [],
];
/**
* @var ?CssSelectorConverter
*/
private $cssSelectorConverter = null;
/**
* the visited nodes with the XPath paths as array keys
*
* @var array<string, \DOMElement>
*/
private $visitedNodes = [];
/**
* the styles to apply to the nodes with the XPath paths as array keys for the outer array
* and the attribute names/values as key/value pairs for the inner array
*
* @var array<string, array<string, string>>
*/
private $styleAttributesForNodes = [];
/**
* Determines whether the "style" attributes of tags in the the HTML passed to this class should be preserved.
* If set to false, the value of the style attributes will be discarded.
*
* @var bool
*/
private $isInlineStyleAttributesParsingEnabled = true;
/**
* Determines whether the `<style>` blocks in the HTML passed to this class should be parsed.
*
* If set to true, the `<style>` blocks will be removed from the HTML and their contents will be applied to the HTML
* via inline styles.
*
* If set to false, the `<style>` blocks will be left as they are in the HTML.
*
* @var bool
*/
private $isStyleBlocksParsingEnabled = true;
/**
* For calculating selector precedence order.
* Keys are a regular expression part to match before a CSS name.
* Values are a multiplier factor per match to weight specificity.
*
* @var array<string, int>
*/
private $selectorPrecedenceMatchers = [
// IDs: worth 10000
'\\#' => 10000,
// classes, attributes, pseudo-classes (not pseudo-elements) except `:not`: worth 100
'(?:\\.|\\[|(?<!:):(?!not\\())' => 100,
// elements (not attribute values or `:not`), pseudo-elements: worth 1
'(?:(?<![="\':\\w\\-])|::)' => 1,
];
/**
* array of data describing CSS rules which apply to the document but cannot be inlined, in the format returned by
* {@see collateCssRules}
*
* @var array<array-key, array{
* media: string,
* selector: string,
* hasUnmatchablePseudo: bool,
* declarationsBlock: string,
* line: int
* }>|null
*/
private $matchingUninlinableCssRules = null;
/**
* Emogrifier will throw Exceptions when it encounters an error instead of silently ignoring them.
*
* @var bool
*/
private $debug = false;
/**
* Inlines the given CSS into the existing HTML.
*
* @param string $css the CSS to inline, must be UTF-8-encoded
*
* @return self fluent interface
*
* @throws ParseException in debug mode, if an invalid selector is encountered
* @throws \RuntimeException in debug mode, if an internal PCRE error occurs
*/
public function inlineCss(string $css = ''): self
{
$this->clearAllCaches();
$this->purgeVisitedNodes();
$this->normalizeStyleAttributesOfAllNodes();
$combinedCss = $css;
// grab any existing style blocks from the HTML and append them to the existing CSS
// (these blocks should be appended so as to have precedence over conflicting styles in the existing CSS)
if ($this->isStyleBlocksParsingEnabled) {
$combinedCss .= $this->getCssFromAllStyleNodes();
}
$parsedCss = new CssDocument($combinedCss);
$excludedNodes = $this->getNodesToExclude();
$cssRules = $this->collateCssRules($parsedCss);
$cssSelectorConverter = $this->getCssSelectorConverter();
foreach ($cssRules['inlinable'] as $cssRule) {
try {
$nodesMatchingCssSelectors = $this->getXPath()
->query($cssSelectorConverter->toXPath($cssRule['selector']));
/** @var \DOMElement $node */
foreach ($nodesMatchingCssSelectors as $node) {
if (\in_array($node, $excludedNodes, true)) {
continue;
}
$this->copyInlinableCssToStyleAttribute($node, $cssRule);
}
} catch (ParseException $e) {
if ($this->debug) {
throw $e;
}
}
}
if ($this->isInlineStyleAttributesParsingEnabled) {
$this->fillStyleAttributesWithMergedStyles();
}
$this->removeImportantAnnotationFromAllInlineStyles();
$this->determineMatchingUninlinableCssRules($cssRules['uninlinable']);
$this->copyUninlinableCssToStyleNode($parsedCss);
return $this;
}
/**
* Disables the parsing of inline styles.
*
* @return self fluent interface
*/
public function disableInlineStyleAttributesParsing(): self
{
$this->isInlineStyleAttributesParsingEnabled = false;
return $this;
}
/**
* Disables the parsing of `<style>` blocks.
*
* @return self fluent interface
*/
public function disableStyleBlocksParsing(): self
{
$this->isStyleBlocksParsingEnabled = false;
return $this;
}
/**
* Marks a media query type to keep.
*
* @param string $mediaName the media type name, e.g., "braille"
*
* @return self fluent interface
*/
public function addAllowedMediaType(string $mediaName): self
{
$this->allowedMediaTypes[$mediaName] = true;
return $this;
}
/**
* Drops a media query type from the allowed list.
*
* @param string $mediaName the tag name, e.g., "braille"
*
* @return self fluent interface
*/
public function removeAllowedMediaType(string $mediaName): self
{
if (isset($this->allowedMediaTypes[$mediaName])) {
unset($this->allowedMediaTypes[$mediaName]);
}
return $this;
}
/**
* Adds a selector to exclude nodes from emogrification.
*
* Any nodes that match the selector will not have their style altered.
*
* @param string $selector the selector to exclude, e.g., ".editor"
*
* @return self fluent interface
*/
public function addExcludedSelector(string $selector): self
{
$this->excludedSelectors[$selector] = true;
return $this;
}
/**
* No longer excludes the nodes matching this selector from emogrification.
*
* @param string $selector the selector to no longer exclude, e.g., ".editor"
*
* @return self fluent interface
*/
public function removeExcludedSelector(string $selector): self
{
if (isset($this->excludedSelectors[$selector])) {
unset($this->excludedSelectors[$selector]);
}
return $this;
}
/**
* Sets the debug mode.
*
* @param bool $debug set to true to enable debug mode
*
* @return self fluent interface
*/
public function setDebug(bool $debug): self
{
$this->debug = $debug;
return $this;
}
/**
* Gets the array of selectors present in the CSS provided to `inlineCss()` for which the declarations could not be
* applied as inline styles, but which may affect elements in the HTML. The relevant CSS will have been placed in a
* `<style>` element. The selectors may include those used within `@media` rules or those involving dynamic
* pseudo-classes (such as `:hover`) or pseudo-elements (such as `::after`).
*
* @return array<array-key, string>
*
* @throws \BadMethodCallException if `inlineCss` has not been called first
*/
public function getMatchingUninlinableSelectors(): array
{
return \array_column($this->getMatchingUninlinableCssRules(), 'selector');
}
/**
* @return array<array-key, array{
* media: string,
* selector: string,
* hasUnmatchablePseudo: bool,
* declarationsBlock: string,
* line: int
* }>
*
* @throws \BadMethodCallException if `inlineCss` has not been called first
*/
private function getMatchingUninlinableCssRules(): array
{
if (!\is_array($this->matchingUninlinableCssRules)) {
throw new \BadMethodCallException('inlineCss must be called first', 1568385221);
}
return $this->matchingUninlinableCssRules;
}
/**
* Clears all caches.
*/
private function clearAllCaches(): void
{
$this->caches = [
self::CACHE_KEY_SELECTOR => [],
self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [],
self::CACHE_KEY_COMBINED_STYLES => [],
];
}
/**
* Purges the visited nodes.
*/
private function purgeVisitedNodes(): void
{
$this->visitedNodes = [];
$this->styleAttributesForNodes = [];
}
/**
* Parses the document and normalizes all existing CSS attributes.
* This changes 'DISPLAY: none' to 'display: none'.
* We wouldn't have to do this if DOMXPath supported XPath 2.0.
* Also stores a reference of nodes with existing inline styles so we don't overwrite them.
*/
private function normalizeStyleAttributesOfAllNodes(): void
{
/** @var \DOMElement $node */
foreach ($this->getAllNodesWithStyleAttribute() as $node) {
if ($this->isInlineStyleAttributesParsingEnabled) {
$this->normalizeStyleAttributes($node);
}
// Remove style attribute in every case, so we can add them back (if inline style attributes
// parsing is enabled) to the end of the style list, thus keeping the right priority of CSS rules;
// else original inline style rules may remain at the beginning of the final inline style definition
// of a node, which may give not the desired results
$node->removeAttribute('style');
}
}
/**
* Returns a list with all DOM nodes that have a style attribute.
*
* @return \DOMNodeList
*
* @throws \RuntimeException
*/
private function getAllNodesWithStyleAttribute(): \DOMNodeList
{
$query = '//*[@style]';
$matches = $this->getXPath()->query($query);
if (!$matches instanceof \DOMNodeList) {
throw new \RuntimeException('XPatch query failed: ' . $query, 1618577797);
}
return $matches;
}
/**
* Normalizes the value of the "style" attribute and saves it.
*
* @param \DOMElement $node
*/
private function normalizeStyleAttributes(\DOMElement $node): void
{
$normalizedOriginalStyle = \preg_replace_callback(
'/-?+[_a-zA-Z][\\w\\-]*+(?=:)/S',
/** @param array<array-key, string> $propertyNameMatches */
static function (array $propertyNameMatches): string {
return \strtolower($propertyNameMatches[0]);
},
$node->getAttribute('style')
);
// In order to not overwrite existing style attributes in the HTML, we have to save the original HTML styles.
$nodePath = $node->getNodePath();
if (\is_string($nodePath) && !isset($this->styleAttributesForNodes[$nodePath])) {
$this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationsBlock($normalizedOriginalStyle);
$this->visitedNodes[$nodePath] = $node;
}
$node->setAttribute('style', $normalizedOriginalStyle);
}
/**
* Parses a CSS declaration block into property name/value pairs.
*
* Example:
*
* The declaration block
*
* "color: #000; font-weight: bold;"
*
* will be parsed into the following array:
*
* "color" => "#000"
* "font-weight" => "bold"
*
* @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
*
* @return array<string, string>
* the CSS declarations with the property names as array keys and the property values as array values
*/
private function parseCssDeclarationsBlock(string $cssDeclarationsBlock): array
{
if (isset($this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock])) {
return $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock];
}
$properties = [];
foreach (\preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock) as $declaration) {
/** @var array<int, string> $matches */
$matches = [];
if (!\preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', \trim($declaration), $matches)) {
continue;
}
$propertyName = \strtolower($matches[1]);
$propertyValue = $matches[2];
$properties[$propertyName] = $propertyValue;
}
$this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock] = $properties;
return $properties;
}
/**
* Returns CSS content.
*
* @return string
*/
private function getCssFromAllStyleNodes(): string
{
$styleNodes = $this->getXPath()->query('//style');
if ($styleNodes === false) {
return '';
}
$css = '';
foreach ($styleNodes as $styleNode) {
$css .= "\n\n" . $styleNode->nodeValue;
$parentNode = $styleNode->parentNode;
if ($parentNode instanceof \DOMNode) {
$parentNode->removeChild($styleNode);
}
}
return $css;
}
/**
* Find the nodes that are not to be emogrified.
*
* @return array<int, \DOMElement>
*
* @throws ParseException
* @throws \UnexpectedValueException
*/
private function getNodesToExclude(): array
{
$excludedNodes = [];
foreach (\array_keys($this->excludedSelectors) as $selectorToExclude) {
try {
$matchingNodes = $this->getXPath()
->query($this->getCssSelectorConverter()->toXPath($selectorToExclude));
foreach ($matchingNodes as $node) {
if (!$node instanceof \DOMElement) {
$path = $node->getNodePath() ?? '$node';
throw new \UnexpectedValueException($path . ' is not a DOMElement.', 1617975914);
}
$excludedNodes[] = $node;
}
} catch (ParseException $e) {
if ($this->debug) {
throw $e;
}
}
}
return $excludedNodes;
}
/**
* @return CssSelectorConverter
*/
private function getCssSelectorConverter(): CssSelectorConverter
{
if (!$this->cssSelectorConverter instanceof CssSelectorConverter) {
$this->cssSelectorConverter = new CssSelectorConverter();
}
return $this->cssSelectorConverter;
}
/**
* Collates the individual rules from a `CssDocument` object.
*
* @param CssDocument $parsedCss
*
* @return array<string, array<array-key, array{
* media: string,
* selector: string,
* hasUnmatchablePseudo: bool,
* declarationsBlock: string,
* line: int
* }>>
* This 2-entry array has the key "inlinable" containing rules which can be inlined as `style` attributes
* and the key "uninlinable" containing rules which cannot. Each value is an array of sub-arrays with the
* following keys:
* - "media" (the media query string, e.g. "@media screen and (max-width: 480px)",
* or an empty string if not from a `@media` rule);
* - "selector" (the CSS selector, e.g., "*" or "header h1");
* - "hasUnmatchablePseudo" (`true` if that selector contains pseudo-elements or dynamic pseudo-classes such
* that the declarations cannot be applied inline);
* - "declarationsBlock" (the semicolon-separated CSS declarations for that selector,
* e.g., `color: red; height: 4px;`);
* - "line" (the line number, e.g. 42).
*/
private function collateCssRules(CssDocument $parsedCss): array
{
$matches = $parsedCss->getStyleRulesData(\array_keys($this->allowedMediaTypes));
$cssRules = [
'inlinable' => [],
'uninlinable' => [],
];
foreach ($matches as $key => $cssRule) {
if (!$cssRule->hasAtLeastOneDeclaration()) {
continue;
}
$mediaQuery = $cssRule->getContainingAtRule();
$declarationsBlock = $cssRule->getDeclarationAsText();
foreach ($cssRule->getSelectors() as $selector) {
// don't process pseudo-elements and behavioral (dynamic) pseudo-classes;
// only allow structural pseudo-classes
$hasPseudoElement = \strpos($selector, '::') !== false;
$hasUnmatchablePseudo = $hasPseudoElement || $this->hasUnsupportedPseudoClass($selector);
$parsedCssRule = [
'media' => $mediaQuery,
'selector' => $selector,
'hasUnmatchablePseudo' => $hasUnmatchablePseudo,
'declarationsBlock' => $declarationsBlock,
// keep track of where it appears in the file, since order is important
'line' => $key,
];
$ruleType = (!$cssRule->hasContainingAtRule() && !$hasUnmatchablePseudo) ? 'inlinable' : 'uninlinable';
$cssRules[$ruleType][] = $parsedCssRule;
}
}
\usort(
$cssRules['inlinable'],
/**
* @param array{selector: string, line: int} $first
* @param array{selector: string, line: int} $second
*/
function (array $first, array $second): int {
return $this->sortBySelectorPrecedence($first, $second);
}
);
return $cssRules;
}
/**
* Tests if a selector contains a pseudo-class which would mean it cannot be converted to an XPath expression for
* inlining CSS declarations.
*
* Any pseudo class that does not match {@see PSEUDO_CLASS_MATCHER} cannot be converted. Additionally, `...of-type`
* pseudo-classes cannot be converted if they are not associated with a type selector.
*
* @param string $selector
*
* @return bool
*/
private function hasUnsupportedPseudoClass(string $selector): bool
{
if (\preg_match('/:(?!' . self::PSEUDO_CLASS_MATCHER . ')[\\w\\-]/i', $selector)) {
return true;
}
if (!\preg_match('/:(?:' . self::OF_TYPE_PSEUDO_CLASS_MATCHER . ')/i', $selector)) {
return false;
}
foreach (\preg_split('/' . self::COMBINATOR_MATCHER . '/', $selector) as $selectorPart) {
if ($this->selectorPartHasUnsupportedOfTypePseudoClass($selectorPart)) {
return true;
}
}
return false;
}
/**
* Tests if part of a selector contains an `...of-type` pseudo-class such that it cannot be converted to an XPath
* expression.
*
* @param string $selectorPart part of a selector which has been split up at combinators
*
* @return bool `true` if the selector part does not have a type but does have an `...of-type` pseudo-class
*/
private function selectorPartHasUnsupportedOfTypePseudoClass(string $selectorPart): bool
{
if (\preg_match('/^[\\w\\-]/', $selectorPart)) {
return false;
}
return (bool)\preg_match('/:(?:' . self::OF_TYPE_PSEUDO_CLASS_MATCHER . ')/i', $selectorPart);
}
/**
* @param array{selector: string, line: int} $first
* @param array{selector: string, line: int} $second
*
* @return int
*/
private function sortBySelectorPrecedence(array $first, array $second): int
{
$precedenceOfFirst = $this->getCssSelectorPrecedence($first['selector']);
$precedenceOfSecond = $this->getCssSelectorPrecedence($second['selector']);
// We want these sorted in ascending order so selectors with lesser precedence get processed first and
// selectors with greater precedence get sorted last.
$precedenceForEquals = $first['line'] < $second['line'] ? -1 : 1;
$precedenceForNotEquals = $precedenceOfFirst < $precedenceOfSecond ? -1 : 1;
return ($precedenceOfFirst === $precedenceOfSecond) ? $precedenceForEquals : $precedenceForNotEquals;
}
/**
* @param string $selector
*
* @return int
*/
private function getCssSelectorPrecedence(string $selector): int
{
$selectorKey = \md5($selector);
if (isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
}
$precedence = 0;
foreach ($this->selectorPrecedenceMatchers as $matcher => $value) {
if (\trim($selector) === '') {
break;
}
$number = 0;
$selector = \preg_replace('/' . $matcher . '\\w+/', '', $selector, -1, $number);
$precedence += ($value * (int)$number);
}
$this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;
return $precedence;
}
/**
* Copies $cssRule into the style attribute of $node.
*
* Note: This method does not check whether $cssRule matches $node.
*
* @param \DOMElement $node
* @param array{
* media: string,
* selector: string,
* hasUnmatchablePseudo: bool,
* declarationsBlock: string,
* line: int
* } $cssRule
*/
private function copyInlinableCssToStyleAttribute(\DOMElement $node, array $cssRule): void
{
$declarationsBlock = $cssRule['declarationsBlock'];
$newStyleDeclarations = $this->parseCssDeclarationsBlock($declarationsBlock);
if ($newStyleDeclarations === []) {
return;
}
// if it has a style attribute, get it, process it, and append (overwrite) new stuff
if ($node->hasAttribute('style')) {
// break it up into an associative array
$oldStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
} else {
$oldStyleDeclarations = [];
}
$node->setAttribute(
'style',
$this->generateStyleStringFromDeclarationsArrays($oldStyleDeclarations, $newStyleDeclarations)
);
}
/**
* This method merges old or existing name/value array with new name/value array
* and then generates a string of the combined style suitable for placing inline.
* This becomes the single point for CSS string generation allowing for consistent
* CSS output no matter where the CSS originally came from.
*
* @param array<string, string> $oldStyles
* @param array<string, string> $newStyles
*
* @return string
*/
private function generateStyleStringFromDeclarationsArrays(array $oldStyles, array $newStyles): string
{
$cacheKey = \serialize([$oldStyles, $newStyles]);
if (isset($this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey])) {
return $this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey];
}
// Unset the overridden styles to preserve order, important if shorthand and individual properties are mixed
foreach ($oldStyles as $attributeName => $attributeValue) {
if (!isset($newStyles[$attributeName])) {
continue;
}
$newAttributeValue = $newStyles[$attributeName];
if (
$this->attributeValueIsImportant($attributeValue)
&& !$this->attributeValueIsImportant($newAttributeValue)
) {
unset($newStyles[$attributeName]);
} else {
unset($oldStyles[$attributeName]);
}
}
$combinedStyles = \array_merge($oldStyles, $newStyles);
$style = '';
foreach ($combinedStyles as $attributeName => $attributeValue) {
$style .= \strtolower(\trim($attributeName)) . ': ' . \trim($attributeValue) . '; ';
}
$trimmedStyle = \rtrim($style);
$this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey] = $trimmedStyle;
return $trimmedStyle;
}
/**
* Checks whether $attributeValue is marked as !important.
*
* @param string $attributeValue
*
* @return bool
*/
private function attributeValueIsImportant(string $attributeValue): bool
{
return (bool)\preg_match('/!\\s*+important$/i', $attributeValue);
}
/**
* Merges styles from styles attributes and style nodes and applies them to the attribute nodes
*/
private function fillStyleAttributesWithMergedStyles(): void
{
foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) {
$node = $this->visitedNodes[$nodePath];
$currentStyleAttributes = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
$node->setAttribute(
'style',
$this->generateStyleStringFromDeclarationsArrays(
$currentStyleAttributes,
$styleAttributesForNode
)
);
}
}
/**
* Searches for all nodes with a style attribute and removes the "!important" annotations out of
* the inline style declarations, eventually by rearranging declarations.
*
* @throws \RuntimeException
*/
private function removeImportantAnnotationFromAllInlineStyles(): void
{
/** @var \DOMElement $node */
foreach ($this->getAllNodesWithStyleAttribute() as $node) {
$this->removeImportantAnnotationFromNodeInlineStyle($node);
}
}
/**
* Removes the "!important" annotations out of the inline style declarations,
* eventually by rearranging declarations.
* Rearranging needed when !important shorthand properties are followed by some of their
* not !important expanded-version properties.
* For example "font: 12px serif !important; font-size: 13px;" must be reordered
* to "font-size: 13px; font: 12px serif;" in order to remain correct.
*
* @param \DOMElement $node
*
* @throws \RuntimeException
*/
private function removeImportantAnnotationFromNodeInlineStyle(\DOMElement $node): void
{
$inlineStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
/** @var array<string, string> $regularStyleDeclarations */
$regularStyleDeclarations = [];
/** @var array<string, string> $importantStyleDeclarations */
$importantStyleDeclarations = [];
foreach ($inlineStyleDeclarations as $property => $value) {
if ($this->attributeValueIsImportant($value)) {
$importantStyleDeclarations[$property] = $this->pregReplace('/\\s*+!\\s*+important$/i', '', $value);
} else {
$regularStyleDeclarations[$property] = $value;
}
}
$inlineStyleDeclarationsInNewOrder = \array_merge($regularStyleDeclarations, $importantStyleDeclarations);
$node->setAttribute(
'style',
$this->generateStyleStringFromSingleDeclarationsArray($inlineStyleDeclarationsInNewOrder)
);
}
/**
* Generates a CSS style string suitable to be used inline from the $styleDeclarations property => value array.
*
* @param array<string, string> $styleDeclarations
*
* @return string
*/
private function generateStyleStringFromSingleDeclarationsArray(array $styleDeclarations): string
{
return $this->generateStyleStringFromDeclarationsArrays([], $styleDeclarations);
}
/**
* Determines which of `$cssRules` actually apply to `$this->domDocument`, and sets them in
* `$this->matchingUninlinableCssRules`.
*
* @param array<array-key, array{
* media: string,
* selector: string,
* hasUnmatchablePseudo: bool,
* declarationsBlock: string,
* line: int
* }> $cssRules
* the "uninlinable" array of CSS rules returned by `collateCssRules`
*/
private function determineMatchingUninlinableCssRules(array $cssRules): void
{
$this->matchingUninlinableCssRules = \array_filter(
$cssRules,
function (array $cssRule): bool {
return $this->existsMatchForSelectorInCssRule($cssRule);
}
);
}
/**
* Checks whether there is at least one matching element for the CSS selector contained in the `selector` element
* of the provided CSS rule.
*
* Any dynamic pseudo-classes will be assumed to apply. If the selector matches a pseudo-element,
* it will test for a match with its originating element.
*
* @param array{
* media: string,
* selector: string,
* hasUnmatchablePseudo: bool,
* declarationsBlock: string,
* line: int
* } $cssRule
*
* @return bool
*
* @throws ParseException
*/
private function existsMatchForSelectorInCssRule(array $cssRule): bool
{
$selector = $cssRule['selector'];
if ($cssRule['hasUnmatchablePseudo']) {
$selector = $this->removeUnmatchablePseudoComponents($selector);
}
return $this->existsMatchForCssSelector($selector);
}
/**
* Checks whether there is at least one matching element for $cssSelector.
* When not in debug mode, it returns true also for invalid selectors (because they may be valid,
* just not implemented/recognized yet by Emogrifier).
*
* @param string $cssSelector
*
* @return bool
*
* @throws ParseException
*/
private function existsMatchForCssSelector(string $cssSelector): bool
{
try {
$nodesMatchingSelector = $this->getXPath()->query($this->getCssSelectorConverter()->toXPath($cssSelector));
} catch (ParseException $e) {
if ($this->debug) {
throw $e;
}
return true;
}
return $nodesMatchingSelector !== false && $nodesMatchingSelector->length !== 0;
}
/**
* Removes pseudo-elements and dynamic pseudo-classes from a CSS selector, replacing them with "*" if necessary.
* If such a pseudo-component is within the argument of `:not`, the entire `:not` component is removed or replaced.
*
* @param string $selector
*
* @return string
* selector which will match the relevant DOM elements if the pseudo-classes are assumed to apply, or in the
* case of pseudo-elements will match their originating element
*/
private function removeUnmatchablePseudoComponents(string $selector): string
{
// The regex allows nested brackets via `(?2)`.
// A space is temporarily prepended because the callback can't determine if the match was at the very start.
$selectorWithoutNots = \ltrim(\preg_replace_callback(
'/([\\s>+~]?+):not(\\([^()]*+(?:(?2)[^()]*+)*+\\))/i',
/** @param array<array-key, string> $matches */
function (array $matches): string {
return $this->replaceUnmatchableNotComponent($matches);
},
' ' . $selector
));
$selectorWithoutUnmatchablePseudoComponents = $this->removeSelectorComponents(
':(?!' . self::PSEUDO_CLASS_MATCHER . '):?+[\\w\\-]++(?:\\([^\\)]*+\\))?+',
$selectorWithoutNots
);
if (
!\preg_match(
'/:(?:' . self::OF_TYPE_PSEUDO_CLASS_MATCHER . ')/i',
$selectorWithoutUnmatchablePseudoComponents
)
) {
return $selectorWithoutUnmatchablePseudoComponents;
}
return \implode('', \array_map(
function (string $selectorPart): string {
return $this->removeUnsupportedOfTypePseudoClasses($selectorPart);
},
\preg_split(
'/(' . self::COMBINATOR_MATCHER . ')/',
$selectorWithoutUnmatchablePseudoComponents,
-1,
PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY
)
));
}
/**
* Helps `removeUnmatchablePseudoComponents()` replace or remove a selector `:not(...)` component if its argument
* contains pseudo-elements or dynamic pseudo-classes.
*
* @param array<array-key, string> $matches array of elements matched by the regular expression
*
* @return string
* the full match if there were no unmatchable pseudo components within; otherwise, any preceding combinator
* followed by "*", or an empty string if there was no preceding combinator
*/
private function replaceUnmatchableNotComponent(array $matches): string
{
[$notComponentWithAnyPrecedingCombinator, $anyPrecedingCombinator, $notArgumentInBrackets] = $matches;
if ($this->hasUnsupportedPseudoClass($notArgumentInBrackets)) {
return $anyPrecedingCombinator !== '' ? $anyPrecedingCombinator . '*' : '';
}
return $notComponentWithAnyPrecedingCombinator;
}
/**
* Removes components from a CSS selector, replacing them with "*" if necessary.
*
* @param string $matcher regular expression part to match the components to remove
* @param string $selector
*
* @return string
* selector which will match the relevant DOM elements if the removed components are assumed to apply (or in
* the case of pseudo-elements will match their originating element)
*/
private function removeSelectorComponents(string $matcher, string $selector): string
{
return \preg_replace(
['/([\\s>+~]|^)' . $matcher . '/i', '/' . $matcher . '/i'],
['$1*', ''],
$selector
);
}
/**
* Removes any `...-of-type` pseudo-classes from part of a CSS selector, if it does not have a type, replacing them
* with "*" if necessary.
*
* @param string $selectorPart part of a selector which has been split up at combinators
*
* @return string
* selector part which will match the relevant DOM elements if the pseudo-classes are assumed to apply
*/
private function removeUnsupportedOfTypePseudoClasses(string $selectorPart): string
{
if (!$this->selectorPartHasUnsupportedOfTypePseudoClass($selectorPart)) {
return $selectorPart;
}
return $this->removeSelectorComponents(
':(?:' . self::OF_TYPE_PSEUDO_CLASS_MATCHER . ')(?:\\([^\\)]*+\\))?+',
$selectorPart
);
}
/**
* Applies `$this->matchingUninlinableCssRules` to `$this->domDocument` by placing them as CSS in a `<style>`
* element.
* If there are no uninlinable CSS rules to copy there, a `<style>` element will be created containing only the
* applicable at-rules from `$parsedCss`.
* If there are none of either, an empty `<style>` element will not be created.
*
* @param CssDocument $parsedCss
* This may contain various at-rules whose content `CssInliner` does not currently attempt to inline or
* process in any other way, such as `@import`, `@font-face`, `@keyframes`, etc., and which should precede
* the processed but found-to-be-uninlinable CSS placed in the `<style>` element.
* Note that `CssInliner` processes `@media` rules so that they can be ordered correctly with respect to
* other uninlinable rules; these will not be duplicated from `$parsedCss`.
*/
private function copyUninlinableCssToStyleNode(CssDocument $parsedCss): void
{
$css = $parsedCss->renderNonConditionalAtRules();
// avoid including unneeded class dependency if there are no rules
if ($this->getMatchingUninlinableCssRules() !== []) {
$cssConcatenator = new CssConcatenator();
foreach ($this->getMatchingUninlinableCssRules() as $cssRule) {
$cssConcatenator->append([$cssRule['selector']], $cssRule['declarationsBlock'], $cssRule['media']);
}
$css .= $cssConcatenator->getCss();
}
// avoid adding empty style element
if ($css !== '') {
$this->addStyleElementToDocument($css);
}
}
/**
* Adds a style element with $css to $this->domDocument.
*
* This method is protected to allow overriding.
*
* @see https://github.com/MyIntervals/emogrifier/issues/103
*
* @param string $css
*/
protected function addStyleElementToDocument(string $css): void
{
$domDocument = $this->getDomDocument();
$styleElement = $domDocument->createElement('style', $css);
$styleAttribute = $domDocument->createAttribute('type');
$styleAttribute->value = 'text/css';
$styleElement->appendChild($styleAttribute);
$headElement = $this->getHeadElement();
$headElement->appendChild($styleElement);
}
/**
* Returns the HEAD element.
*
* This method assumes that there always is a HEAD element.
*
* @return \DOMElement
*
* @throws \UnexpectedValueException
*/
private function getHeadElement(): \DOMElement
{
$node = $this->getDomDocument()->getElementsByTagName('head')->item(0);
if (!$node instanceof \DOMElement) {
throw new \UnexpectedValueException('There is no HEAD element. This should never happen.', 1617923227);
}
return $node;
}
/**
* Wraps `preg_replace`. If an error occurs (which is highly unlikely), either it is logged and the original
* `$subject` is returned, or in debug mode an exception is thrown.
*
* This method only supports strings, not arrays of strings.
*
* @param string $pattern
* @param string $replacement
* @param string $subject
*
* @return string
*
* @throws \RuntimeException
*/
private function pregReplace(string $pattern, string $replacement, string $subject): string
{
$result = \preg_replace($pattern, $replacement, $subject);
if (!\is_string($result)) {
$this->logOrThrowPregLastError();
$result = $subject;
}
return $result;
}
/**
* Obtains the name of the error constant for `preg_last_error` (based on code posted at
* {@see https://www.php.net/manual/en/function.preg-last-error.php#124124}) and puts it into an error message
* which is either passed to `trigger_error` (in non-debug mode) or an exception which is thrown (in debug mode).
*
* @throws \RuntimeException
*/
private function logOrThrowPregLastError(): void
{
$pcreConstants = \get_defined_constants(true)['pcre'];
$pcreErrorConstantNames = \array_flip(\array_filter(
$pcreConstants,
static function (string $key): bool {
return \substr($key, -6) === '_ERROR';
},
ARRAY_FILTER_USE_KEY
));
$pregLastError = \preg_last_error();
$message = 'PCRE regex execution error `' . (string)($pcreErrorConstantNames[$pregLastError] ?? $pregLastError)
. '`';
if ($this->debug) {
throw new \RuntimeException($message, 1592870147);
}
\trigger_error($message);
}
}
emogrifier/src/HtmlProcessor/AbstractHtmlProcessor.php 0000644 00000035420 15154122651 0017276 0 ustar 00 <?php
declare(strict_types=1);
namespace Pelago\Emogrifier\HtmlProcessor;
/**
* Base class for HTML processor that e.g., can remove, add or modify nodes or attributes.
*
* The "vanilla" subclass is the HtmlNormalizer.
*
* @psalm-consistent-constructor
*/
abstract class AbstractHtmlProcessor
{
/**
* @var string
*/
protected const DEFAULT_DOCUMENT_TYPE = '<!DOCTYPE html>';
/**
* @var string
*/
protected const CONTENT_TYPE_META_TAG = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">';
/**
* @var string Regular expression part to match tag names that PHP's DOMDocument implementation is not aware are
* self-closing. These are mostly HTML5 elements, but for completeness <command> (obsolete) and <keygen>
* (deprecated) are also included.
*
* @see https://bugs.php.net/bug.php?id=73175
*/
protected const PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER = '(?:command|embed|keygen|source|track|wbr)';
/**
* Regular expression part to match tag names that may appear before the start of the `<body>` element. A start tag
* for any other element would implicitly start the `<body>` element due to tag omission rules.
*
* @var string
*/
protected const TAGNAME_ALLOWED_BEFORE_BODY_MATCHER
= '(?:html|head|base|command|link|meta|noscript|script|style|template|title)';
/**
* regular expression pattern to match an HTML comment, including delimiters and modifiers
*
* @var string
*/
protected const HTML_COMMENT_PATTERN = '/<!--[^-]*+(?:-(?!->)[^-]*+)*+(?:-->|$)/';
/**
* regular expression pattern to match an HTML `<template>` element, including delimiters and modifiers
*
* @var string
*/
protected const HTML_TEMPLATE_ELEMENT_PATTERN
= '%<template[\\s>][^<]*+(?:<(?!/template>)[^<]*+)*+(?:</template>|$)%i';
/**
* @var ?\DOMDocument
*/
protected $domDocument = null;
/**
* @var ?\DOMXPath
*/
private $xPath = null;
/**
* The constructor.
*
* Please use `::fromHtml` or `::fromDomDocument` instead.
*/
private function __construct()
{
}
/**
* Builds a new instance from the given HTML.
*
* @param string $unprocessedHtml raw HTML, must be UTF-encoded, must not be empty
*
* @return static
*
* @throws \InvalidArgumentException if $unprocessedHtml is anything other than a non-empty string
*/
public static function fromHtml(string $unprocessedHtml): self
{
if ($unprocessedHtml === '') {
throw new \InvalidArgumentException('The provided HTML must not be empty.', 1515763647);
}
$instance = new static();
$instance->setHtml($unprocessedHtml);
return $instance;
}
/**
* Builds a new instance from the given DOM document.
*
* @param \DOMDocument $document a DOM document returned by getDomDocument() of another instance
*
* @return static
*/
public static function fromDomDocument(\DOMDocument $document): self
{
$instance = new static();
$instance->setDomDocument($document);
return $instance;
}
/**
* Sets the HTML to process.
*
* @param string $html the HTML to process, must be UTF-8-encoded
*/
private function setHtml(string $html): void
{
$this->createUnifiedDomDocument($html);
}
/**
* Provides access to the internal DOMDocument representation of the HTML in its current state.
*
* @return \DOMDocument
*
* @throws \UnexpectedValueException
*/
public function getDomDocument(): \DOMDocument
{
if (!$this->domDocument instanceof \DOMDocument) {
$message = self::class . '::setDomDocument() has not yet been called on ' . static::class;
throw new \UnexpectedValueException($message, 1570472239);
}
return $this->domDocument;
}
/**
* @param \DOMDocument $domDocument
*/
private function setDomDocument(\DOMDocument $domDocument): void
{
$this->domDocument = $domDocument;
$this->xPath = new \DOMXPath($this->domDocument);
}
/**
* @return \DOMXPath
*
* @throws \UnexpectedValueException
*/
protected function getXPath(): \DOMXPath
{
if (!$this->xPath instanceof \DOMXPath) {
$message = self::class . '::setDomDocument() has not yet been called on ' . static::class;
throw new \UnexpectedValueException($message, 1617819086);
}
return $this->xPath;
}
/**
* Renders the normalized and processed HTML.
*
* @return string
*/
public function render(): string
{
$htmlWithPossibleErroneousClosingTags = $this->getDomDocument()->saveHTML();
return $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags);
}
/**
* Renders the content of the BODY element of the normalized and processed HTML.
*
* @return string
*/
public function renderBodyContent(): string
{
$htmlWithPossibleErroneousClosingTags = $this->getDomDocument()->saveHTML($this->getBodyElement());
$bodyNodeHtml = $this->removeSelfClosingTagsClosingTags($htmlWithPossibleErroneousClosingTags);
return \preg_replace('%</?+body(?:\\s[^>]*+)?+>%', '', $bodyNodeHtml);
}
/**
* Eliminates any invalid closing tags for void elements from the given HTML.
*
* @param string $html
*
* @return string
*/
private function removeSelfClosingTagsClosingTags(string $html): string
{
return \preg_replace('%</' . self::PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER . '>%', '', $html);
}
/**
* Returns the BODY element.
*
* This method assumes that there always is a BODY element.
*
* @return \DOMElement
*
* @throws \RuntimeException
*/
private function getBodyElement(): \DOMElement
{
$node = $this->getDomDocument()->getElementsByTagName('body')->item(0);
if (!$node instanceof \DOMElement) {
throw new \RuntimeException('There is no body element.', 1617922607);
}
return $node;
}
/**
* Creates a DOM document from the given HTML and stores it in $this->domDocument.
*
* The DOM document will always have a BODY element and a document type.
*
* @param string $html
*/
private function createUnifiedDomDocument(string $html): void
{
$this->createRawDomDocument($html);
$this->ensureExistenceOfBodyElement();
}
/**
* Creates a DOMDocument instance from the given HTML and stores it in $this->domDocument.
*
* @param string $html
*/
private function createRawDomDocument(string $html): void
{
$domDocument = new \DOMDocument();
$domDocument->strictErrorChecking = false;
$domDocument->formatOutput = true;
$libXmlState = \libxml_use_internal_errors(true);
$domDocument->loadHTML($this->prepareHtmlForDomConversion($html));
\libxml_clear_errors();
\libxml_use_internal_errors($libXmlState);
$this->setDomDocument($domDocument);
}
/**
* Returns the HTML with added document type, Content-Type meta tag, and self-closing slashes, if needed,
* ensuring that the HTML will be good for creating a DOM document from it.
*
* @param string $html
*
* @return string the unified HTML
*/
private function prepareHtmlForDomConversion(string $html): string
{
$htmlWithSelfClosingSlashes = $this->ensurePhpUnrecognizedSelfClosingTagsAreXml($html);
$htmlWithDocumentType = $this->ensureDocumentType($htmlWithSelfClosingSlashes);
return $this->addContentTypeMetaTag($htmlWithDocumentType);
}
/**
* Makes sure that the passed HTML has a document type, with lowercase "html".
*
* @param string $html
*
* @return string HTML with document type
*/
private function ensureDocumentType(string $html): string
{
$hasDocumentType = \stripos($html, '<!DOCTYPE') !== false;
if ($hasDocumentType) {
return $this->normalizeDocumentType($html);
}
return self::DEFAULT_DOCUMENT_TYPE . $html;
}
/**
* Makes sure the document type in the passed HTML has lowercase "html".
*
* @param string $html
*
* @return string HTML with normalized document type
*/
private function normalizeDocumentType(string $html): string
{
// Limit to replacing the first occurrence: as an optimization; and in case an example exists as unescaped text.
return \preg_replace(
'/<!DOCTYPE\\s++html(?=[\\s>])/i',
'<!DOCTYPE html',
$html,
1
);
}
/**
* Adds a Content-Type meta tag for the charset.
*
* This method also ensures that there is a HEAD element.
*
* @param string $html
*
* @return string the HTML with the meta tag added
*/
private function addContentTypeMetaTag(string $html): string
{
if ($this->hasContentTypeMetaTagInHead($html)) {
return $html;
}
// We are trying to insert the meta tag to the right spot in the DOM.
// If we just prepended it to the HTML, we would lose attributes set to the HTML tag.
$hasHeadTag = \preg_match('/<head[\\s>]/i', $html);
$hasHtmlTag = \stripos($html, '<html') !== false;
if ($hasHeadTag) {
$reworkedHtml = \preg_replace(
'/<head(?=[\\s>])([^>]*+)>/i',
'<head$1>' . self::CONTENT_TYPE_META_TAG,
$html
);
} elseif ($hasHtmlTag) {
$reworkedHtml = \preg_replace(
'/<html(.*?)>/is',
'<html$1><head>' . self::CONTENT_TYPE_META_TAG . '</head>',
$html
);
} else {
$reworkedHtml = self::CONTENT_TYPE_META_TAG . $html;
}
return $reworkedHtml;
}
/**
* Tests whether the given HTML has a valid `Content-Type` metadata element within the `<head>` element. Due to tag
* omission rules, HTML parsers are expected to end the `<head>` element and start the `<body>` element upon
* encountering a start tag for any element which is permitted only within the `<body>`.
*
* @param string $html
*
* @return bool
*/
private function hasContentTypeMetaTagInHead(string $html): bool
{
\preg_match('%^.*?(?=<meta(?=\\s)[^>]*\\shttp-equiv=(["\']?+)Content-Type\\g{-1}[\\s/>])%is', $html, $matches);
if (isset($matches[0])) {
$htmlBefore = $matches[0];
try {
$hasContentTypeMetaTagInHead = !$this->hasEndOfHeadElement($htmlBefore);
} catch (\RuntimeException $exception) {
// If something unexpected occurs, assume the `Content-Type` that was found is valid.
\trigger_error($exception->getMessage());
$hasContentTypeMetaTagInHead = true;
}
} else {
$hasContentTypeMetaTagInHead = false;
}
return $hasContentTypeMetaTagInHead;
}
/**
* Tests whether the `<head>` element ends within the given HTML. Due to tag omission rules, HTML parsers are
* expected to end the `<head>` element and start the `<body>` element upon encountering a start tag for any element
* which is permitted only within the `<body>`.
*
* @param string $html
*
* @return bool
*
* @throws \RuntimeException
*/
private function hasEndOfHeadElement(string $html): bool
{
$headEndTagMatchCount
= \preg_match('%<(?!' . self::TAGNAME_ALLOWED_BEFORE_BODY_MATCHER . '[\\s/>])\\w|</head>%i', $html);
if (\is_int($headEndTagMatchCount) && $headEndTagMatchCount > 0) {
// An exception to the implicit end of the `<head>` is any content within a `<template>` element, as well in
// comments. As an optimization, this is only checked for if a potential `<head>` end tag is found.
$htmlWithoutCommentsOrTemplates = $this->removeHtmlTemplateElements($this->removeHtmlComments($html));
$hasEndOfHeadElement = $htmlWithoutCommentsOrTemplates === $html
|| $this->hasEndOfHeadElement($htmlWithoutCommentsOrTemplates);
} else {
$hasEndOfHeadElement = false;
}
return $hasEndOfHeadElement;
}
/**
* Removes comments from the given HTML, including any which are unterminated, for which the remainder of the string
* is removed.
*
* @param string $html
*
* @return string
*
* @throws \RuntimeException
*/
private function removeHtmlComments(string $html): string
{
$result = \preg_replace(self::HTML_COMMENT_PATTERN, '', $html);
if (!\is_string($result)) {
throw new \RuntimeException('Internal PCRE error', 1616521475);
}
return $result;
}
/**
* Removes `<template>` elements from the given HTML, including any without an end tag, for which the remainder of
* the string is removed.
*
* @param string $html
*
* @return string
*
* @throws \RuntimeException
*/
private function removeHtmlTemplateElements(string $html): string
{
$result = \preg_replace(self::HTML_TEMPLATE_ELEMENT_PATTERN, '', $html);
if (!\is_string($result)) {
throw new \RuntimeException('Internal PCRE error', 1616519652);
}
return $result;
}
/**
* Makes sure that any self-closing tags not recognized as such by PHP's DOMDocument implementation have a
* self-closing slash.
*
* @param string $html
*
* @return string HTML with problematic tags converted.
*/
private function ensurePhpUnrecognizedSelfClosingTagsAreXml(string $html): string
{
return \preg_replace(
'%<' . self::PHP_UNRECOGNIZED_VOID_TAGNAME_MATCHER . '\\b[^>]*+(?<!/)(?=>)%',
'$0/',
$html
);
}
/**
* Checks that $this->domDocument has a BODY element and adds it if it is missing.
*
* @throws \UnexpectedValueException
*/
private function ensureExistenceOfBodyElement(): void
{
if ($this->getDomDocument()->getElementsByTagName('body')->item(0) instanceof \DOMElement) {
return;
}
$htmlElement = $this->getDomDocument()->getElementsByTagName('html')->item(0);
if (!$htmlElement instanceof \DOMElement) {
throw new \UnexpectedValueException('There is no HTML element although there should be one.', 1569930853);
}
$htmlElement->appendChild($this->getDomDocument()->createElement('body'));
}
}
emogrifier/src/HtmlProcessor/CssToAttributeConverter.php 0000644 00000024101 15154122651 0017607 0 ustar 00 <?php
declare(strict_types=1);
namespace Pelago\Emogrifier\HtmlProcessor;
/**
* This HtmlProcessor can convert style HTML attributes to the corresponding other visual HTML attributes,
* e.g. it converts style="width: 100px" to width="100".
*
* It will only add attributes, but leaves the style attribute untouched.
*
* To trigger the conversion, call the convertCssToVisualAttributes method.
*/
class CssToAttributeConverter extends AbstractHtmlProcessor
{
/**
* This multi-level array contains simple mappings of CSS properties to
* HTML attributes. If a mapping only applies to certain HTML nodes or
* only for certain values, the mapping is an object with a whitelist
* of nodes and values.
*
* @var array<string, array{attribute: string, nodes?: array<int, string>, values?: array<int, string>}>
*/
private $cssToHtmlMap = [
'background-color' => [
'attribute' => 'bgcolor',
],
'text-align' => [
'attribute' => 'align',
'nodes' => ['p', 'div', 'td', 'th'],
'values' => ['left', 'right', 'center', 'justify'],
],
'float' => [
'attribute' => 'align',
'nodes' => ['table', 'img'],
'values' => ['left', 'right'],
],
'border-spacing' => [
'attribute' => 'cellspacing',
'nodes' => ['table'],
],
];
/**
* @var array<string, array<string, string>>
*/
private static $parsedCssCache = [];
/**
* Maps the CSS from the style nodes to visual HTML attributes.
*
* @return self fluent interface
*/
public function convertCssToVisualAttributes(): self
{
/** @var \DOMElement $node */
foreach ($this->getAllNodesWithStyleAttribute() as $node) {
$inlineStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
$this->mapCssToHtmlAttributes($inlineStyleDeclarations, $node);
}
return $this;
}
/**
* Returns a list with all DOM nodes that have a style attribute.
*
* @return \DOMNodeList
*/
private function getAllNodesWithStyleAttribute(): \DOMNodeList
{
return $this->getXPath()->query('//*[@style]');
}
/**
* Parses a CSS declaration block into property name/value pairs.
*
* Example:
*
* The declaration block
*
* "color: #000; font-weight: bold;"
*
* will be parsed into the following array:
*
* "color" => "#000"
* "font-weight" => "bold"
*
* @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
*
* @return array<string, string>
* the CSS declarations with the property names as array keys and the property values as array values
*/
private function parseCssDeclarationsBlock(string $cssDeclarationsBlock): array
{
if (isset(self::$parsedCssCache[$cssDeclarationsBlock])) {
return self::$parsedCssCache[$cssDeclarationsBlock];
}
$properties = [];
foreach (\preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock) as $declaration) {
/** @var array<int, string> $matches */
$matches = [];
if (!\preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', \trim($declaration), $matches)) {
continue;
}
$propertyName = \strtolower($matches[1]);
$propertyValue = $matches[2];
$properties[$propertyName] = $propertyValue;
}
self::$parsedCssCache[$cssDeclarationsBlock] = $properties;
return $properties;
}
/**
* Applies $styles to $node.
*
* This method maps CSS styles to HTML attributes and adds those to the
* node.
*
* @param array<string, string> $styles the new CSS styles taken from the global styles to be applied to this node
* @param \DOMElement $node node to apply styles to
*/
private function mapCssToHtmlAttributes(array $styles, \DOMElement $node): void
{
foreach ($styles as $property => $value) {
// Strip !important indicator
$value = \trim(\str_replace('!important', '', $value));
$this->mapCssToHtmlAttribute($property, $value, $node);
}
}
/**
* Tries to apply the CSS style to $node as an attribute.
*
* This method maps a CSS rule to HTML attributes and adds those to the node.
*
* @param string $property the name of the CSS property to map
* @param string $value the value of the style rule to map
* @param \DOMElement $node node to apply styles to
*/
private function mapCssToHtmlAttribute(string $property, string $value, \DOMElement $node): void
{
if (!$this->mapSimpleCssProperty($property, $value, $node)) {
$this->mapComplexCssProperty($property, $value, $node);
}
}
/**
* Looks up the CSS property in the mapping table and maps it if it matches the conditions.
*
* @param string $property the name of the CSS property to map
* @param string $value the value of the style rule to map
* @param \DOMElement $node node to apply styles to
*
* @return bool true if the property can be mapped using the simple mapping table
*/
private function mapSimpleCssProperty(string $property, string $value, \DOMElement $node): bool
{
if (!isset($this->cssToHtmlMap[$property])) {
return false;
}
$mapping = $this->cssToHtmlMap[$property];
$nodesMatch = !isset($mapping['nodes']) || \in_array($node->nodeName, $mapping['nodes'], true);
$valuesMatch = !isset($mapping['values']) || \in_array($value, $mapping['values'], true);
$canBeMapped = $nodesMatch && $valuesMatch;
if ($canBeMapped) {
$node->setAttribute($mapping['attribute'], $value);
}
return $canBeMapped;
}
/**
* Maps CSS properties that need special transformation to an HTML attribute.
*
* @param string $property the name of the CSS property to map
* @param string $value the value of the style rule to map
* @param \DOMElement $node node to apply styles to
*/
private function mapComplexCssProperty(string $property, string $value, \DOMElement $node): void
{
switch ($property) {
case 'background':
$this->mapBackgroundProperty($node, $value);
break;
case 'width':
// intentional fall-through
case 'height':
$this->mapWidthOrHeightProperty($node, $value, $property);
break;
case 'margin':
$this->mapMarginProperty($node, $value);
break;
case 'border':
$this->mapBorderProperty($node, $value);
break;
default:
}
}
/**
* @param \DOMElement $node node to apply styles to
* @param string $value the value of the style rule to map
*/
private function mapBackgroundProperty(\DOMElement $node, string $value): void
{
// parse out the color, if any
/** @var array<int, string> $styles */
$styles = \explode(' ', $value, 2);
$first = $styles[0];
if (\is_numeric($first[0]) || \strncmp($first, 'url', 3) === 0) {
return;
}
// as this is not a position or image, assume it's a color
$node->setAttribute('bgcolor', $first);
}
/**
* @param \DOMElement $node node to apply styles to
* @param string $value the value of the style rule to map
* @param string $property the name of the CSS property to map
*/
private function mapWidthOrHeightProperty(\DOMElement $node, string $value, string $property): void
{
// only parse values in px and %, but not values like "auto"
if (!\preg_match('/^(\\d+)(\\.(\\d+))?(px|%)$/', $value)) {
return;
}
$number = \preg_replace('/[^0-9.%]/', '', $value);
$node->setAttribute($property, $number);
}
/**
* @param \DOMElement $node node to apply styles to
* @param string $value the value of the style rule to map
*/
private function mapMarginProperty(\DOMElement $node, string $value): void
{
if (!$this->isTableOrImageNode($node)) {
return;
}
$margins = $this->parseCssShorthandValue($value);
if ($margins['left'] === 'auto' && $margins['right'] === 'auto') {
$node->setAttribute('align', 'center');
}
}
/**
* @param \DOMElement $node node to apply styles to
* @param string $value the value of the style rule to map
*/
private function mapBorderProperty(\DOMElement $node, string $value): void
{
if (!$this->isTableOrImageNode($node)) {
return;
}
if ($value === 'none' || $value === '0') {
$node->setAttribute('border', '0');
}
}
/**
* @param \DOMElement $node
*
* @return bool
*/
private function isTableOrImageNode(\DOMElement $node): bool
{
return $node->nodeName === 'table' || $node->nodeName === 'img';
}
/**
* Parses a shorthand CSS value and splits it into individual values. For example: `padding: 0 auto;` - `0 auto` is
* split into top: 0, left: auto, bottom: 0, right: auto.
*
* @param string $value a CSS property value with 1, 2, 3 or 4 sizes
*
* @return array<string, string>
* an array of values for top, right, bottom and left (using these as associative array keys)
*/
private function parseCssShorthandValue(string $value): array
{
/** @var array<int, string> $values */
$values = \preg_split('/\\s+/', $value);
$css = [];
$css['top'] = $values[0];
$css['right'] = (\count($values) > 1) ? $values[1] : $css['top'];
$css['bottom'] = (\count($values) > 2) ? $values[2] : $css['top'];
$css['left'] = (\count($values) > 3) ? $values[3] : $css['right'];
return $css;
}
}
emogrifier/src/HtmlProcessor/HtmlNormalizer.php 0000644 00000000502 15154122651 0015746 0 ustar 00 <?php
declare(strict_types=1);
namespace Pelago\Emogrifier\HtmlProcessor;
/**
* Normalizes HTML:
* - add a document type (HTML5) if missing
* - disentangle incorrectly nested tags
* - add HEAD and BODY elements (if they are missing)
* - reformat the HTML
*/
class HtmlNormalizer extends AbstractHtmlProcessor
{
}
emogrifier/src/HtmlProcessor/HtmlPruner.php 0000644 00000012010 15154122651 0015074 0 ustar 00 <?php
declare(strict_types=1);
namespace Pelago\Emogrifier\HtmlProcessor;
use Pelago\Emogrifier\CssInliner;
use Pelago\Emogrifier\Utilities\ArrayIntersector;
/**
* This class can remove things from HTML.
*/
class HtmlPruner extends AbstractHtmlProcessor
{
/**
* We need to look for display:none, but we need to do a case-insensitive search. Since DOMDocument only
* supports XPath 1.0, lower-case() isn't available to us. We've thus far only set attributes to lowercase,
* not attribute values. Consequently, we need to translate() the letters that would be in 'NONE' ("NOE")
* to lowercase.
*
* @var string
*/
private const DISPLAY_NONE_MATCHER
= '//*[@style and contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")'
. ' and not(@class and contains(concat(" ", normalize-space(@class), " "), " -emogrifier-keep "))]';
/**
* Removes elements that have a "display: none;" style.
*
* @return self fluent interface
*/
public function removeElementsWithDisplayNone(): self
{
$elementsWithStyleDisplayNone = $this->getXPath()->query(self::DISPLAY_NONE_MATCHER);
if ($elementsWithStyleDisplayNone->length === 0) {
return $this;
}
foreach ($elementsWithStyleDisplayNone as $element) {
$parentNode = $element->parentNode;
if ($parentNode !== null) {
$parentNode->removeChild($element);
}
}
return $this;
}
/**
* Removes classes that are no longer required (e.g. because there are no longer any CSS rules that reference them)
* from `class` attributes.
*
* Note that this does not inspect the CSS, but expects to be provided with a list of classes that are still in use.
*
* This method also has the (presumably beneficial) side-effect of minifying (removing superfluous whitespace from)
* `class` attributes.
*
* @param array<array-key, string> $classesToKeep names of classes that should not be removed
*
* @return self fluent interface
*/
public function removeRedundantClasses(array $classesToKeep = []): self
{
$elementsWithClassAttribute = $this->getXPath()->query('//*[@class]');
if ($classesToKeep !== []) {
$this->removeClassesFromElements($elementsWithClassAttribute, $classesToKeep);
} else {
// Avoid unnecessary processing if there are no classes to keep.
$this->removeClassAttributeFromElements($elementsWithClassAttribute);
}
return $this;
}
/**
* Removes classes from the `class` attribute of each element in `$elements`, except any in `$classesToKeep`,
* removing the `class` attribute itself if the resultant list is empty.
*
* @param \DOMNodeList $elements
* @param array<array-key, string> $classesToKeep
*/
private function removeClassesFromElements(\DOMNodeList $elements, array $classesToKeep): void
{
$classesToKeepIntersector = new ArrayIntersector($classesToKeep);
/** @var \DOMElement $element */
foreach ($elements as $element) {
$elementClasses = \preg_split('/\\s++/', \trim($element->getAttribute('class')));
$elementClassesToKeep = $classesToKeepIntersector->intersectWith($elementClasses);
if ($elementClassesToKeep !== []) {
$element->setAttribute('class', \implode(' ', $elementClassesToKeep));
} else {
$element->removeAttribute('class');
}
}
}
/**
* Removes the `class` attribute from each element in `$elements`.
*
* @param \DOMNodeList $elements
*/
private function removeClassAttributeFromElements(\DOMNodeList $elements): void
{
/** @var \DOMElement $element */
foreach ($elements as $element) {
$element->removeAttribute('class');
}
}
/**
* After CSS has been inlined, there will likely be some classes in `class` attributes that are no longer referenced
* by any remaining (uninlinable) CSS. This method removes such classes.
*
* Note that it does not inspect the remaining CSS, but uses information readily available from the `CssInliner`
* instance about the CSS rules that could not be inlined.
*
* @param CssInliner $cssInliner object instance that performed the CSS inlining
*
* @return self fluent interface
*
* @throws \BadMethodCallException if `inlineCss` has not first been called on `$cssInliner`
*/
public function removeRedundantClassesAfterCssInlined(CssInliner $cssInliner): self
{
$classesToKeepAsKeys = [];
foreach ($cssInliner->getMatchingUninlinableSelectors() as $selector) {
\preg_match_all('/\\.(-?+[_a-zA-Z][\\w\\-]*+)/', $selector, $matches);
$classesToKeepAsKeys += \array_fill_keys($matches[1], true);
}
$this->removeRedundantClasses(\array_keys($classesToKeepAsKeys));
return $this;
}
}
emogrifier/src/Utilities/ArrayIntersector.php 0000644 00000003560 15154122651 0015455 0 ustar 00 <?php
declare(strict_types=1);
namespace Pelago\Emogrifier\Utilities;
/**
* When computing many array intersections using the same array, it is more efficient to use `array_flip()` first and
* then `array_intersect_key()`, than `array_intersect()`. See the discussion at
* {@link https://stackoverflow.com/questions/6329211/php-array-intersect-efficiency Stack Overflow} for more
* information.
*
* Of course, this is only possible if the arrays contain integer or string values, and either don't contain duplicates,
* or that fact that duplicates will be removed does not matter.
*
* This class takes care of the detail.
*
* @internal
*/
class ArrayIntersector
{
/**
* the array with which the object was constructed, with all its keys exchanged with their associated values
*
* @var array<array-key, array-key>
*/
private $invertedArray;
/**
* Constructs the object with the array that will be reused for many intersection computations.
*
* @param array<array-key, array-key> $array
*/
public function __construct(array $array)
{
$this->invertedArray = \array_flip($array);
}
/**
* Computes the intersection of `$array` and the array with which this object was constructed.
*
* @param array<array-key, array-key> $array
*
* @return array<array-key, array-key>
* Returns an array containing all of the values in `$array` whose values exist in the array
* with which this object was constructed. Note that keys are preserved, order is maintained, but
* duplicates are removed.
*/
public function intersectWith(array $array): array
{
$invertedArray = \array_flip($array);
$invertedIntersection = \array_intersect_key($invertedArray, $this->invertedArray);
return \array_flip($invertedIntersection);
}
}
emogrifier/src/Utilities/CssConcatenator.php 0000644 00000015116 15154122651 0015246 0 ustar 00 <?php
declare(strict_types=1);
namespace Pelago\Emogrifier\Utilities;
/**
* Facilitates building a CSS string by appending rule blocks one at a time, checking whether the media query,
* selectors, or declarations block are the same as those from the preceding block and combining blocks in such cases.
*
* Example:
* $concatenator = new CssConcatenator();
* $concatenator->append(['body'], 'color: blue;');
* $concatenator->append(['body'], 'font-size: 16px;');
* $concatenator->append(['p'], 'margin: 1em 0;');
* $concatenator->append(['ul', 'ol'], 'margin: 1em 0;');
* $concatenator->append(['body'], 'font-size: 14px;', '@media screen and (max-width: 400px)');
* $concatenator->append(['ul', 'ol'], 'margin: 0.75em 0;', '@media screen and (max-width: 400px)');
* $css = $concatenator->getCss();
*
* `$css` (if unminified) would contain the following CSS:
* ` body {
* ` color: blue;
* ` font-size: 16px;
* ` }
* ` p, ul, ol {
* ` margin: 1em 0;
* ` }
* ` @media screen and (max-width: 400px) {
* ` body {
* ` font-size: 14px;
* ` }
* ` ul, ol {
* ` margin: 0.75em 0;
* ` }
* ` }
*
* @internal
*/
class CssConcatenator
{
/**
* Array of media rules in order. Each element is an object with the following properties:
* - string `media` - The media query string, e.g. "@media screen and (max-width:639px)", or an empty string for
* rules not within a media query block;
* - object[] `ruleBlocks` - Array of rule blocks in order, where each element is an object with the following
* properties:
* - mixed[] `selectorsAsKeys` - Array whose keys are selectors for the rule block (values are of no
* significance);
* - string `declarationsBlock` - The property declarations, e.g. "margin-top: 0.5em; padding: 0".
*
* @var array<int, object{
* media: string,
* ruleBlocks: array<int, object{
* selectorsAsKeys: array<string, array-key>,
* declarationsBlock: string
* }>
* }>
*/
private $mediaRules = [];
/**
* Appends a declaration block to the CSS.
*
* @param array<array-key, string> $selectors
* array of selectors for the rule, e.g. ["ul", "ol", "p:first-child"]
* @param string $declarationsBlock
* the property declarations, e.g. "margin-top: 0.5em; padding: 0"
* @param string $media
* the media query for the rule, e.g. "@media screen and (max-width:639px)", or an empty string if none
*/
public function append(array $selectors, string $declarationsBlock, string $media = ''): void
{
$selectorsAsKeys = \array_flip($selectors);
$mediaRule = $this->getOrCreateMediaRuleToAppendTo($media);
$ruleBlocks = $mediaRule->ruleBlocks;
$lastRuleBlock = \end($ruleBlocks);
$hasSameDeclarationsAsLastRule = \is_object($lastRuleBlock)
&& $declarationsBlock === $lastRuleBlock->declarationsBlock;
if ($hasSameDeclarationsAsLastRule) {
$lastRuleBlock->selectorsAsKeys += $selectorsAsKeys;
} else {
$lastRuleBlockSelectors = \is_object($lastRuleBlock) ? $lastRuleBlock->selectorsAsKeys : [];
$hasSameSelectorsAsLastRule = \is_object($lastRuleBlock)
&& self::hasEquivalentSelectors($selectorsAsKeys, $lastRuleBlockSelectors);
if ($hasSameSelectorsAsLastRule) {
$lastDeclarationsBlockWithoutSemicolon = \rtrim(\rtrim($lastRuleBlock->declarationsBlock), ';');
$lastRuleBlock->declarationsBlock = $lastDeclarationsBlockWithoutSemicolon . ';' . $declarationsBlock;
} else {
$mediaRule->ruleBlocks[] = (object)\compact('selectorsAsKeys', 'declarationsBlock');
}
}
}
/**
* @return string
*/
public function getCss(): string
{
return \implode('', \array_map([self::class, 'getMediaRuleCss'], $this->mediaRules));
}
/**
* @param string $media The media query for rules to be appended, e.g. "@media screen and (max-width:639px)",
* or an empty string if none.
*
* @return object{
* media: string,
* ruleBlocks: array<int, object{
* selectorsAsKeys: array<string, array-key>,
* declarationsBlock: string
* }>
* }
*/
private function getOrCreateMediaRuleToAppendTo(string $media): object
{
$lastMediaRule = \end($this->mediaRules);
if (\is_object($lastMediaRule) && $media === $lastMediaRule->media) {
return $lastMediaRule;
}
$newMediaRule = (object)[
'media' => $media,
'ruleBlocks' => [],
];
$this->mediaRules[] = $newMediaRule;
return $newMediaRule;
}
/**
* Tests if two sets of selectors are equivalent (i.e. the same selectors, possibly in a different order).
*
* @param array<string, array-key> $selectorsAsKeys1
* array in which the selectors are the keys, and the values are of no significance
* @param array<string, array-key> $selectorsAsKeys2 another such array
*
* @return bool
*/
private static function hasEquivalentSelectors(array $selectorsAsKeys1, array $selectorsAsKeys2): bool
{
return \count($selectorsAsKeys1) === \count($selectorsAsKeys2)
&& \count($selectorsAsKeys1) === \count($selectorsAsKeys1 + $selectorsAsKeys2);
}
/**
* @param object{
* media: string,
* ruleBlocks: array<int, object{
* selectorsAsKeys: array<string, array-key>,
* declarationsBlock: string
* }>
* } $mediaRule
*
* @return string CSS for the media rule.
*/
private static function getMediaRuleCss(object $mediaRule): string
{
$ruleBlocks = $mediaRule->ruleBlocks;
$css = \implode('', \array_map([self::class, 'getRuleBlockCss'], $ruleBlocks));
$media = $mediaRule->media;
if ($media !== '') {
$css = $media . '{' . $css . '}';
}
return $css;
}
/**
* @param object{selectorsAsKeys: array<string, array-key>, declarationsBlock: string} $ruleBlock
*
* @return string CSS for the rule block.
*/
private static function getRuleBlockCss(object $ruleBlock): string
{
$selectorsAsKeys = $ruleBlock->selectorsAsKeys;
$selectors = \array_keys($selectorsAsKeys);
$declarationsBlock = $ruleBlock->declarationsBlock;
return \implode(',', $selectors) . '{' . $declarationsBlock . '}';
}
}