Operators = array( 'clean_rewrite_xhtml', 'bookmarkize', 'entity_decode' );
    }

    // NOTE(review): the beginning of this file (presumably the PHP open tag, the
    // class header and the head of the constructor) is missing from this copy.
    // The fragment above is kept verbatim; restore the file head from version
    // control before deploying.

    /**
     * Returns the list of operator names stored in $this->Operators.
     *
     * @return array
     */
    function operatorList()
    {
        return $this->Operators;
    }

    /**
     * Always returns true.
     *
     * NOTE(review): presumably tells the template engine that
     * namedParameterList() is keyed by operator name - confirm against the
     * eZ Publish template operator API.
     *
     * @return bool
     */
    function namedParameterPerOperator()
    {
        return true;
    }

    /**
     * Describes the named parameters of each operator.
     *
     * - clean_rewrite_xhtml: 'url_site' (string, required)
     * - entity_decode: no parameter
     * - bookmarkize: 'post_url' (string, required) and 'post_name'
     *   (string, optional, defaults to false)
     *
     * @return array
     */
    function namedParameterList()
    {
        return array(
            'clean_rewrite_xhtml' => array(
                'url_site' => array(
                    'type' => 'string',
                    'required' => true,
                    'default' => ''
                )
            ),
            'entity_decode' => array(),
            'bookmarkize' => array(
                'post_url' => array(
                    'type' => 'string',
                    'required' => true,
                    'default' => ''
                ),
                'post_name' => array(
                    'type' => 'string',
                    'required' => false,
                    'default' => false
                )
            )
        );
    }

    /**
     * Applies the operator named $operatorName to $operatorValue.
     *
     * $operatorValue is passed by reference: it is read as the operator input
     * and overwritten with the result. Operator names other than the three
     * handled below leave it untouched. $tpl, $operatorParameters,
     * $rootNamespace and $currentNamespace are not used by this method.
     *
     * @param mixed  $tpl
     * @param string $operatorName
     * @param mixed  $operatorParameters
     * @param mixed  $rootNamespace
     * @param mixed  $currentNamespace
     * @param mixed  $operatorValue   input value, replaced by the result
     * @param array  $namedParameters values of the parameters declared in
     *                                namedParameterList()
     */
    function modify( $tpl, $operatorName, $operatorParameters, $rootNamespace, $currentNamespace, &$operatorValue, $namedParameters )
    {
        // switch compares loosely, exactly like the == chain it replaces
        switch ( $operatorName )
        {
            case 'clean_rewrite_xhtml':
            {
                $html = $operatorValue;
                eZDebug::accumulatorStart( 'planete', 'Planete', 'Clean rewrite operator' );
                $operatorValue = self::cleanRewriteXHTML( $html, $namedParameters['url_site'] );
                eZDebug::accumulatorStop( 'planete' );
            } break;

            case 'entity_decode':
            {
                $html = $operatorValue;
                $ini = eZINI::instance( 'template.ini' );
                $charset = $ini->variable( 'CharsetSettings', 'DefaultTemplateCharset' );
                $operatorValue = html_entity_decode( $html, ENT_QUOTES, $charset );
            } break;

            case 'bookmarkize':
            {
                $url = $operatorValue;
                $postURL = $namedParameters['post_url'];
                // the post URL doubles as the title when no usable name is given
                $postName = $postURL;
                if ( isset( $namedParameters['post_name'] ) && $namedParameters['post_name'] )
                {
                    $postName = $namedParameters['post_name'];
                }
                $operatorValue = self::bookmarkize( $url, $postURL, $postName );
            } break;
        }
    }

    /**
     * Fills a URL template: every '%url' is replaced by $postURL, then every
     * '%title' by $postName.
     *
     * The values are inserted as given, no URL encoding is applied here.
     *
     * @param string $url      template containing the placeholders
     * @param string $postURL
     * @param string $postName
     * @return string
     */
    static function bookmarkize( $url, $postURL, $postName )
    {
        $withURL = str_replace( '%url', $postURL, $url );
        return str_replace( '%title', $postName, $withURL );
    }

    /**
     * Cleans an HTML fragment and rewrites its href/src attributes.
     *
     * The trimmed input is repaired by tidy (XHTML output, body only, numeric
     * entities), wrapped and loaded as XML, then passed through cleanScript(),
     * cleanTags() and rewriteURI(). An empty (after trim) input yields ''.
     *
     * NOTE(review): the two string literals wrapped around $res and the
     * str_replace() search string look like they lost markup in this copy of
     * the file (an empty search string leaves the str_replace() subject
     * unchanged). They are reproduced exactly as found; confirm against
     * version control.
     *
     * NOTE(review): when the XML parsing fails, the wrapped tidy output is
     * returned WITHOUT going through cleanScript(), i.e. potentially with
     * script elements and on* attributes intact. Decide whether callers should
     * get an empty or escaped string instead.
     *
     * @param string $html    HTML fragment to clean
     * @param string $urlSite URL used to make relative href/src values absolute
     * @return string
     */
    static function cleanRewriteXHTML( $html, $urlSite )
    {
        $html = trim( $html );
        if ( $html === '' )
        {
            return '';
        }

        // cleanup using tidy
        $tidy = new Tidy();
        $config = array(
            'indent' => false,
            'show-body-only' => true,
            'alt-text' => '',
            'wrap' => 0,
            'numeric-entities' => true,
            'output-xhtml' => true
        );
        $tidy->parseString( $html, $config, 'utf8' );
        $tidy->cleanRepair();
        $res = (string) $tidy;

        // manual cleanup; the line breaks below are part of the string literals
        $xml = '
' . $res . '
';
        $dom = new DomDocument();
        $parsing = $dom->loadXML( $xml );
        eZDebug::writeDebug( $xml );
        if ( !$parsing )
        {
            eZDebug::writeError( $xml, 'Failed to parse XML in ' . __METHOD__ );
            return $xml;
        }

        $xpath = new DomXPath( $dom );
        // avoid XSS attacks
        self::cleanScript( $xpath );
        // remove unnecessary tags
        self::cleanTags( $xpath );
        // rewriting malformed URIs
        self::rewriteURI( $xpath, $urlSite );
        $res = str_replace( '', '', $dom->saveXML() );
        return $res;
    }

    /**
     * Makes href and src attribute values absolute, in place.
     *
     * Only attributes whose value does not start with "http" are considered:
     * - values starting with '//' (protocol-relative) get the scheme of
     *   $urlSite prepended;
     * - other values starting with '/' get scheme://host[:port] of $urlSite
     *   prepended;
     * - values without any ':' (including empty ones) get $urlSite prepended;
     * - anything else (mailto:, javascript:, ...) is left untouched.
     *
     * If $urlSite has no usable scheme and host, values starting with '/' are
     * left untouched instead of being prefixed with a broken site root.
     *
     * @param DOMXPath $xpath   XPath object bound to the document to modify
     * @param string   $urlSite
     */
    static function rewriteURI( $xpath, $urlSite )
    {
        $attributeNodes = $xpath->query( '//@*[( local-name() = "href" or local-name() = "src" ) and not( starts-with( ., "http" ) )]' );

        // parse_url() returns false on seriously malformed URLs and omits the
        // components it cannot find, so nothing can be assumed about its result
        $urlInfo = parse_url( $urlSite );
        $scheme = null;
        $siteRoot = null;
        if ( is_array( $urlInfo ) && isset( $urlInfo['scheme'] ) )
        {
            $scheme = $urlInfo['scheme'];
            if ( isset( $urlInfo['host'] ) )
            {
                $siteRoot = $scheme . '://' . $urlInfo['host'];
                if ( isset( $urlInfo['port'] ) )
                {
                    $siteRoot .= ':' . $urlInfo['port'];
                }
            }
        }

        foreach ( $attributeNodes as $attribute )
        {
            $value = $attribute->value;
            // substr() instead of $value[0]: the value may be an empty string
            if ( substr( $value, 0, 2 ) === '//' )
            {
                if ( $scheme !== null )
                {
                    $attribute->value = $scheme . ':' . $value;
                }
            }
            elseif ( substr( $value, 0, 1 ) === '/' )
            {
                if ( $siteRoot !== null )
                {
                    $attribute->value = $siteRoot . $value;
                }
            }
            elseif ( strpos( $value, ':' ) === false )
            {
                $attribute->value = $urlSite . $value;
            }
        }
    }

    /**
     * Removes unwanted elements and attributes from the document, in place.
     *
     * In order: the br elements at the very beginning of the root element,
     * the a elements without href attribute, the target attribute of a
     * elements, every align and valign attribute, a single div matched as
     * blog.developpez.com footer and every div with the tweetmeme_button class.
     *
     * @param DOMXPath $xpath XPath object bound to the document to modify
     */
    static function cleanTags( $xpath )
    {
        // remove the br elements at the beginning; childNodes is a live list,
        // so the first child is re-read after each removal instead of
        // removing nodes while iterating over it
        $root = $xpath->document->documentElement;
        while ( $root->firstChild !== null && $root->firstChild->localName == 'br' )
        {
            $root->removeChild( $root->firstChild );
        }

        // get rid of a elements without href (used as anchor)
        $anchorNodes = $xpath->query( '//a[not( @href )]' );
        foreach ( $anchorNodes as $anchor )
        {
            $anchor->parentNode->removeChild( $anchor );
        }

        // get rid of target attribute on link
        $targetAttributes = $xpath->query( '//@*[local-name() = "target" and local-name( .. ) = "a"]' );
        foreach ( $targetAttributes as $attr )
        {
            $attr->parentNode->removeAttributeNode( $attr );
        }

        // get rid of valign and align attributes
        $alignAttributes = $xpath->query( '//@*[local-name() = "align" or local-name() = "valign"]' );
        foreach ( $alignAttributes as $attr )
        {
            $attr->parentNode->removeAttributeNode( $attr );
        }

        // remove blog.developpez.com stuff, only when exactly one div matches
        $divNodes = $xpath->query( '//div[contains( @style, "font-size" ) and contains( a/@href, "http://blog.developpez.com/" )]' );
        if ( $divNodes && $divNodes->length === 1 )
        {
            $divNode = $divNodes->item( 0 );
            $divNode->parentNode->removeChild( $divNode );
        }

        // remove tweetmeme widget
        $divNodes = $xpath->query( '//div[@class="tweetmeme_button"]' );
        if ( $divNodes && $divNodes->length > 0 )
        {
            foreach ( $divNodes as $div )
            {
                $div->parentNode->removeChild( $div );
            }
        }
    }

    /**
     * Removes every script element and every attribute whose local name
     * starts with "on" from the document, in place.
     *
     * NOTE(review): URIs using the javascript: scheme in href/src attributes
     * are not handled here (nor in rewriteURI(), which skips any value
     * containing ':'); consider stripping them as well.
     *
     * @param DOMXPath $xpath XPath object bound to the document to modify
     */
    static function cleanScript( $xpath )
    {
        $scriptNodes = $xpath->query( '//script' );
        foreach ( $scriptNodes as $script )
        {
            $script->parentNode->removeChild( $script );
        }

        $handlerAttributes = $xpath->query( '//@*[starts-with( local-name(), "on" )]' );
        foreach ( $handlerAttributes as $attr )
        {
            $attr->parentNode->removeAttributeNode( $attr );
        }
    }

    /**
     * Returns the location of the RSS cache.
     *
     * The directory is built from the VarDir and CacheDir settings of the
     * FileSettings block of the default eZINI instance, followed by '/rss/'.
     *
     * @return array with the keys 'cache-dir' and 'cache-file'
     */
    static function rssCacheInfo()
    {
        $ini = eZINI::instance();
        $varDir = $ini->variable( 'FileSettings', 'VarDir' );
        $cacheDir = $varDir . '/' . $ini->variable( 'FileSettings', 'CacheDir' ) . '/rss/';
        return array(
            'cache-dir' => $cacheDir,
            'cache-file' => 'planet.php'
        );
    }
}
?>