commit 5663c8bc865b744661cf82b1abfca64d73c0f2fa Author: George Dorn Date: Mon Feb 23 04:25:44 2015 -0800 Some comics supported diff --git a/init.php b/init.php new file mode 100644 index 0000000..ea14929 --- /dev/null +++ b/init.php @@ -0,0 +1,196 @@ +add_hook($host::HOOK_ARTICLE_FILTER, $this); + $host->add_hook($host::HOOK_RENDER_ARTICLE_CDM, $this); + $host->add_hook($host::HOOK_RENDER_ARTICLE, $this); + } + + /** Hook entry point: hands $article to mangle_article() and returns its result. */ function hook_render_article_cdm($article) { + return $this->mangle_article($article); + } + + /** Hook entry point: hands $article to mangle_article() and returns its result. */ function hook_render_article($article) { + return $this->mangle_article($article); + } + + /** Rewrites the content field of $article for a fixed list of comic sites and returns $article. The site is chosen by substring match on the article link (on the content for XKCD). Most branches fetch the linked page via get_xpath_dealie() and replace the content with the img markup built by get_img_tags(); some also append the original text or alt text. When ADD_IDS (not defined in this view) is truthy the article id is appended. @param array $article article fields; link, content, id and title are read. @return array the same array with content possibly replaced. */ function mangle_article($article) { + if (strpos($article['link'], 'eatthattoast.com/comic/') !== FALSE) { + /* eatthattoast.com: rewrite -150x150.gif thumbnail names to the full-size .gif and blank out width and height attributes equal to 150 */ $article['content'] = preg_replace('#(/[0-9-]+)-150x150\.gif#', '$1.gif', $article['content']); + $article['content'] = preg_replace('#(width|height)="150"#', '', $article['content']); + } + // Joy of Tech + elseif (strpos($article['link'], 'http://www.geekculture.com/joyoftech/') !== FALSE) { + $xpath = $this->get_xpath_dealie($article['link']); + $article['content'] = $this->get_img_tags($xpath, '//p[@class="Maintext"]//img[contains(@src, "joyimages")]', $article); + } + // Girls with Slingshots + elseif (strpos($article['link'], 'girlswithslingshots.com/comic/') !== FALSE) { + $xpath = $this->get_xpath_dealie($article['link']); + $article['content'] = $this->get_img_tags($xpath, "//div[@id='comicbody']//img", $article); + } + // CTRL+ALT+DEL Sillies + elseif (strpos($article['link'], 'cad-comic.com/sillies/') !== FALSE) { + $xpath = $this->get_xpath_dealie($article['link']); + $article['content'] = $this->get_img_tags($xpath, "//div[@id='content']/img", $article); + } + // Three Panel Soul + elseif (strpos($article['link'], 'threepanelsoul.com/2') !== FALSE) { + $xpath = $this->get_xpath_dealie($article['link']); + $article['content'] = $this->get_img_tags($xpath, "//div[@id='comic']/img", $article); + } + // Two Lumps + elseif (strpos($article['link'], 
'twolumps.net/d/') !== FALSE) { + $xpath = $this->get_xpath_dealie($article['link']); + $article['content'] = $this->get_img_tags($xpath, "//img[@class='ksc' and contains(@src, 'comics')]", $article); + } + // Timothy Winchester (People I Know) + elseif (strpos($article['link'], 'www.timothywinchester.com/2') !== FALSE) { + $xpath = $this->get_xpath_dealie($article['link']); + /* keep the original content with tags stripped so it can be appended after the image */ $orig_content = strip_tags($article['content']); + $article['content'] = $this->get_img_tags($xpath, "//div[@class='singleImage']/img[@class='magicfields']", $article); + $article['content'] .= "
$orig_content
"; + } + // Awkward Zombie + elseif (strpos($article['link'], 'awkwardzombie.com/index.php?comic') !== FALSE) { + $xpath = $this->get_xpath_dealie($article['link']); + $orig_content = strip_tags($article['content']); + $article['content'] = $this->get_img_tags($xpath, "//div[@id='comic']/img", $article); + $article['content'] .= "

$orig_content

"; + //also append the blarg post because that's small, interesting, + //and sometimes necessary for old fogeys like me to get what game it's about + $entries = $xpath->query("//div[@id='blarg']/div[last()]"); + foreach ($entries as $entry){ + $article['content'] .= "

" . $entry->textContent . "

"; + } + } + // Scenes From A Multiverse (to get alt tags) + elseif (strpos($article['link'], 'amultiverse.com/comic/') !== FALSE) { + $xpath = $this->get_xpath_dealie($article['link']); + $article['content'] = $this->get_img_tags($xpath, "//div[@id='comic']//img", $article); + } + // XKCD (alt tags we don't need to call out for) + elseif (strpos($article['content'], 'imgs.xkcd.com/comics/') !== FALSE) { + /* works on the content already in the article, so no page fetch; the title attribute of each img is appended after the existing content */ $doc = new DOMDocument(); + $doc->loadHTML($article['content']); + $xpath = new DOMXpath($doc); + $imgs = $xpath->query('//img'); //doesn't get simpler than this + foreach($imgs as $img){ + $article['content'] .= "
Alt: " . $img->getAttribute('title') . ""; + } + } + // Questionable Content (cleanup) + elseif (strpos($article['link'], 'questionablecontent') !== FALSE) { + // only keep everything starting at the first + if(preg_match("@.*( + if(preg_match("@.*()@", $article['content'], $matches)){ + /* NOTE(review): the comment and preg_match() condition just before this point look truncated and duplicated in this copy of the patch; compare with the real init.php */ $img = $matches[1]; + $img = preg_replace("@width=\"\d+\"@", "", $img); + $img = preg_replace("@height=\"\d+\"@", "", $img); + $article['content'] = $img; + } + } + + if(ADD_IDS){ + $article['content'] .= "
ID: " . $article['id']; + } + return $article; + } + + /** Builds markup for every node matched by $query. For each match the src attribute is made absolute with rel2abs() against the article link, the node is serialised with saveXML(), and its alt text (title when alt is empty) is appended unless it is empty or equal to the article title. @param DOMXPath $xpath @param string $query XPath expression @param array $article link and title are read @return string concatenated markup, empty when nothing matches */ function get_img_tags($xpath, $query, $article){ + $entries = $xpath->query($query); + $result_html = ''; + foreach ($entries as $entry){ + $orig_src = $entry->getAttribute('src'); + $new_src = $this->rel2abs($orig_src, $article['link']); + $entry->setAttribute('src', $new_src); + $result_html .= $entry->ownerDocument->saveXML($entry); + $alt_text = $entry->getAttribute('alt'); + if (!$alt_text){ + $alt_text = $entry->getAttribute('title'); + } + if ($alt_text && $alt_text != $article['title']){ + $result_html .= "
Alt: $alt_text
"; + } + } + return $result_html; + } + + + /** Downloads $link through get_content(), loads the body into a DOMDocument with loadHTML() and returns a DOMXPath bound to it. NOTE(review): the download result is passed to loadHTML() unchecked; confirm what fetch_file_contents() yields on failure. */ function get_xpath_dealie($link){ + list($html, $content_type) = $this->get_content($link); + $doc = new DOMDocument(); + $doc->loadHTML($html); + $xpath = new DOMXPath($doc); + return $xpath; + } + + /** Returns array($html, $content_type), where $html comes from fetch_file_contents($link) and $content_type is the global $fetch_last_content_type read right after the call. */ function get_content($link) { + /** + * Use this if you want to dig into the linked page for content, e.g. alt tags. + */ + global $fetch_last_content_type; + $html = fetch_file_contents($link); + $content_type = $fetch_last_content_type; + return array( $html, $content_type); + } + + /** Resolves $rel against $base. Returns $rel untouched when it has a scheme or begins with two slashes; returns $base.$rel when $rel begins with a hash or question mark; otherwise joins $rel onto the directory of the base path (or onto the host root when $rel begins with a slash), collapses duplicate slashes, dot segments and dir/.. pairs, and prefixes the base scheme. NOTE(review): $rel[0] is read without guarding against an empty string. */ function rel2abs($rel, $base) + { + if (parse_url($rel, PHP_URL_SCHEME) != '' || substr($rel, 0, 2) == '//') { + return $rel; + } + if ($rel[0]=='#' || $rel[0]=='?') { + return $base.$rel; + } + /* extract() turns the parse_url() parts into locals; $scheme, $host and $path are used below */ extract(parse_url($base)); + $path = preg_replace('#/[^/]*$#', '', $path); + if ($rel[0] == '/') { + $path = ''; + } + + /* dirty absolute URL */ + $abs = "$host$path/$rel"; + + /* replace '//' or '/./' or '/foo/../' with '/' */ + $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'); + /* rerun the substitutions until a pass reports zero replacements in $n */ for($n=1; $n>0; $abs=preg_replace($re, '/', $abs, -1, $n)) {} + + /* absolute URL is ready! */ + return $scheme.'://'.$abs; + } + + /** Returns 2; presumably the plugin API level the host application checks - confirm against the host. */ function api_version() { + return 2; + } +} +