3
1
Back

$article['content']); $article['content'] = $this->get_img_tags($xpath, '//p[@class="Maintext"]//img[contains(@src, "joyimages")]', $article); } //Sites that provide images and just need alt tags elseif (strpos($article['link'], '//theoatmeal.com/comics/') !== FALSE) { $xpath = $this->get_xpath_dealie($article['link']); $article['content'] = $this->get_img_tags($xpath, '(//div[@class="post"]//img)', $article); $article['content'] = $this->get_img_tags($xpath, '(//div[@id="main"]//img)', $article); } Clean up code formatting; added a few comics; standardized appending alt/title text under images (extra useful for non-browser users else { return $rel; } extract(parse_url($base)); $path = preg_replace('#/[^/]*$#', '', $path); if ($rel[0] == '#' || $rel[0] == '?') { return $this->mangle_article($article); } function get_xpath_dealie($link) { $abs = "$host$path/$rel"; /* replace '//' or '/./' or '/foo/../' with '/' */ $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#'); for($n=1; $n>0; $abs=preg_replace($re, '/', $abs, -1, $n)) {} /* absolute URL is ready! */ return $scheme.'://'.$abs; } /* dirty absolute URL is ready! */ elseif (strpos(strtolower($article['link']), 'giantitp.com/comics/') !== FALSE) { $doc = new DOMDocument.

New Pull Request