File Comic/Bysize.php

  1: <?php
  2: /**
  3:  * Klutz_Comic_Bysize Class.
  4:  *
  5:  * This class uses follows the search methodology until it hits the
  6:  * final page in the list.  On this page it gets a list of all images
  7:  * and tries to figure out which image is most likely to be the comic
  8:  * based on image sizes.  This is the LEAST efficient driver and
  9:  * you're discouraged from using it when not absolutely necessary.
 10:  *
 11:  * @author  Marcus I. Ryan <marcus@riboflavin.net>
 12:  * @package Klutz
 13:  */
 14: class Klutz_Comic_Bysize extends Klutz_Comic
 15: {
 16:     /**
 17:      * Once set, an array of preg searches to perform to find the comic image
 18:      *
 19:      * @var array
 20:      */
 21:     var $search = null;
 22: 
 23:     /**
 24:      * A list of images to ignore (preg matches)
 25:      *
 26:      * @var array
 27:      */
 28:     var $ignore = array();
 29: 
 30:     /**
 31:      * What is the smallest height to consider
 32:      *
 33:      * @var integer
 34:      */
 35:     var $minheight = 0;
 36: 
 37:     /**
 38:      * What are the smallest width to consider
 39:      *
 40:      * @var integer
 41:      */
 42:     var $minwidth = 0;
 43: 
 44:     /**
 45:      * What is the largest height to consider
 46:      *
 47:      * @var integer
 48:      */
 49:     var $maxheight = 65536;
 50: 
 51:     /**
 52:      * What is the largest width to consider
 53:      *
 54:      * @var integer
 55:      */
 56:     var $maxwidth = 65536;
 57: 
 58:     /**
 59:      * How should we decide which image to take? Options are "first",
 60:      * "biggest", and "smallest".
 61:      *
 62:      * @param string
 63:      */
 64:     var $choose = 'biggest';
 65: 
 66:     /**
 67:      * Constructor - Create an object that can be used to retrieve a comic
 68:      * by looking at all images on a page, a list of images to ignore, and
 69:      * a range of dimensions, then choose which image is most likely the
 70:      * comic.
 71:      *
 72:      * @param string $comic                 Index for the comic
 73:      */
 74:     function Klutz_Comic_bysize($comic)
 75:     {
 76:         // call the parent constructor...this should leave $comic with just
 77:         // the parameters we need for fetching (if any are left)
 78: 
 79:         $par = get_parent_class($this);
 80:         $this->$par($comic);
 81: 
 82:         if (is_null($this->subs)) {
 83:             $this->subs = array('url');
 84:         }
 85: 
 86:         // Check to see if we have one search pattern
 87:         if (empty($comic['search'])) {
 88:             $this->search = array();
 89:         } elseif (is_array($comic['search']) && count($comic['search']) > 0) {
 90:             $this->search = $comic['search'];
 91:             unset($comic['search']);
 92:         } elseif (is_string($comic['search']) && !empty($comic['search'])) {
 93:             $this->search = array($comic['search']);
 94:             unset($comic['search']);
 95:         } else {
 96:             $this->search = array();
 97:         }
 98: 
 99:         $this->search = $this->_prepareSearch($this->search);
100: 
101:         // Check to see if we have any ignores
102:         if (isset($comic['ignore'])) {
103:             if (is_array($comic['ignore']) && count($comic['ignore']) > 0) {
104:                 $this->ignore = $comic['ignore'];
105:             } elseif (is_string($comic['ignore']) && !empty($comic['ignore'])) {
106:                 $this->ignore = array($comic['ignore']);
107:             }
108:             unset($comic['ignore']);
109:         }
110: 
111:         foreach (array('minheight', 'maxheight', 'minwidth', 'maxwidth') as $f) {
112:             if (!empty($comic[$f])) {
113:                 $this->$f = $comic[$f];
114:             }
115:             unset($comic[$f]);
116:         }
117: 
118:         if (isset($comic['choose'])) {
119:             $this->choose = $comic['choose'];
120:             unset($comic[$f]);
121:         }
122:     }
123: 
124:     /**
125:      * Do all that is necessary to get the final URL from which the comic
126:      * will be fetched.  Instead of returning the comic, return the URL
127:      * pointing to that comic.
128:      *
129:      * @param timestamp $date  Date of the comic to retrieve (default today)
130:      *
131:      * @return string  URL of the comic image
132:      */
133:     function fetchURL($date = null)
134:     {
135:         if (is_null($date)) {
136:             $date = mktime(0, 0, 0);
137:         }
138:         $offset = $this->getOverride('offset', $date);
139:         $d = getdate($date);
140:         $date = mktime(0, 0, 0, $d['mon'], $d['mday'] - $offset, $d['year']);
141: 
142:         $url = $this->getOverride('url', $date);
143:         if (in_array('url', $this->getOverride('subs', $date))) {
144:             $url = $this->substitute($url, $date);
145:         }
146: 
147:         // make sure $this->http is set up properly
148:         $this->_initHTTP($date, $url);
149: 
150:         // loop through the array of searches to get a final URL
151:         foreach ($this->getOverride('search', $date,
152:                                     array($this, '_prepareSearch')) as $search) {
153:             if (in_array('search', $this->subs)) {
154:                 $search = $this->substitute($search, $date);
155:             }
156: 
157:             $this->http->setURL($url);
158:             $this->http->sendRequest();
159:             if (is_array($search)) {
160:                 $text = $this->http->getResponseBody();
161:                 foreach ($search as $s) {
162:                     $num_matches = preg_match($s, $text, $matches);
163:                     if (isset($matches[1])) {
164:                         $text = $matches[1];
165:                     } elseif ($num_matches > 0) {
166:                         $text = $matches[0];
167:                     } else {
168:                         break;
169:                     }
170:                 }
171:             } else {
172:                 preg_match($search, $this->http->getResponseBody(), $matches);
173:             }
174: 
175:             if (empty($matches[1]) && $this->days != 'random') {
176:                 $msg = "URL: $url" .
177:                     "\nSEARCH: " . print_r($search, true) .
178:                     "\nHTML: " . $this->http->getResponseBody();
179:                 Horde::logMessage($msg, __FILE__, __LINE__, PEAR_LOG_DEBUG);
180:                 return false;
181:             }
182: 
183:             if (strstr($matches[1], '://')) {
184:                 $url = $matches[1];
185:             } elseif (substr($matches[1],0,1) == '/') {
186:                 $url = preg_replace("|^(http://.*?)/.*$|", '\\1', $url);
187:                 $url .= $matches[1];
188:             } else {
189:                 $url = preg_replace("|^(http://[^?]*/).*$|", '\\1', $url);
190:                 $url .= $matches[1];
191:             }
192:         }
193: 
194:         // At this point we should have a URL we need to get the list of
195:         // images from.
196:         $this->http->setURL($url);
197:         $this->http->sendRequest();
198:         $images = $this->_stripimages($this->http->getResponseBody());
199:         $images = $this->_expandurls($images, $url);
200:         $images = $this->_getsizes($images, true, $date);
201: 
202:         // make sure we actually have a list of images to work from
203:         if (count($images) == 0) { return false; }
204: 
205:         // if we have only one image it is the biggest, smalles, first...
206:         if (count($images) == 1) { return $images[0]['url']; }
207: 
208:         switch ($this->getOverride('choose',$date)) {
209:         case 'biggest':
210:             $image = false;
211:             $max = 0;
212:             foreach ($images as $i) {
213:                 $s = $i['height'] * $i['width'];
214:                 if ( $s > $max) {
215:                     $max = $s;
216:                     $image = $i['url'];
217:                 }
218:             }
219:             return $image;
220:             break;
221:         case 'smallest':
222:             $image = false;
223:             $min = 0;
224:             foreach ($images as $i) {
225:                 $s = $i['height'] * $i['width'];
226:                 if ( $s < $max) {
227:                     $min = $s;
228:                     $image = $i['url'];
229:                 }
230:             }
231:             return $image;
232:             break;
233:         case 'first':
234:         default:
235:             return $images[0]['url'];
236:             break;
237:         }
238:     }
239: 
240:     /**
241:      * Fetch the actual image
242:      *
243:      * @param timestamp $date  The date to retrieve the comic for (default
244:      *                         today).
245:      *
246:      * @return mixed  Klutz_Image on success, false otherwise.
247:      */
248:     function &fetchImage($date = null)
249:     {
250:         if (is_null($date)) {
251:             $date = mktime(0, 0, 0);
252:         }
253:         $offset = $this->getOverride('offset', $date);
254:         $d = getdate($date);
255:         $date = mktime(0, 0, 0, $d['mon'], $d['mday'] - $offset, $d['year']);
256: 
257:         $url = $this->fetchURL($date);
258:         if ($url === false) {
259:             return $url;
260:         }
261: 
262:         // Make sure $this->http is set up properly.
263:         $this->_initHTTP($date, $url);
264: 
265:         $this->http->setURL($url);
266:         $this->http->sendRequest();
267: 
268:         $image = &new Klutz_Image($this->http->getResponseBody());
269:         if (is_null($image) || is_null($image->type)) {
270:             $image = false;
271:         }
272: 
273:         return $image;
274:     }
275: 
276:     /**
277:      * Strip the list of images from the contents of a web page.
278:      * Derived from Snoopy's striplinks function.
279:      *
280:      * @param string $document  The HTML document to parse.
281:      *
282:      * @return array  List of images in the page.
283:      */
284:     function _stripimages($document)
285:     {
286:         preg_match_all("'<\s*img.*?src\s*=\s*       # find <img src=
287:                         ([\"\'])?                   # find single or double quote
288:                         (?(1) (.*?)\\1 | ([^\s\>]+))# if quote found, match up to next matching
289:                                                     # quote, otherwise match up to next space
290:                         'isx", $document, $images);
291: 
292:         // Concatenate the non-empty matches from the conditional
293:         // subpattern.
294:         $match = array();
295:         foreach ($images[2] as $val) {
296:             if (!empty($val)) {
297:                 $match[] = $val;
298:             }
299:         }
300:         foreach ($images[3] as $val) {
301:             if (!empty($val)) {
302:                 $match[] = $val;
303:             }
304:         }
305: 
306:         // Return the images.
307:         return array_filter($match, array($this, '_ignore'));
308:     }
309: 
310:     /**
311:      * Expand paths to fully-qualified URLs
312:      *
313:      * @param array $urls   Paths to expand
314:      * @param string $base  The base URL used for relative links
315:      *
316:      * @return array  Fully-qualified URLs
317:      */
318:     function _expandurls($urls, $base)
319:     {
320:         $return = array();
321:         foreach ($urls as $url) {
322:             if (strstr($url, '://')) {
323:                 // Don't do anything, but it saves some processing.
324:             } elseif (substr($url,0,1) == '/') {
325:                 $url = preg_replace("|^(http://.*?)/.*$|", '\\1', $base) . $url;
326:             } else {
327:                 $url = preg_replace("|^(http://.*/).*$|", '\\1', $base) . $url;
328:             }
329:             if (in_array(substr($url, 0, 4), array('http', 'ftp'))) {
330:                 $return[] = $url;
331:             }
332:         }
333:         return $return;
334:     }
335: 
336:     /**
337:      * Determine if the passed image name is on the list of images to
338:      * ignore.
339:      *
340:      * @param string $string  The name to check
341:      *
342:      * @return boolean  True if we should ignore it
343:      */
344:     function _ignore($string)
345:     {
346:         foreach ($this->ignore as $ignore) {
347:             if (stristr($string, $ignore) !== false) {
348:                 return false;
349:             }
350:         }
351:         return true;
352:     }
353: 
354:     /**
355:      * Get the dimensions from the list of images passed in.
356:      *
357:      * @param array $images    The list of images to check.
358:      * @param boolean $filter  Filter by size, etc? (true).
359:      * @param timestamp $date  Date to use for filter prefs.
360:      *
361:      * @return array  Dimensions for all desired images.
362:      */
363:     function _getsizes($images, $filter = true, $date = null)
364:     {
365:         $sizes = array();
366: 
367:         $minwidth = $this->getOverride('minwidth', $date);
368:         $minheight = $this->getOverride('minheight', $date);
369:         $maxwidth = $this->getOverride('maxwidth', $date);
370:         $maxheight = $this->getOverride('maxheight', $date);
371: 
372:         foreach ($images as $i) {
373:             $s = @getimagesize($i);
374:             if (!is_null($s)) {
375:                 if ($filter) {
376:                     if ($s[KLUTZ_FLD_WIDTH]< $minwidth) {
377:                         continue;
378:                     }
379:                     if ($s[KLUTZ_FLD_HEIGHT] < $minheight) {
380:                         continue;
381:                     }
382:                     if ($s[KLUTZ_FLD_WIDTH] > $maxwidth) {
383:                         continue;
384:                     }
385:                     if ($s[KLUTZ_FLD_HEIGHT] > $maxheight) {
386:                         continue;
387:                     }
388:                 }
389:                 $sizes[] = array('url' => $i,
390:                                  'height' => $s[KLUTZ_FLD_HEIGHT],
391:                                  'width'  => $s[KLUTZ_FLD_WIDTH]);
392:             }
393:         }
394: 
395:         return $sizes;
396:     }
397: 
398: }
399:
Packages

Classes