forked from soyrex/PHP-Microdata
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMicrodataParser.php
More file actions
134 lines (124 loc) · 2.72 KB
/
MicrodataParser.php
File metadata and controls
134 lines (124 loc) · 2.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
<?
/**
* Microdata library to read microdata from websites and provide it back
* as structured data. Currently supports:
*
* - PHP Array
* - JSON
* XML (listed as a target format, but no XML accessor is implemented in this file)
*
* @package Microdata
* @author Alex Holt <alex@outsideinmedia.co.uk>
* @copyright 2011
*/
/**
* Class: MicrodataParser
* Implements a simple parser to load the html for a URL and read out a
* data structure from the microdata items defined on the page.
*
* @package Microdata
*/
class MicrodataParser
{
    /**
     * Parsed microdata tree, filled in once by the constructor.
     *
     * @var array
     */
    private $dataArray = array();

    /**
     * Fetches the document at $url and parses its microdata into the
     * internal array.
     *
     * @param string $url Location of the HTML document; anything that
     *                    file_get_contents() accepts.
     *
     * @throws RuntimeException When the document cannot be fetched or the
     *                          HTML parser rejects it.
     */
    public function __construct($url)
    {
        // simple_html_dom does the DOM reading and traversal for us.
        require_once('simplehtmldom/simple_html_dom.php');

        $html = file_get_contents($url);
        if ($html === false) {
            throw new RuntimeException('Unable to fetch document from ' . $url);
        }

        // Newlines are flattened to spaces before the markup is parsed.
        $dom = str_get_html(str_replace("\n", ' ', $html));
        if (!$dom) {
            throw new RuntimeException('Unable to parse HTML fetched from ' . $url);
        }

        // Prefer <body>; fall back to the document root so fragments that
        // have no <body> element are still scanned.
        $root = $dom->find('body', 0);
        if (!$root) {
            $root = $dom->root;
        }

        $this->dataArray = $root ? $this->getItems($root) : array();

        // The result holds only plain arrays and strings, so the DOM (and
        // its circular node references) can be released straight away.
        $dom->clear();
        unset($dom);
    }

    /**
     * getArray()
     * Returns the parsed document's microdata tree as a PHP array.
     *
     * @return array $microdataArray
     */
    public function getArray()
    {
        return $this->dataArray;
    }

    /**
     * getJson()
     * Returns the parsed document's microdata tree as a JSON string.
     *
     * @return string|false $microdataJson Result of json_encode(), which
     *                                     is false when encoding fails.
     */
    public function getJson()
    {
        return json_encode($this->dataArray);
    }

    /**
     * prettyType()
     * Returns Microdata type as a simple string - nicer for matching.
     * Everything up to and including the last "/" is dropped and the rest
     * is lower-cased, e.g. "http://schema.org/Product" becomes "product".
     *
     * @param string $type Raw itemtype attribute value.
     *
     * @return string $microdataType
     */
    private function prettyType($type)
    {
        return strtolower(preg_replace('|.*/|', '', $type));
    }

    /**
     * getValue()
     * Returns an element's property value.
     *
     * Elements that carry their value in an attribute (per the HTML
     * microdata rules) return that attribute; every other element returns
     * its inner markup with the tags stripped.
     *
     * @param object $elem DOM node exposing ->tag, ->innertext and its
     *                     attributes as properties.
     *
     * @return string|bool Property value. The attribute branches return
     *                     whatever the node reports for that attribute;
     *                     NOTE(review): simple_html_dom appears to report a
     *                     missing attribute as false - confirm.
     */
    private function getValue($elem)
    {
        switch ($elem->tag) {
            case 'meta':
                return $elem->content;

            case 'a':
            case 'area':
            case 'link':
                return $elem->href;

            case 'img':
            case 'audio':
            case 'embed':
            case 'iframe':
            case 'source':
            case 'track':
            case 'video':
                return $elem->src;

            case 'object':
                return $elem->data;

            case 'data':
            case 'meter':
                return $elem->value;

            case 'time':
                // Use the machine-readable datetime when present; otherwise
                // fall through to the element's text.
                $datetime = $elem->datetime;
                if (is_string($datetime) && $datetime !== '') {
                    return $datetime;
                }
                break;
        }

        return strip_tags($elem->innertext);
    }

    /**
     * getItems()
     * Recurse through the tree and find microdata.. the guts.
     *
     * For an element with itemscope the result is a one-entry array
     * wrapping the item (keyed by its itemprop, or appended positionally
     * when it has none). The item holds "_type", "_class" and its
     * properties. For any other element the result is the flat set of
     * properties and items found beneath it, so that they bubble up into
     * the nearest enclosing item.
     *
     * A property that occurs more than once is returned as a list of all
     * its values instead of keeping only the last one; a property that
     * occurs once keeps its plain, unwrapped value.
     *
     * @param object $elem DOM node to scan.
     *
     * @return array $microdataItems
     */
    private function getItems($elem)
    {
        $isItem = (bool) $elem->itemscope;
        $item = array();

        if ($isItem) {
            $item['_type'] = $elem->itemtype;
            $item['_class'] = $this->prettyType($elem->itemtype);
        }

        foreach ($elem->children() as $child) {
            if ($child->itemprop && !$child->itemscope) {
                // Plain property: take its value, do not descend further.
                $this->addValue($item, $child->itemprop, $this->getValue($child));
            } else {
                // Nested item or plain wrapper: fold in whatever it yields.
                $this->mergeInto($item, $this->getItems($child));
            }
        }

        if ($isItem) {
            $name = $elem->itemprop;
            $item = (is_string($name) && $name !== '')
                ? array($name => $item)
                : array($item);
        }

        return $item;
    }

    /**
     * Stores one value under $name without discarding earlier values.
     *
     * @param array $item  Collection being built (modified in place).
     * @param mixed $name  Property name; anything that is not a non-empty
     *                     string is appended positionally.
     * @param mixed $value Property value or nested item array.
     *
     * @return void
     */
    private function addValue(array &$item, $name, $value)
    {
        if (!is_string($name) || $name === '') {
            $item[] = $value;
            return;
        }

        if (!array_key_exists($name, $item)) {
            $item[$name] = $value;
        } elseif ($this->isValueList($item[$name])) {
            $item[$name][] = $value;
        } else {
            // Second occurrence: promote the single value to a list.
            $item[$name] = array($item[$name], $value);
        }
    }

    /**
     * Folds the result of a child getItems() call into $item.
     *
     * @param array $item  Collection being built (modified in place).
     * @param array $found Result returned by getItems() for a child.
     *
     * @return void
     */
    private function mergeInto(array &$item, array $found)
    {
        foreach ($found as $name => $value) {
            if (is_int($name)) {
                // Unnamed entries keep being appended positionally.
                $item[] = $value;
            } elseif ($this->isValueList($value)) {
                // The child already collected repeats; add them one by one
                // so lists never end up nested inside lists.
                foreach ($value as $single) {
                    $this->addValue($item, $name, $single);
                }
            } else {
                $this->addValue($item, $name, $value);
            }
        }
    }

    /**
     * Tells a list of repeated values apart from a single item array.
     * Item arrays always carry a "_type" key; value lists never do.
     *
     * @param mixed $value Stored property value.
     *
     * @return bool
     */
    private function isValueList($value)
    {
        return is_array($value) && !array_key_exists('_type', $value);
    }
}
?>