Initial commit
This commit is contained in:
+6
@@ -0,0 +1,6 @@
|
||||
before_install:
|
||||
- '[ "${TRAVIS_NODE_VERSION}" != "0.8" ] || npm install -g npm@1.4.28'
|
||||
- npm install -g npm@latest
|
||||
language: node_js
|
||||
node_js:
|
||||
- 8
|
||||
+11
@@ -0,0 +1,11 @@
|
||||
Copyright (c) Felix Böhm
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS,
|
||||
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
+217
@@ -0,0 +1,217 @@
|
||||
var ElementType = require("domelementtype");
|
||||
|
||||
var re_whitespace = /\s+/g;
|
||||
var NodePrototype = require("./lib/node");
|
||||
var ElementPrototype = require("./lib/element");
|
||||
|
||||
//Handler for htmlparser2 that collects the parser events into a tree (this.dom).
//
//Supported call shapes:
//  new DomHandler(callback, options, elementCB)
//  new DomHandler(callback, elementCB)
//  new DomHandler(options, elementCB)
//
//callback  - optional function, invoked as callback(error, dom)
//options   - optional object, see defaultOpts below
//elementCB - optional function, invoked with every element once it is closed
function DomHandler(callback, options, elementCB){
	//`typeof null` is "object" as well: a null callback (which is what onreset
	//passes for a handler created without one) must not be mistaken for the
	//options object, otherwise the real options end up in _elementCB
	if(typeof callback === "object" && callback !== null){
		elementCB = options;
		options = callback;
		callback = null;
	} else if(typeof options === "function"){
		elementCB = options;
		options = defaultOpts;
	}
	this._callback = callback;
	this._options = options || defaultOpts;
	this._elementCB = elementCB;
	this.dom = [];
	this._done = false;
	this._tagStack = [];
	//keep a parser that was already attached (the constructor is re-run on reset)
	this._parser = this._parser || null;
}

//default options
var defaultOpts = {
	normalizeWhitespace: false, //Replace all whitespace with single spaces
	withStartIndices: false, //Add startIndex properties to nodes
	withEndIndices: false, //Add endIndex properties to nodes
};
|
||||
|
||||
//Remembers the parser that feeds this handler; its startIndex/endIndex
//values are read when the withStartIndices/withEndIndices options are set
DomHandler.prototype.onparserinit = function(parser){
	var self = this;
	self._parser = parser;
};
|
||||
|
||||
//Resets the handler back to starting state
//
//Only the per-parse state is cleared. The callback, the options and the
//element callback are left exactly as they are, so they can never be
//re-interpreted by the constructor's argument overloading (a null callback
//followed by an options object would otherwise be shuffled around).
DomHandler.prototype.onreset = function(){
	this.dom = [];
	this._done = false;
	this._tagStack = [];
	//an already attached parser stays attached
	this._parser = this._parser || null;
};
|
||||
|
||||
//Signals the handler that parsing is done: marks it as finished, detaches
//the parser and reports the result through _handleCallback(null).
//Calls after the first one are ignored.
DomHandler.prototype.onend = function(){
	if(!this._done){
		this._done = true;
		this._parser = null;
		this._handleCallback(null);
	}
};
|
||||
|
||||
//Shared implementation of _handleCallback and onerror.
//With a callback function registered it receives (error, dom); without one,
//a truthy error is thrown and a falsy one is silently dropped.
DomHandler.prototype._handleCallback =
DomHandler.prototype.onerror = function(error){
	var hasCallback = typeof this._callback === "function";

	if(!hasCallback){
		if(error) throw error;
		return;
	}

	this._callback(error, this.dom);
};
|
||||
|
||||
//Closes the innermost open element: pops it off the tag stack, records its
//endIndex when the withEndIndices option is set, and hands it to the
//element callback (if any). The tag name is not checked against the stack,
//and the element callback is invoked even when the stack was empty.
DomHandler.prototype.onclosetag = function(){
	var closed = this._tagStack.pop();
	var wantsEndIndex = this._options.withEndIndices;

	if(wantsEndIndex && closed){
		closed.endIndex = this._parser.endIndex;
	}

	if(this._elementCB){
		this._elementCB(closed);
	}
};
|
||||
|
||||
//Turns a plain property bag into a node.
//Without the withDomLvl1 option the bag itself is returned. With it, a new
//object is created whose prototype is ElementPrototype for type "tag" and
//NodePrototype for everything else, and the bag's own enumerable
//properties are copied onto it.
DomHandler.prototype._createDomElement = function(properties){
	if(!this._options.withDomLvl1){
		return properties;
	}

	var proto = properties.type === "tag" ? ElementPrototype : NodePrototype;
	var node = Object.create(proto);

	Object.keys(properties).forEach(function(key){
		node[key] = properties[key];
	});

	return node;
};
|
||||
|
||||
//Appends a node to the tree: to the children of the innermost open element,
//or to the top-level dom array when no element is open. Sets next/prev/parent
//on the node, links the previous sibling to it and, depending on the
//options, copies startIndex/endIndex from the parser.
DomHandler.prototype._addDomElement = function(element){
	var stack = this._tagStack;
	var parent = stack[stack.length - 1];
	var siblings = parent ? parent.children : this.dom;
	var previousSibling = siblings[siblings.length - 1];
	var options = this._options;

	element.next = null;

	if(options.withStartIndices){
		element.startIndex = this._parser.startIndex;
	}
	if(options.withEndIndices){
		element.endIndex = this._parser.endIndex;
	}

	element.prev = previousSibling || null;
	if(previousSibling){
		previousSibling.next = element;
	}

	siblings.push(element);
	element.parent = parent || null;
};
|
||||
|
||||
//Creates the element for an opening tag, adds it to the tree and makes it
//the innermost open element. <script> and <style> get their own element
//types; every other name is a regular tag.
DomHandler.prototype.onopentag = function(name, attribs){
	var type;

	if(name === "script"){
		type = ElementType.Script;
	} else if(name === "style"){
		type = ElementType.Style;
	} else {
		type = ElementType.Tag;
	}

	var element = this._createDomElement({
		type: type,
		name: name,
		attribs: attribs,
		children: []
	});

	this._addDomElement(element);
	this._tagStack.push(element);
};
|
||||
|
||||
//Handles a chunk of text. When the node that would precede it (the last
//child of the innermost open element, or the last top-level node when no
//element is open) is a text node, the chunk is appended to that node;
//otherwise a new text node is created and added to the tree.
DomHandler.prototype.ontext = function(data){
	//the ignoreWhitespace is officially dropped, but for now,
	//it's an alias for normalizeWhitespace
	var normalize = this._options.normalizeWhitespace || this._options.ignoreWhitespace;
	var stack = this._tagStack;
	var previous;

	if(stack.length){
		var open = stack[stack.length - 1];
		previous = open && open.children[open.children.length - 1];
	} else if(this.dom.length){
		previous = this.dom[this.dom.length - 1];
	}

	if(previous && previous.type === ElementType.Text){
		//normalization runs over the joined text so that whitespace split
		//across two chunks still collapses into a single space
		var combined = previous.data + data;
		previous.data = normalize ? combined.replace(re_whitespace, " ") : combined;
		return;
	}

	var text = normalize ? data.replace(re_whitespace, " ") : data;

	this._addDomElement(this._createDomElement({
		data: text,
		type: ElementType.Text
	}));
};
|
||||
|
||||
//Handles comment data. While a comment is the innermost entry of the tag
//stack, further data is appended to it; otherwise a new comment node is
//created, added to the tree and pushed onto the tag stack (it is popped
//again by oncommentend).
DomHandler.prototype.oncomment = function(data){
	var stack = this._tagStack;
	var open = stack[stack.length - 1];

	if(!open || open.type !== ElementType.Comment){
		var comment = this._createDomElement({
			data: data,
			type: ElementType.Comment
		});

		this._addDomElement(comment);
		this._tagStack.push(comment);
		return;
	}

	open.data += data;
};
|
||||
|
||||
//Opens a CDATA section: creates a CDATA node that already holds one empty
//text child, adds it to the tree and pushes it onto the tag stack so that
//following text lands inside it.
DomHandler.prototype.oncdatastart = function(){
	//the text child is a plain object; it does not go through _createDomElement
	var emptyText = {
		data: "",
		type: ElementType.Text
	};

	var cdata = this._createDomElement({
		children: [emptyText],
		type: ElementType.CDATA
	});

	this._addDomElement(cdata);
	this._tagStack.push(cdata);
};
|
||||
|
||||
//Ends a comment or CDATA section by removing the innermost entry from the
//tag stack (both events share this one function).
DomHandler.prototype.oncdataend = DomHandler.prototype.oncommentend = function(){
	var stack = this._tagStack;
	stack.pop();
};
|
||||
|
||||
//Adds a directive node (name + raw data) to the tree. The node is not
//pushed onto the tag stack, so it never receives children.
DomHandler.prototype.onprocessinginstruction = function(name, data){
	var properties = {
		name: name,
		data: data,
		type: ElementType.Directive
	};
	var directive = this._createDomElement(properties);

	this._addDomElement(directive);
};
|
||||
|
||||
module.exports = DomHandler;
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
// DOM-Level-1-compliant structure
|
||||
var NodePrototype = require('./node');
|
||||
//Prototype for "tag" elements; inherits everything from NodePrototype
var ElementPrototype = module.exports = Object.create(NodePrototype);

//DOM level 1 property name -> the handler property it mirrors
var domLvl1 = {
	tagName: "name"
};

//Installs an accessor named `alias` on ElementPrototype that reads and
//writes `property`; a falsy stored value is reported as null
function defineAlias(alias, property){
	Object.defineProperty(ElementPrototype, alias, {
		get: function(){
			var value = this[property];
			return value ? value : null;
		},
		set: function(val){
			this[property] = val;
			return val;
		}
	});
}

var aliases = Object.keys(domLvl1);
for(var i = 0; i < aliases.length; i++){
	defineAlias(aliases[i], domLvl1[aliases[i]]);
}
|
||||
+44
@@ -0,0 +1,44 @@
|
||||
// Prototype given to nodes when a DOM-Level-1-compliant structure is
// requested. It only holds read-only shortcuts that are derived from
// the node's own `children` and `type` properties.
var NodePrototype = module.exports = {
	// first entry of `children`, or null when there is none
	get firstChild() {
		var kids = this.children;
		if (!kids) return null;
		return kids[0] || null;
	},
	// last entry of `children`, or null when there is none
	get lastChild() {
		var kids = this.children;
		if (!kids) return null;
		return kids[kids.length - 1] || null;
	},
	// numeric code looked up in nodeTypes by `type`; types without an
	// entry there are reported with the element code
	get nodeType() {
		var code = nodeTypes[this.type];
		return code ? code : nodeTypes.element;
	}
};
|
||||
|
||||
//DOM level 1 property name -> the handler property it mirrors
var domLvl1 = {
	tagName: "name",
	childNodes: "children",
	parentNode: "parent",
	previousSibling: "prev",
	nextSibling: "next",
	nodeValue: "data"
};

//numeric nodeType codes, keyed by the node's `type`
var nodeTypes = {
	element: 1,
	text: 3,
	cdata: 4,
	comment: 8
};

//Installs an accessor named `alias` on NodePrototype that reads and
//writes `property`; a falsy stored value is reported as null
function defineAlias(alias, property){
	Object.defineProperty(NodePrototype, alias, {
		get: function(){
			var value = this[property];
			return value ? value : null;
		},
		set: function(val){
			this[property] = val;
			return val;
		}
	});
}

var aliases = Object.keys(domLvl1);
for(var i = 0; i < aliases.length; i++){
	defineAlias(aliases[i], domLvl1[aliases[i]]);
}
|
||||
+73
@@ -0,0 +1,73 @@
|
||||
{
|
||||
"_from": "domhandler@^2.3.0",
|
||||
"_id": "domhandler@2.4.2",
|
||||
"_inBundle": false,
|
||||
"_integrity": "sha512-JiK04h0Ht5u/80fdLMCEmV4zkNh2BcoMFBmZ/91WtYZ8qVXSKjiw7fXMgFPnHcSZgOo3XdinHvmnDUeMf5R4wA==",
|
||||
"_location": "/domhandler",
|
||||
"_phantomChildren": {},
|
||||
"_requested": {
|
||||
"type": "range",
|
||||
"registry": true,
|
||||
"raw": "domhandler@^2.3.0",
|
||||
"name": "domhandler",
|
||||
"escapedName": "domhandler",
|
||||
"rawSpec": "^2.3.0",
|
||||
"saveSpec": null,
|
||||
"fetchSpec": "^2.3.0"
|
||||
},
|
||||
"_requiredBy": [
|
||||
"/htmlparser2"
|
||||
],
|
||||
"_resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.4.2.tgz",
|
||||
"_shasum": "8805097e933d65e85546f726d60f5eb88b44f803",
|
||||
"_spec": "domhandler@^2.3.0",
|
||||
"_where": "/mnt/dropdrive/Dropbox/japan/Nihongo/kanjiLatex/grabKanji/node_modules/htmlparser2",
|
||||
"author": {
|
||||
"name": "Felix Boehm",
|
||||
"email": "me@feedic.com"
|
||||
},
|
||||
"bugs": {
|
||||
"url": "https://github.com/fb55/DomHandler/issues"
|
||||
},
|
||||
"bundleDependencies": false,
|
||||
"dependencies": {
|
||||
"domelementtype": "1"
|
||||
},
|
||||
"deprecated": false,
|
||||
"description": "handler for htmlparser2 that turns pages into a dom",
|
||||
"devDependencies": {
|
||||
"htmlparser2": "^3.9.0",
|
||||
"jshint": "^2.9.1",
|
||||
"mocha": "^3.0.2"
|
||||
},
|
||||
"directories": {
|
||||
"test": "tests"
|
||||
},
|
||||
"homepage": "https://github.com/fb55/DomHandler#readme",
|
||||
"jshintConfig": {
|
||||
"quotmark": "double",
|
||||
"trailing": true,
|
||||
"unused": true,
|
||||
"undef": true,
|
||||
"node": true,
|
||||
"proto": true,
|
||||
"globals": {
|
||||
"it": true
|
||||
}
|
||||
},
|
||||
"keywords": [
|
||||
"dom",
|
||||
"htmlparser2"
|
||||
],
|
||||
"license": "BSD-2-Clause",
|
||||
"main": "index.js",
|
||||
"name": "domhandler",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git://github.com/fb55/DomHandler.git"
|
||||
},
|
||||
"scripts": {
|
||||
"test": "mocha -R list && jshint index.js test/"
|
||||
},
|
||||
"version": "2.4.2"
|
||||
}
|
||||
+116
@@ -0,0 +1,116 @@
|
||||
# domhandler [](https://travis-ci.org/fb55/domhandler)
|
||||
|
||||
The DOM handler (formerly known as DefaultHandler) creates a tree containing all nodes of a page. The tree may be manipulated using the [domutils](https://github.com/fb55/domutils) library.
|
||||
|
||||
## Usage
|
||||
```javascript
|
||||
var handler = new DomHandler([ <func> callback(err, dom), ] [ <obj> options ]);
|
||||
// var parser = new Parser(handler[, options]);
|
||||
```
|
||||
|
||||
Available options are described below.
|
||||
|
||||
## Example
|
||||
```javascript
|
||||
var htmlparser = require("htmlparser2");
|
||||
var rawHtml = "Xyz <script language= javascript>var foo = '<<bar>>';< / script><!--<!-- Waah! -- -->";
|
||||
var handler = new htmlparser.DomHandler(function (error, dom) {
|
||||
if (error)
|
||||
[...do something for errors...]
|
||||
else
|
||||
[...parsing done, do something...]
|
||||
console.log(dom);
|
||||
});
|
||||
var parser = new htmlparser.Parser(handler);
|
||||
parser.write(rawHtml);
|
||||
parser.end();
|
||||
```
|
||||
|
||||
Output:
|
||||
|
||||
```javascript
|
||||
[{
|
||||
data: 'Xyz ',
|
||||
type: 'text'
|
||||
}, {
|
||||
type: 'script',
|
||||
name: 'script',
|
||||
attribs: {
|
||||
language: 'javascript'
|
||||
},
|
||||
children: [{
|
||||
data: 'var foo = \'<bar>\';<',
|
||||
type: 'text'
|
||||
}]
|
||||
}, {
|
||||
data: '<!-- Waah! -- ',
|
||||
type: 'comment'
|
||||
}]
|
||||
```
|
||||
|
||||
## Option: normalizeWhitespace
|
||||
Indicates whether the whitespace in text nodes should be normalized (= all whitespace should be replaced with single spaces). The default value is "false".
|
||||
|
||||
The following HTML will be used:
|
||||
|
||||
```html
|
||||
<font>
|
||||
<br>this is the text
|
||||
<font>
|
||||
```
|
||||
|
||||
### Example: true
|
||||
|
||||
```javascript
|
||||
[{
|
||||
type: 'tag',
|
||||
name: 'font',
|
||||
children: [{
|
||||
data: ' ',
|
||||
type: 'text'
|
||||
}, {
|
||||
type: 'tag',
|
||||
name: 'br'
|
||||
}, {
|
||||
data: 'this is the text ',
|
||||
type: 'text'
|
||||
}, {
|
||||
type: 'tag',
|
||||
name: 'font'
|
||||
}]
|
||||
}]
|
||||
```
|
||||
|
||||
### Example: false
|
||||
|
||||
```javascript
|
||||
[{
|
||||
type: 'tag',
|
||||
name: 'font',
|
||||
children: [{
|
||||
data: '\n\t',
|
||||
type: 'text'
|
||||
}, {
|
||||
type: 'tag',
|
||||
name: 'br'
|
||||
}, {
|
||||
data: 'this is the text\n',
|
||||
type: 'text'
|
||||
}, {
|
||||
type: 'tag',
|
||||
name: 'font'
|
||||
}]
|
||||
}]
|
||||
```
|
||||
|
||||
## Option: withDomLvl1
|
||||
|
||||
Adds DOM level 1 properties to all elements.
|
||||
|
||||
<!-- TODO: description -->
|
||||
|
||||
## Option: withStartIndices
|
||||
Indicates whether a `startIndex` property will be added to nodes. When the parser is used in a non-streaming fashion, `startIndex` is an integer indicating the position of the start of the node in the document. The default value is "false".
|
||||
|
||||
## Option: withEndIndices
|
||||
Indicates whether an `endIndex` property will be added to nodes. When the parser is used in a non-streaming fashion, `endIndex` is an integer indicating the position of the end of the node in the document. The default value is "false".
|
||||
+57
@@ -0,0 +1,57 @@
|
||||
{
|
||||
"name": "Basic test",
|
||||
"options": {},
|
||||
"html": "<!DOCTYPE html><html><title>The Title</title><body>Hello world</body></html>",
|
||||
"expected": [
|
||||
{
|
||||
"name": "!doctype",
|
||||
"data": "!DOCTYPE html",
|
||||
"type": "directive"
|
||||
},
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "html",
|
||||
"attribs": {},
|
||||
"parent": null,
|
||||
"children": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "title",
|
||||
"attribs": {},
|
||||
"parent": {
|
||||
"type": "tag",
|
||||
"name": "html",
|
||||
"attribs": {}
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"data": "The Title",
|
||||
"type": "text",
|
||||
"parent": {
|
||||
"type": "tag",
|
||||
"name": "title",
|
||||
"attribs": {}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "body",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"data": "Hello world",
|
||||
"type": "text"
|
||||
}
|
||||
],
|
||||
"prev": {
|
||||
"type": "tag",
|
||||
"name": "title",
|
||||
"attribs": {}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+21
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "Single Tag 1",
|
||||
"options": {},
|
||||
"html": "<br>text</br>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "br",
|
||||
"attribs": {}
|
||||
},
|
||||
{
|
||||
"data": "text",
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "br",
|
||||
"attribs": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
+21
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "Single Tag 2",
|
||||
"options": {},
|
||||
"html": "<br>text<br>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "br",
|
||||
"attribs": {}
|
||||
},
|
||||
{
|
||||
"data": "text",
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "br",
|
||||
"attribs": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
+27
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"name": "Unescaped chars in script",
|
||||
"options": {},
|
||||
"html": "<head><script language=\"Javascript\">var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";</script></head>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "head",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"type": "script",
|
||||
"name": "script",
|
||||
"attribs": {
|
||||
"language": "Javascript"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"data": "var foo = \"<bar>\"; alert(2 > foo); var baz = 10 << 2; var zip = 10 >> 1; var yap = \"<<>>>><<\";",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "Special char in comment",
|
||||
"options": {},
|
||||
"html": "<head><!-- commented out tags <title>Test</title>--></head>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "head",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"data": " commented out tags <title>Test</title>",
|
||||
"type": "comment"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "Script source in comment",
|
||||
"options": {},
|
||||
"html": "<script><!--var foo = 1;--></script>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "script",
|
||||
"name": "script",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"data": "<!--var foo = 1;-->",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"name": "Unescaped chars in style",
|
||||
"options": {},
|
||||
"html": "<style type=\"text/css\">\n body > p\n\t{ font-weight: bold; }</style>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "style",
|
||||
"name": "style",
|
||||
"attribs": {
|
||||
"type": "text/css"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"data": "\n body > p\n\t{ font-weight: bold; }",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"name": "Extra spaces in tag",
|
||||
"options": {},
|
||||
"html": "<font\t\n size='14' \n>the text</\t\nfont\t \n>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "font",
|
||||
"attribs": {
|
||||
"size": "14"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"data": "the text",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"name": "Unquoted attributes",
|
||||
"options": {},
|
||||
"html": "<font size= 14>the text</font>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "font",
|
||||
"attribs": {
|
||||
"size": "14"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"data": "the text",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+15
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"name": "Singular attribute",
|
||||
"options": {},
|
||||
"html": "<option value='foo' selected>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "option",
|
||||
"attribs": {
|
||||
"value": "foo",
|
||||
"selected": ""
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
{
|
||||
"name": "Text outside tags",
|
||||
"options": {},
|
||||
"html": "Line one\n<br>\nline two",
|
||||
"expected": [
|
||||
{
|
||||
"data": "Line one\n",
|
||||
"type": "text",
|
||||
"prev": null,
|
||||
"next": {
|
||||
"type": "tag",
|
||||
"name": "br",
|
||||
"attribs": {}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "br",
|
||||
"attribs": {},
|
||||
"prev": {
|
||||
"data": "Line one\n",
|
||||
"type": "text"
|
||||
},
|
||||
"next": {
|
||||
"data": "\nline two",
|
||||
"type": "text"
|
||||
}
|
||||
},
|
||||
{
|
||||
"data": "\nline two",
|
||||
"type": "text",
|
||||
"prev": {
|
||||
"type": "tag",
|
||||
"name": "br",
|
||||
"attribs": {}
|
||||
},
|
||||
"next": null
|
||||
}
|
||||
]
|
||||
}
|
||||
+11
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"name": "Only text",
|
||||
"options": {},
|
||||
"html": "this is the text",
|
||||
"expected": [
|
||||
{
|
||||
"data": "this is the text",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
+19
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"name": "Comment within text",
|
||||
"options": {},
|
||||
"html": "this is <!-- the comment --> the text",
|
||||
"expected": [
|
||||
{
|
||||
"data": "this is ",
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"data": " the comment ",
|
||||
"type": "comment"
|
||||
},
|
||||
{
|
||||
"data": " the text",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "Comment within text within script",
|
||||
"options": {},
|
||||
"html": "<script>this is <!-- the comment --> the text</script>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "script",
|
||||
"name": "script",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"data": "this is <!-- the comment --> the text",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+22
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"name": "Option 'verbose' set to 'false'",
|
||||
"options": {
|
||||
"verbose": false
|
||||
},
|
||||
"html": "<font\t\n size='14' \n>the text</\t\nfont\t \n>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "font",
|
||||
"attribs": {
|
||||
"size": "14"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"data": "the text",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+47
@@ -0,0 +1,47 @@
|
||||
{
|
||||
"name": "Normalize whitespace",
|
||||
"options": {
|
||||
"normalizeWhitespace": true
|
||||
},
|
||||
"html": "Line one\n<br>\t \r\n\f <br>\nline two<font><br> x </font>",
|
||||
"expected": [
|
||||
{
|
||||
"data": "Line one ",
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "br",
|
||||
"attribs": {}
|
||||
},
|
||||
{
|
||||
"data": " ",
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "br",
|
||||
"attribs": {}
|
||||
},
|
||||
{
|
||||
"data": " line two",
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "font",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "br",
|
||||
"attribs": {}
|
||||
},
|
||||
{
|
||||
"data": " x ",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+18
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"name": "XML Namespace",
|
||||
"options": {},
|
||||
"html": "<ns:tag>text</ns:tag>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "ns:tag",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"data": "text",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+16
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"name": "Enforce empty tags",
|
||||
"options": {},
|
||||
"html": "<link>text</link>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "link",
|
||||
"attribs": {}
|
||||
},
|
||||
{
|
||||
"data": "text",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"name": "Ignore empty tags (xml mode)",
|
||||
"options": {
|
||||
"xmlMode": true
|
||||
},
|
||||
"html": "<link>text</link>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "link",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"data": "text",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+20
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"name": "Template script tags",
|
||||
"options": {},
|
||||
"html": "<script type=\"text/template\"><h1>Heading1</h1></script>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "script",
|
||||
"name": "script",
|
||||
"attribs": {
|
||||
"type": "text/template"
|
||||
},
|
||||
"children": [
|
||||
{
|
||||
"data": "<h1>Heading1</h1>",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+15
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"name": "Conditional comments",
|
||||
"options": {},
|
||||
"html": "<!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]--><!--[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]-->",
|
||||
"expected": [
|
||||
{
|
||||
"data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
|
||||
"type": "comment"
|
||||
},
|
||||
{
|
||||
"data": "[if lt IE 7]> <html class='no-js ie6 oldie' lang='en'> <![endif]",
|
||||
"type": "comment"
|
||||
}
|
||||
]
|
||||
}
|
||||
+41
@@ -0,0 +1,41 @@
|
||||
{
|
||||
"name": "lowercase tags",
|
||||
"options": {},
|
||||
"html": "<!DOCTYPE html><HTML><TITLE>The Title</title><BODY>Hello world</body></html>",
|
||||
"expected": [
|
||||
{
|
||||
"name": "!doctype",
|
||||
"data": "!DOCTYPE html",
|
||||
"type": "directive"
|
||||
},
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "html",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "title",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"data": "The Title",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "tag",
|
||||
"name": "body",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"data": "Hello world",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+131
@@ -0,0 +1,131 @@
|
||||
{
|
||||
"name": "DOM level 1",
|
||||
"options": { "withDomLvl1": true },
|
||||
"html": "<div>some stray text<h1>Hello, world.</h1><!-- comment node -->more stray text</div>",
|
||||
"expected": [
|
||||
{
|
||||
"type": "tag",
|
||||
"nodeType": 1,
|
||||
"name": "div",
|
||||
"tagName": "div",
|
||||
"attribs": {},
|
||||
"nodeValue": null,
|
||||
"children": [
|
||||
{
|
||||
"type": "text",
|
||||
"nodeType": 3,
|
||||
"tagName": null,
|
||||
"data": "some stray text",
|
||||
"nodeValue": "some stray text",
|
||||
"childNodes": null,
|
||||
"firstChild": null,
|
||||
"lastChild": null
|
||||
},
|
||||
{
|
||||
"type": "tag",
|
||||
"nodeType": 1,
|
||||
"name": "h1",
|
||||
"tagName": "h1",
|
||||
"nodeValue": null,
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"type": "text",
|
||||
"nodeType": 3,
|
||||
"tagName": null,
|
||||
"data": "Hello, world.",
|
||||
"nodeValue": "Hello, world.",
|
||||
"childNodes": null,
|
||||
"firstChild": null,
|
||||
"lastChild": null
|
||||
}
|
||||
],
|
||||
"firstChild": {
|
||||
"type": "text",
|
||||
"nodeType": 3,
|
||||
"tagName": null,
|
||||
"data": "Hello, world.",
|
||||
"nodeValue": "Hello, world.",
|
||||
"childNodes": null,
|
||||
"firstChild": null,
|
||||
"lastChild": null
|
||||
},
|
||||
"lastChild": {
|
||||
"type": "text",
|
||||
"nodeType": 3,
|
||||
"tagName": null,
|
||||
"data": "Hello, world.",
|
||||
"nodeValue": "Hello, world.",
|
||||
"childNodes": null,
|
||||
"firstChild": null,
|
||||
"lastChild": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "comment",
|
||||
"nodeType": 8,
|
||||
"tagName": null,
|
||||
"data": " comment node ",
|
||||
"nodeValue": " comment node ",
|
||||
"childNodes": null,
|
||||
"firstChild": null,
|
||||
"lastChild": null,
|
||||
"prev": {
|
||||
"type": "tag",
|
||||
"name": "h1",
|
||||
"nodeValue": null,
|
||||
"attribs": {}
|
||||
},
|
||||
"previousSibling": {
|
||||
"type": "tag",
|
||||
"name": "h1",
|
||||
"nodeValue": null,
|
||||
"attribs": {}
|
||||
},
|
||||
"next": {
|
||||
"type": "text",
|
||||
"tagName": null,
|
||||
"data": "more stray text"
|
||||
},
|
||||
"nextSibling": {
|
||||
"type": "text",
|
||||
"tagName": null,
|
||||
"data": "more stray text"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"nodeType": 3,
|
||||
"tagName": null,
|
||||
"data": "more stray text",
|
||||
"nodeValue": "more stray text",
|
||||
"childNodes": null,
|
||||
"firstChild": null,
|
||||
"lastChild": null,
|
||||
"next": null,
|
||||
"nextSibling": null
|
||||
}
|
||||
],
|
||||
"firstChild": {
|
||||
"type": "text",
|
||||
"nodeType": 3,
|
||||
"tagName": null,
|
||||
"data": "some stray text",
|
||||
"nodeValue": "some stray text",
|
||||
"childNodes": null,
|
||||
"firstChild": null,
|
||||
"lastChild": null
|
||||
},
|
||||
"lastChild": {
|
||||
"type": "text",
|
||||
"nodeType": 3,
|
||||
"tagName": null,
|
||||
"data": "more stray text",
|
||||
"nodeValue": "more stray text",
|
||||
"childNodes": null,
|
||||
"firstChild": null,
|
||||
"lastChild": null
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
+85
@@ -0,0 +1,85 @@
|
||||
{
|
||||
"name": "withStartIndices adds correct startIndex properties",
|
||||
"options": {"withStartIndices": true},
|
||||
"streaming": false,
|
||||
"html": "<!DOCTYPE html> <html> <title>The Title</title> <body class='foo'>Hello world <p></p></body> <!-- the comment --> </html> ",
|
||||
"expected": [
|
||||
{
|
||||
"startIndex": 0,
|
||||
"name": "!doctype",
|
||||
"data": "!DOCTYPE html",
|
||||
"type": "directive"
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"data": " "
|
||||
},
|
||||
{
|
||||
"startIndex": 16,
|
||||
"type": "tag",
|
||||
"name": "html",
|
||||
"attribs": {},
|
||||
"parent": null,
|
||||
"children": [
|
||||
{
|
||||
"startIndex": 22,
|
||||
"type": "text",
|
||||
"data": " "
|
||||
},
|
||||
{
|
||||
"startIndex": 23,
|
||||
"type": "tag",
|
||||
"name": "title",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"startIndex": 30,
|
||||
"data": "The Title",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"startIndex": 47,
|
||||
"type": "text",
|
||||
"data": " "
|
||||
},
|
||||
{
|
||||
"startIndex": 48,
|
||||
"type": "tag",
|
||||
"name": "body",
|
||||
"attribs": {"class": "foo"},
|
||||
"children": [
|
||||
{
|
||||
"startIndex": 66,
|
||||
"data": "Hello world ",
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"startIndex": 78,
|
||||
"type": "tag",
|
||||
"name": "p",
|
||||
"attribs": {},
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"startIndex": 92,
|
||||
"type": "text",
|
||||
"data": " "
|
||||
},
|
||||
{
|
||||
"startIndex": 93,
|
||||
"type": "comment",
|
||||
"data": " the comment "
|
||||
},
|
||||
{
|
||||
"startIndex": 113,
|
||||
"type": "text",
|
||||
"data": " "
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+86
@@ -0,0 +1,86 @@
|
||||
{
|
||||
"name": "withEndIndices adds correct endIndex properties",
|
||||
"options": {"withStartIndices": true,"withEndIndices": true},
|
||||
"streaming": false,
|
||||
"html": "<!DOCTYPE html> <html> <title>The Title</title> <body class='foo'>Hello world <p></p></body> <!-- the comment --> </html> ",
|
||||
"expected": [
|
||||
{
|
||||
"endIndex": null,
|
||||
"name": "!doctype",
|
||||
"data": "!DOCTYPE html",
|
||||
"type": "directive"
|
||||
},
|
||||
{
|
||||
"type": "text",
|
||||
"data": " ",
|
||||
"endIndex": 15
|
||||
},
|
||||
{
|
||||
"endIndex": 120,
|
||||
"type": "tag",
|
||||
"name": "html",
|
||||
"attribs": {},
|
||||
"parent": null,
|
||||
"children": [
|
||||
{
|
||||
"endIndex": 22,
|
||||
"type": "text",
|
||||
"data": " "
|
||||
},
|
||||
{
|
||||
"endIndex": 46,
|
||||
"type": "tag",
|
||||
"name": "title",
|
||||
"attribs": {},
|
||||
"children": [
|
||||
{
|
||||
"endIndex": 38,
|
||||
"data": "The Title",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"endIndex": 47,
|
||||
"type": "text",
|
||||
"data": " "
|
||||
},
|
||||
{
|
||||
"endIndex": 91,
|
||||
"type": "tag",
|
||||
"name": "body",
|
||||
"attribs": {"class": "foo"},
|
||||
"children": [
|
||||
{
|
||||
"endIndex": 77,
|
||||
"data": "Hello world ",
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"endIndex": 84,
|
||||
"type": "tag",
|
||||
"name": "p",
|
||||
"attribs": {},
|
||||
"children": []
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"endIndex": 92,
|
||||
"type": "text",
|
||||
"data": " "
|
||||
},
|
||||
{
|
||||
"endIndex": 112,
|
||||
"type": "comment",
|
||||
"data": " the comment "
|
||||
},
|
||||
{
|
||||
"endIndex": 113,
|
||||
"type": "text",
|
||||
"data": " "
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
+60
@@ -0,0 +1,60 @@
|
||||
var fs = require("fs"),
|
||||
path = require("path"),
|
||||
assert = require("assert"),
|
||||
util = require("util"),
|
||||
Parser = require("htmlparser2").Parser,
|
||||
Handler = require("../");
|
||||
|
||||
//directory that holds the JSON test descriptions
var basePath = path.resolve(__dirname, "cases");
//util.inspect settings used when attaching expected/actual to a failure
var inspectOpts = { showHidden: true, depth: null };

//only allow .json files
function isJsonFile(name){
	return /\.json$/.test(name);
}

function toAbsolutePath(name){
	return path.resolve(basePath, name);
}

//Feeds test.html through a fresh handler/parser pair; every time the handler
//reports a result it is compared against test.expected
function runCase(test){
	var expected = test.expected;
	var data = test.html;

	var handler = new Handler(function(err, actual){
		assert.ifError(err);
		try {
			compare(expected, actual);
		} catch(e){
			//replace the (possibly deeply nested) values with readable dumps
			e.expected = util.inspect(expected, inspectOpts);
			e.actual = util.inspect(actual, inspectOpts);
			throw e;
		}
	}, test.options);

	var parser = new Parser(handler, test.options);

	//first, try to run the test via chunks
	//(skipped only when the description sets "streaming" to a falsy value)
	var streaming = test.streaming || test.streaming === undefined;
	if(streaming){
		data.split("").forEach(function(chunk){
			parser.write(chunk);
		});
		parser.done();
	}

	//then parse everything
	parser.parseComplete(data);
}

fs
	.readdirSync(basePath)
	.filter(isJsonFile)
	.map(toAbsolutePath)
	.map(function(file){
		return require(file);
	})
	.forEach(function(test){
		it(test.name, function(){
			runCase(test);
		});
	});
|
||||
|
||||
//Recursively checks that `result` contains everything described by `expected`.
//Primitives (and null) must be strictly equal; for objects and arrays every
//property of `expected` must exist in `result` and match recursively.
//Properties that only exist in `result` are ignored, which lets the test
//descriptions spell out just the interesting part of a (cyclic) DOM tree.
//Throws an assertion error on the first mismatch.
function compare(expected, result){
	assert.equal(typeof expected, typeof result, "types didn't match");

	if(typeof expected !== "object" || expected === null){
		assert.strictEqual(expected, result, "result doesn't equal expected");
		return;
	}

	//typeof null is "object" too, so a null result gets past the check above;
	//fail with a readable assertion instead of a TypeError from the `in` operator
	assert.ok(result !== null, "result was null where an object was expected");

	for(var prop in expected){
		assert.ok(prop in result, "result didn't contain property " + prop);
		compare(expected[prop], result[prop]);
	}
}
|
||||
Reference in New Issue
Block a user