javascript - Creating a regex to parse html to MXML syntax -
I searched a lot on Stack Overflow and found some interesting questions, including:
"How to create a regular expression for a span attribute?"
and "JavaScript regex to replace text in div and < >".
But it turns out I couldn't manage to reach my goal: replace each div with the value of its data-type attribute, and remove the data-type attribute from the string.
Here's what I did:
// Doesn't work across multiple lines: only the first occurrence is handled, nothing more.
// Earlier regex attempt: /\s?data\-type\=(?:['"])?(\d+)(?:['"])?/
var source_code = $("body").html();

// Matches every literal "div" in the serialized markup (used to remove the divs).
var rdiv = /div/gm;

// First data-type attribute only; capture group 1 holds its value.
var mxml = /\s?data\-type\=(?:['"])?(\w+)(?:['"])?/.exec(source_code);

// Every data-type attribute in the markup (the closing quote is mandatory here).
var rattr = source_code.match(/\s?data\-type\=(?:['"])?(\w+)(?:['"])/gm);

// Rename every "div" to "s:<first type>", then strip the first attribute match.
var outra = source_code.replace(rdiv, 's:'.concat(mxml[1]));
var nestr = outra.replace(rattr[0], ''); // worked for the first element

console.log(nestr);
console.log(mxml);
console.log(rattr);
// The script above is run over this HTML sample page:
<div id="app" data-type="application"> <div data-type="label"></div> <div data-type="button"></div> <div data-type="vbox"></div> <div data-type="group"></div> </div>
Can anyone shed some light on this specific thing? I may be missing something, but I have no clue what; I have nothing left to try, otherwise I wouldn't be asking here.
I've created a jsFiddle to demonstrate; open your browser's console to see the results I get.
Feel free to answer with a jsFiddle or, better, with an explanation of the regex and why it fails.
Until I get feedback, I'll keep trying to see if I can manage to replace the text.
Thanks in advance.
It would be easier to parse the markup into a tree of objects and then convert that to MXML.
Something like this:
// Demo input: the serialized <body> markup. jQuery only exists on the demo
// page, so the lookup is guarded to keep the parser loadable elsewhere.
var source_code = typeof $ === 'function' ? $("body").html() : undefined;

// All patterns are anchored at the start of the remaining input: the parser
// consumes the string from the front, one token at a time.
var openstarttagrx = /^\s*<div/i;
var closestarttagrx = /^\s*>/i;
var closetagrx = /^\s*<\/div>/i;
// No "m" flag: with it, "^" would also match after every newline, letting an
// element swallow an attribute that belongs to a child on a later line.
var attrsrx = new RegExp(
    '^\\s+' +
    '(?:(data-type)|([a-z-]+))' + // group 1: "data-type", group 2: any other attribute name
    '=' +
    '(\'|")' +                    // group 3: opening quote
    '(.*?)' +                     // group 4: attribute value
    '\\3',                        // value ends at the SAME quote that opened it
    'i');

/**
 * One parsed <div> element.
 * type     - value of its data-type attribute (undefined if absent)
 * attrs    - map of the remaining attributes (undefined if none)
 * children - array of nested `thing`s (undefined if none)
 */
function thing() {
    this.type = undefined;
    this.attrs = undefined;
    this.children = undefined;
}

/** Records a non data-type attribute, creating the map on first use. */
thing.prototype.addattr = function(key, value) {
    this.attrs = this.attrs || {};
    this.attrs[key] = value;
};

/** Appends a nested element, creating the list on first use. */
thing.prototype.addchild = function(child) {
    this.children = this.children || [];
    this.children.push(child);
};

/**
 * Builds the parse-error message.
 * @param {string} expected - description of the token that was expected
 * @param {string} str - remaining input; its first 20 characters are quoted
 * @returns {string}
 */
function geterrmsg(expected, str) {
    return 'malformed source, expected: ' + expected + '\n"' +
        str.slice(0, 20) + '"';
}

/**
 * Parses one <div> element (and, recursively, its child <div>s) from the
 * front of `str`.
 * @param {string} str - markup to parse
 * @returns {{str: string, elm: thing}|undefined} the parsed element plus the
 *     unconsumed remainder, or undefined when `str` does not start with an
 *     opening <div tag
 * @throws {Error} when the opening tag is not terminated by ">" or the
 *     element is not closed by "</div>"
 */
function parseelm(str) {
    var result, elm, childresult;

    if (!openstarttagrx.test(str)) {
        return;
    }

    elm = new thing();
    str = str.replace(openstarttagrx, '');

    // parse attributes
    result = attrsrx.exec(str);
    while (result) {
        if (result[1]) {
            elm.type = result[4];
        } else {
            elm.addattr(result[2], result[4]);
        }
        // The match is anchored at index 0, so dropping its length consumes it.
        str = str.slice(result[0].length);
        result = attrsrx.exec(str);
    }

    // close off tag
    if (!closestarttagrx.test(str)) {
        throw new Error(geterrmsg('end of opening tag', str));
    }
    str = str.replace(closestarttagrx, '');

    // if has child tags
    childresult = parseelm(str);
    while (childresult) {
        str = childresult.str;
        elm.addchild(childresult.elm);
        childresult = parseelm(str);
    }

    // tag should have closing tag
    if (!closetagrx.test(str)) {
        throw new Error(geterrmsg('closing tag element', str));
    }
    str = str.replace(closetagrx, '');

    return {
        str: str,
        elm: elm
    };
}

// Demo: only runs when the page markup was actually obtained above.
if (typeof source_code === 'string') {
    console.log(parseelm(source_code).elm);
}
// This parses the markup provided above into the following:
{ "type" : "application", "attrs" : { "id" : "app" }, "children" : [ { "type" : "label" }, { "type" : "button" }, { "type" : "vbox" }, { "type" : "group" } ] }
It's recursive, so embedded groups are parsed, too.
Comments
Post a Comment