经过跟同事一起商量,我们决定由服务端为客户端定义一种中间格式,涵盖所有需要的元素类型:TEXT、IMG、A 和换行(\n)。
所以我在服务端定义了一个接口和几个实现类.
接口: IMediaObject.java
/**
 * Common contract for the elements of the intermediate media format.
 *
 * <p>Each element reports a numeric type code through {@link #getType()}.
 *
 * @author Vernon.Chen
 * @version 1.0 2013-8-23
 */
public interface IMediaObject {

    /** Type code for an anchor (A) element. */
    int TYPE_A = 1;

    /** Type code for an image element. */
    int TYPE_IMAGE = 2;

    /** Type code for a line-break element. */
    int TYPE_BR = 3;

    /** Type code for a text element. */
    int TYPE_TEXT = 4;

    /**
     * Returns the type code of this element.
     *
     * @return the numeric type code
     */
    int getType();
}
类: TextObject.java
/**
 * Media element that carries a run of plain text.
 *
 * @author Vernon.Chen
 * @version 1.0 2013-8-23
 */
public class TextObject implements IMediaObject {

    /** The text content; {@code null} until set. */
    private String text;

    /** Creates an element with no text assigned. */
    public TextObject() {
    }

    /**
     * Creates an element holding the given text.
     *
     * @param text the text content
     */
    public TextObject(String text) {
        this.text = text;
    }

    /**
     * @return the text content
     */
    public String getText() {
        return text;
    }

    /**
     * @param text the new text content
     */
    public void setText(String text) {
        this.text = text;
    }

    /**
     * @return {@link IMediaObject#TYPE_TEXT}
     */
    @Override
    public int getType() {
        return TYPE_TEXT;
    }
}
类:ImageObject.java
/**
 * Media element that describes an image by its source URL and an optional id.
 *
 * @author Vernon.Chen
 * @version 1.0 2013-8-23
 */
public class ImageObject implements IMediaObject {

    /** The image source (the {@code src} value). */
    private String src;

    /** Identifier associated with the image; {@code null} unless supplied. */
    private String imageId;

    /** Creates an element with neither source nor id assigned. */
    public ImageObject() {
    }

    /**
     * Creates an element with a source and no id.
     *
     * @param src the image source
     */
    public ImageObject(String src) {
        this(src, null);
    }

    /**
     * Creates an element with both a source and an id.
     *
     * @param src     the image source
     * @param imageId the image identifier
     */
    public ImageObject(String src, String imageId) {
        this.src = src;
        this.imageId = imageId;
    }

    /**
     * @return the image source
     */
    public String getSrc() {
        return src;
    }

    /**
     * @param src the new image source
     */
    public void setSrc(String src) {
        this.src = src;
    }

    /**
     * @return {@link IMediaObject#TYPE_IMAGE}
     */
    @Override
    public int getType() {
        return TYPE_IMAGE;
    }

    /**
     * @return the image identifier
     */
    public String getImageId() {
        return imageId;
    }

    /**
     * @param imageId the new image identifier
     */
    public void setImageId(String imageId) {
        this.imageId = imageId;
    }
}
类:AObject.java
/**
 * Media element that describes a hyperlink by its target and its name.
 *
 * @author Vernon.Chen
 * @version 1.0 2013-8-23
 */
public class AObject implements IMediaObject {

    /** The link target (the {@code href} value). */
    private String href;

    /** The name attached to the link. */
    private String name;

    /** Creates an element with neither target nor name assigned. */
    public AObject() {
    }

    /**
     * Creates an element with the given target and name.
     *
     * @param href the link target
     * @param name the link name
     */
    public AObject(String href, String name) {
        this.href = href;
        this.name = name;
    }

    /**
     * @return the link target
     */
    public String getHref() {
        return href;
    }

    /**
     * @param href the new link target
     */
    public void setHref(String href) {
        this.href = href;
    }

    /**
     * @return {@link IMediaObject#TYPE_A}
     */
    @Override
    public int getType() {
        return TYPE_A;
    }

    /**
     * @return the link name
     */
    public String getName() {
        return name;
    }

    /**
     * @param name the new link name
     */
    public void setName(String name) {
        this.name = name;
    }
}
类:BrObject.java
/**
 * Media element that represents a line break.
 *
 * @author Vernon.Chen
 * @version 1.0 2013-8-23
 */
public class BrObject implements IMediaObject {

    /** Text carried by the break; a single newline unless overwritten. */
    private String name = "\n";

    /**
     * @return the text carried by this break
     */
    public String getName() {
        return name;
    }

    /**
     * @param name the new text carried by this break
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return {@link IMediaObject#TYPE_BR}
     */
    @Override
    public int getType() {
        return TYPE_BR;
    }
}
以上就是这几种格式.
然后,我们用Jsoup的包去解析文本,并且转换成我们的中间格式,最终用JSON数组的格式返回给客户端去. 客户端只要按照JSON的顺序解析出来就可以了.
核心解析代码,利用了递归算法.
/** * 解析返回的内容 * * @author Vernon.Chen * */ static class ParseContext { StringBuilder sb = new StringBuilder(); List<IMediaObject> list = new ArrayList<IMediaObject>(); public void closeLast() { if (sb.length() > 0) { list.add(new TextObject(sb.toString())); sb.setLength(0); } } } /** * 解析 * * @param source * @return * @author Vernon.Chen * @date 2013-8-27 */ @SuppressWarnings("unchecked") public static List<IMediaObject> parse(String source) { if (StringUtil.isEmpty(source)) { return Collections.EMPTY_LIST ; } Document doc = Jsoup.parse(source); Element element = doc.body(); ParseContext pc = new ParseContext(); parseElementChildren(pc, element); pc.closeLast(); return pc.list; } /** * 递归 * * @param parseContext * @param element * @author Vernon.Chen * @date 2013-8-27 */ private static void parseElementChildren(ParseContext parseContext, Element element) { List<Node> nodes = element.childNodes(); for (Node node : nodes) { String nodeName = node.nodeName(); if (nodeName.equals("#text")) { // 连续的Text应该放在一起 TextNode textNode = (TextNode) node; parseContext.sb.append(textNode.text()); } else if (nodeName.equals("br")) { parseContext.closeLast(); parseContext.list.add(new BrObject()); } else if (nodeName.equals("a")) { parseA(parseContext, node); } else if (nodeName.equals("img")) { parseContext.closeLast(); Element e = (Element) node; // 解析IMG元素, 提取有用数据 String src = e.attr("src") ; String imageId = "" ; if (StringUtil.isNotEmpty(src)) { // http://img.dianziq.com/img/xl_7vA7fe.png if (src.indexOf("dianziq") > -1){ if(src.lastIndexOf('_') > -1) { imageId = src.substring(src.lastIndexOf('_') + 1,src.length()) ; } else { imageId = src.substring(src.lastIndexOf('/') + 1,src.length()) ; } } } ImageObject image = new ImageObject(src, imageId); parseContext.list.add(image); } else if (nodeName.equals("p")) { if (node instanceof Element) { parseElementChildren(parseContext, (Element) node); } parseContext.closeLast(); parseContext.list.add(new BrObject()); }else { if (node 
instanceof Element) { parseElementChildren(parseContext, (Element) node); } } } } /** * 解析A * * @param parseContext * @param node * @author Vernon.Chen * @date 2013-8-27 */ private static void parseA(ParseContext parseContext, Node node) { parseContext.closeLast(); Element e = (Element) node ; Elements elements = e.select("img") ; if (elements != null && elements.size() > 0) { // 包含图片 List<Node> nodes = e.childNodes(); if (nodes.size() > 0) { parseElementChildren(parseContext,e); } } else { AObject a = new AObject(e.attr("href"), e.text()); parseContext.list.add(a); } }
最后我们如何调用呢?
public static void main(String[] args) { StringBuilder sb = new StringBuilder() ; sb.append("<a title='sadfas' href='http://www.sdfasdfsadfds' target='_blanck'>我是</a>") ; sb.append("<img data-addr='http://192.168.1.29/wendaimg/77d41de8917e4511c444e1f7facb86b79fac1f2b.jpg' dzq-style='max-width: 500px;' dzq='true' src='http://192.168.1.29/wendaimg/77d41de8917e4511c444e1f7facb86b79fac1f2b.jpg' style='max-width: 500px;'/>") ; sb.append("<a title=sadfas href=http://www.sdfasdfsadfds target=_blanck>喜欢</a>") ; sb.append("<a title=sadfas href=http://www.sdfasdfsadfds target=_blanck>喜欢<strong>eee</strong><div>111111<img src='http://img.dianziq.com/img/xl_7vA7fe.png'/></div></a>"); sb.append("<a title=sadfas href=http://www.sdfasdfsadfds target=_blanck>喜欢</a>"); List<IMediaObject> list = new DzqJsoup().parse(sb.toString()); // list = Collections.EMPTY_LIST ; System.out.println(JSONArray.fromObject(list)); }
这样子就可以了. 可能这种方式不是最好的,但这也是我现在能够想到的. 如果各位有更好的方式, 请联系我. 谢谢~
联系客服