[教程]解决安卓获取网页时乱码

2014-04-23 21:18:24 -0400
我们有时获取网页的时候，会出现乱码，像韩语、泰语等。而用这段代码处理过的字符串，就可以正常显示了。
package com.luoye.code;
import java.util.*;
/**
 * Decodes HTML character references found in a string and offers a small
 * whitespace-compaction helper.
 *
 * <p>Three reference forms are recognised, each of which must be terminated by
 * a semicolon: decimal ({@code &#NNN;}), hexadecimal ({@code &#xHHH;} or
 * {@code &#XHHH;}) and named ({@code &name;}, looked up case-sensitively in
 * {@link #charTable}). Anything that does not parse as one of these forms is
 * copied to the output unchanged.
 */
public class HTMLDecoder
{
    /**
     * Maps an entity name (without the leading {@code &} and trailing
     * {@code ;}) to the character it stands for. Filled by the instance
     * initializer below; it is public and mutable, so callers may add entries.
     */
    public HashMap<String, Character> charTable;

    // NOTE(review): the table jumps from "para" (182) straight to "lsaquo"
    // (8249). Names in between (e.g. middot, eacute, the Greek letters, mdash)
    // are not registered and are therefore left undecoded - confirm whether
    // that omission is intentional.
    {
        charTable = new HashMap<String, Character>();
        addEntity("quot", 34);
        addEntity("amp", 38);
        addEntity("apos", 39);
        addEntity("lt", 60);
        addEntity("gt", 62);
        addEntity("nbsp", 160);
        addEntity("iexcl", 161);
        addEntity("cent", 162);
        addEntity("pound", 163);
        addEntity("curren", 164);
        addEntity("yen", 165);
        addEntity("brvbar", 166);
        addEntity("sect", 167);
        addEntity("uml", 168);
        addEntity("copy", 169);
        addEntity("ordf", 170);
        addEntity("laquo", 171);
        addEntity("not", 172);
        addEntity("shy", 173);
        addEntity("reg", 174);
        addEntity("macr", 175);
        addEntity("deg", 176);
        addEntity("plusmn", 177);
        addEntity("sup2", 178);
        addEntity("sup3", 179);
        addEntity("acute", 180);
        addEntity("micro", 181);
        addEntity("para", 182);
        addEntity("lsaquo", 8249);
        addEntity("rsaquo", 8250);
        addEntity("oline", 8254);
        addEntity("frasl", 8260);
        addEntity("euro", 8364);
        addEntity("image", 8465);
        addEntity("weierp", 8472);
        addEntity("real", 8476);
        addEntity("trade", 8482);
        addEntity("alefsym", 8501);
        addEntity("larr", 8592);
        addEntity("uarr", 8593);
        addEntity("rarr", 8594);
        addEntity("darr", 8595);
        addEntity("harr", 8596);
        addEntity("crarr", 8629);
        addEntity("lArr", 8656);
        addEntity("uArr", 8657);
        addEntity("rArr", 8658);
        addEntity("dArr", 8659);
        addEntity("hArr", 8660);
        addEntity("forall", 8704);
        addEntity("part", 8706);
        addEntity("exist", 8707);
        addEntity("empty", 8709);
        addEntity("nabla", 8711);
        addEntity("isin", 8712);
        addEntity("notin", 8713);
        addEntity("ni", 8715);
        addEntity("prod", 8719);
        addEntity("sum", 8721);
        addEntity("minus", 8722);
        addEntity("lowast", 8727);
        addEntity("radic", 8730);
        addEntity("prop", 8733);
        addEntity("infin", 8734);
        addEntity("ang", 8736);
        addEntity("and", 8743);
        addEntity("or", 8744);
        addEntity("cap", 8745);
        addEntity("cup", 8746);
        addEntity("int", 8747);
        addEntity("there4", 8756);
        addEntity("sim", 8764);
        addEntity("cong", 8773);
        addEntity("asymp", 8776);
        addEntity("ne", 8800);
        addEntity("equiv", 8801);
        addEntity("le", 8804);
        addEntity("ge", 8805);
        addEntity("sub", 8834);
        addEntity("sup", 8835);
        addEntity("nsub", 8836);
        addEntity("sube", 8838);
        addEntity("supe", 8839);
        addEntity("oplus", 8853);
        addEntity("otimes", 8855);
        addEntity("perp", 8869);
        addEntity("sdot", 8901);
        addEntity("lceil", 8968);
        addEntity("rceil", 8969);
        addEntity("lfloor", 8970);
        addEntity("rfloor", 8971);
        addEntity("lang", 9001);
        addEntity("rang", 9002);
        addEntity("loz", 9674);
        addEntity("spades", 9824);
        addEntity("clubs", 9827);
        addEntity("hearts", 9829);
        addEntity("diams", 9830);
    }

    /** Registers one named entity; {@code code} is narrowed to a single char. */
    private void addEntity(String name, int code)
    {
        charTable.put(name, Character.valueOf((char) code));
    }

    /**
     * Replaces every well-formed character reference in {@code s} with the
     * character it denotes.
     *
     * <p>Numeric references are accepted for the whole Unicode range
     * (0 to {@link Character#MAX_CODE_POINT}); values above the Basic
     * Multilingual Plane are emitted as a surrogate pair. References that are
     * malformed, unterminated, out of range, or whose name is missing from
     * {@link #charTable} are copied through verbatim.
     *
     * @param s the text to decode; must not be {@code null}
     * @return the decoded text
     */
    public String decode(String s)
    {
        int length = s.length();
        StringBuilder out = new StringBuilder(length);
        int pos = 0;
        while (pos < length)
        {
            char c = s.charAt(pos);
            if (c == '&')
            {
                int next = decodeReference(s, pos + 1, out);
                if (next >= 0)
                {
                    // The reference was decoded and appended; resume after its ';'.
                    pos = next;
                    continue;
                }
            }
            out.append(c);
            pos++;
        }
        return out.toString();
    }

    /**
     * Tries to decode the reference whose body starts at {@code start}, i.e.
     * the index just after the {@code '&'}.
     *
     * @param s     the text being decoded
     * @param start index of the first character after the ampersand
     * @param out   receives the decoded character(s) on success only
     * @return the index just past the terminating {@code ';'} on success, or
     *         {@code -1} if nothing was appended
     */
    private int decodeReference(String s, int start, StringBuilder out)
    {
        int length = s.length();
        if (start >= length)
        {
            return -1;
        }
        char first = s.charAt(start);

        if (first == '#')
        {
            int digitsStart = start + 1;
            boolean hex = false;
            if (digitsStart < length)
            {
                char marker = s.charAt(digitsStart);
                if ((marker == 'x') || (marker == 'X'))
                {
                    hex = true;
                    digitsStart++;
                }
            }
            int end = digitsStart;
            while ((end < length) && (hex ? isHexDigit(s.charAt(end)) : isDigit(s.charAt(end))))
            {
                end++;
            }
            // Need at least one digit and a ';' immediately after the digits.
            if ((end == digitsStart) || (end >= length) || (s.charAt(end) != ';'))
            {
                return -1;
            }
            try
            {
                int codePoint = Integer.parseInt(s.substring(digitsStart, end), hex ? 16 : 10);
                if ((codePoint >= 0) && (codePoint <= Character.MAX_CODE_POINT))
                {
                    // appendCodePoint writes a surrogate pair for values above 0xFFFF.
                    out.appendCodePoint(codePoint);
                    return end + 1;
                }
            }
            catch (NumberFormatException ignored)
            {
                // Only reachable when the digit run overflows an int; such a
                // reference is left in the output as-is.
            }
            return -1;
        }

        if (isLetter(first))
        {
            int end = start + 1;
            while ((end < length) && isLetterOrDigit(s.charAt(end)))
            {
                end++;
            }
            if ((end >= length) || (s.charAt(end) != ';'))
            {
                return -1;
            }
            Character mapped = charTable.get(s.substring(start, end));
            if (mapped == null)
            {
                return -1;
            }
            out.append(mapped.charValue());
            return end + 1;
        }

        return -1;
    }

    /** Returns true for ASCII letters and ASCII decimal digits. */
    private boolean isLetterOrDigit(char c)
    {
        return isLetter(c) || isDigit(c);
    }

    /** Returns true for ASCII hexadecimal digits (0-9, a-f, A-F). */
    private boolean isHexDigit(char c)
    {
        return isHexLetter(c) || isDigit(c);
    }

    /** Returns true for ASCII letters a-z and A-Z. */
    private boolean isLetter(char c)
    {
        return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'));
    }

    /** Returns true for the hexadecimal letters a-f and A-F. */
    private boolean isHexLetter(char c)
    {
        return ((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F'));
    }

    /** Returns true for ASCII decimal digits 0-9. */
    private boolean isDigit(char c)
    {
        return (c >= '0') && (c <= '9');
    }

    /**
     * Collapses every run of whitespace (as defined by
     * {@link #isWhitespace(char)}) into a single space character.
     *
     * @param s the text to compact; must not be {@code null}
     * @return the text with each whitespace run replaced by one space
     */
    public String compact(String s)
    {
        int length = s.length();
        StringBuilder out = new StringBuilder(length);
        int pos = 0;
        while (pos < length)
        {
            char c = s.charAt(pos++);
            if (isWhitespace(c))
            {
                // Skip the remainder of the run; one space stands in for all of it.
                while ((pos < length) && isWhitespace(s.charAt(pos)))
                {
                    pos++;
                }
                c = ' ';
            }
            out.append(c);
        }
        return out.toString();
    }

    /**
     * Tells whether {@code ch} counts as whitespace for {@link #compact(String)}:
     * space, carriage return, line feed, tab, form feed, or U+200B.
     *
     * @param ch the character to test
     * @return true if {@code ch} is one of the characters listed above
     */
    public boolean isWhitespace(char ch)
    {
        return (ch == ' ') || (ch == '\r') || (ch == '\n') || (ch == '\t') || (ch == '\f') || (ch == '\u200b');
    }
}
«Newer      Older»
Comment:
Name:

Back to home

Subscribe | Register | Login | N