`
tomgooityeeee
  • 浏览: 71465 次
文章分类
社区版块
存档分类
最新评论

字符编码工具类

阅读更多

   
字符编码工具类
package charTools;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.net.URLDecoder;

import java.security.*;
import java.text.*;
import java.util.*;
/**
 * Title: Character encoding utility class
 * Description:
 * Copyright: flashman.com.cn Copyright (c) 2005
 * Company: flashman.com.cn
 * @author jeffzhu
 * @version 1.0
 */

/*
 *=================== Feature overview ===========================
 *
 *=================== String operations ==========================
 *String chopAtWord(String string, int length)   Truncates a string to at most n characters,
 *                                               preferring a line or word boundary
 *
 *
 *
 *=================== Encoding conversion ========================
 *String ISO2GB(String text)           Converts ISO-8859-1 text to GB2312
 *String GB2ISO(String text)           Converts GB2312 text to ISO-8859-1
 *String Utf8URLencode(String text)    UTF-8 URL encoding
 *String Utf8URLdecode(String text)    UTF-8 URL decoding
 *String CodeToWord(String text)       Converts one UTF-8 URL escape to its character (private helper)
 *boolean Utf8codeCheck(String text)   Checks whether an escape sequence is valid (private helper)
 *boolean isUtf8Url(String text)       Checks whether the text is UTF-8 URL-encoded
 *
 *
 *=================== Hashing and Base64 =========================
 *(Note: these are hashing/encoding helpers, not encryption.)
 *static final String hash(String data)                 MessageDigest (MD5) hash as a hex string
 *String encodeBase64(String data)                      Base64 encoding
 *public static String decodeBase64(String data)        Base64 decoding
 *
 *
 */
/**
 * Character-encoding helpers: charset re-interpretation (ISO-8859-1 / GB2312),
 * a small UTF-8 percent-encoder/decoder, UTF-8 validation, MD5 hex hashing,
 * hex and Base64 conversion, and word-boundary truncation.
 *
 * <p>All text/byte conversions in this class name their charset explicitly so
 * results do not depend on the platform default charset.
 */
public class charTools1 {

    private static final String CHARSET_LATIN1 = "ISO-8859-1";
    private static final String CHARSET_GB2312 = "GB2312";
    private static final String CHARSET_UTF8 = "UTF-8";

    /** Length of a three-byte percent escape such as {@code %E4%B8%AD}. */
    private static final int ESCAPE_LENGTH = 9;

    private static final char[] HEX_LOWER = "0123456789abcdef".toCharArray();
    private static final char[] HEX_UPPER = "0123456789ABCDEF".toCharArray();

    /** Base64 padding character. */
    private static final int fillchar = '=';

    /** Base64 alphabet; the index of a character is its 6-bit value. */
    private static final String cvt = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                    + "abcdefghijklmnopqrstuvwxyz"
                                    + "0123456789+/";

    /*
     * ============================ charset conversion ============================
     */

    /**
     * Re-interprets text that was wrongly decoded as ISO-8859-1 as GB2312.
     *
     * @param text the mis-decoded text; may be {@code null}
     * @return the text re-decoded as GB2312, or {@code null} if {@code text} is {@code null}
     * @throws IllegalStateException if this JVM does not provide one of the charsets
     */
    public String ISO2GB(String text) {
        if (text == null) {
            return null;
        }
        byte[] raw = toBytes(text, CHARSET_LATIN1);
        return toText(raw, raw.length, CHARSET_GB2312);
    }

    /**
     * Encodes text as GB2312 and exposes each resulting byte as one ISO-8859-1 character.
     *
     * @param text the text to convert; may be {@code null}
     * @return the converted text, or {@code null} if {@code text} is {@code null}
     * @throws IllegalStateException if this JVM does not provide one of the charsets
     */
    public String GB2ISO(String text) {
        if (text == null) {
            return null;
        }
        byte[] raw = toBytes(text, CHARSET_GB2312);
        return toText(raw, raw.length, CHARSET_LATIN1);
    }

    /**
     * Encodes {@code text} in the named charset.
     * An unsupported charset is reported as an unchecked exception instead of being
     * swallowed, because returning substitute data would silently corrupt the result.
     */
    private static byte[] toBytes(String text, String charsetName) {
        try {
            return text.getBytes(charsetName);
        } catch (UnsupportedEncodingException ex) {
            throw new IllegalStateException("Charset not supported by this JVM: " + charsetName, ex);
        }
    }

    /** Decodes the first {@code length} bytes of {@code bytes} using the named charset. */
    private static String toText(byte[] bytes, int length, String charsetName) {
        try {
            return new String(bytes, 0, length, charsetName);
        } catch (UnsupportedEncodingException ex) {
            throw new IllegalStateException("Charset not supported by this JVM: " + charsetName, ex);
        }
    }

    /*
     * ============================ UTF-8 URL coding ==============================
     */

    /**
     * Percent-encodes every character above U+00FF as its UTF-8 bytes
     * ({@code %XX} with upper-case hex). Characters in the range U+0000..U+00FF are
     * copied through unchanged, so this is not a full URL encoder.
     *
     * <p>A surrogate pair is encoded as one four-byte UTF-8 sequence. An unpaired
     * surrogate cannot be represented in UTF-8 and is emitted as {@code %3F} ('?').
     *
     * @param text the text to encode; may be {@code null}
     * @return the encoded text, or {@code null} if {@code text} is {@code null}
     */
    public String Utf8URLencode(String text) {
        if (text == null) {
            return null;
        }
        int length = text.length();
        StringBuilder result = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char c = text.charAt(i);
            if (c <= 255) {
                result.append(c);
                i++;
                continue;
            }
            // Keep both halves of a surrogate pair together so they become one code point.
            int end = i + 1;
            if (Character.isHighSurrogate(c) && end < length
                    && Character.isLowSurrogate(text.charAt(end))) {
                end++;
            }
            byte[] utf8 = toBytes(text.substring(i, end), CHARSET_UTF8);
            for (int j = 0; j < utf8.length; j++) {
                int unsigned = utf8[j] & 0xff;
                result.append('%')
                      .append(HEX_UPPER[unsigned >> 4])
                      .append(HEX_UPPER[unsigned & 0x0f]);
            }
            i = end;
        }
        return result.toString();
    }

    /**
     * Decodes every well-formed three-byte UTF-8 percent escape ({@code %Ex%xx%xx},
     * hex digits in either case) to its character. Everything else, including
     * one-byte escapes such as {@code %20} and malformed or truncated escapes, is
     * copied through unchanged with its original letter case.
     *
     * @param text the text to decode; may be {@code null}
     * @return the decoded text, or {@code null} if {@code text} is {@code null}
     */
    public String Utf8URLdecode(String text) {
        if (text == null || text.length() == 0) {
            return text;
        }
        int length = text.length();
        StringBuilder result = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            char c = text.charAt(i);
            if (c == '%' && length - i >= ESCAPE_LENGTH) {
                String escape = text.substring(i, i + ESCAPE_LENGTH);
                if (Utf8codeCheck(escape)) {
                    result.append(CodeToWord(escape));
                    i += ESCAPE_LENGTH;
                    continue;
                }
            }
            result.append(c);
            i++;
        }
        return result.toString();
    }

    /**
     * Converts one three-byte UTF-8 percent escape to its character.
     *
     * @param text a nine-character escape such as {@code %E4%B8%AD}
     * @return the decoded character as a string, or {@code text} unchanged when it is
     *         not exactly one well-formed escape
     */
    private String CodeToWord(String text) {
        if (text == null || text.length() != ESCAPE_LENGTH) {
            return text;
        }
        int codePoint = decodeThreeByteEscape(text, 0);
        return codePoint >= 0 ? String.valueOf((char) codePoint) : text;
    }

    /**
     * Checks whether the given UTF-8 byte sequence is well formed.
     *
     * <p>Validation follows the well-formed byte sequence table of the Unicode
     * standard (RFC 3629): overlong forms, encoded surrogates, lead bytes above
     * {@code 0xF4} and truncated sequences are all rejected.
     *
     * @param b         the bytes to check
     * @param aMaxCount the maximum number of characters to inspect; bytes after that
     *                  many characters are not examined
     * @return {@code true} if the inspected prefix is well-formed UTF-8
     * @throws NullPointerException if {@code b} is {@code null}
     */
    public static boolean isValidUtf8(byte[] b, int aMaxCount) {
        int length = b.length;
        int i = 0;
        for (int charCount = 0; i < length && charCount < aMaxCount; charCount++) {
            int lead = b[i++] & 0xff;
            if (lead < 0x80) {
                continue; // plain ASCII
            }

            // Number of continuation bytes, plus the allowed range of the first one.
            int trailing;
            int secondMin = 0x80;
            int secondMax = 0xBF;
            if (lead < 0xC2) {
                return false; // stray continuation byte, or overlong C0/C1 lead
            } else if (lead <= 0xDF) {
                trailing = 1;
            } else if (lead <= 0xEF) {
                trailing = 2;
                if (lead == 0xE0) {
                    secondMin = 0xA0; // excludes overlong three-byte forms
                } else if (lead == 0xED) {
                    secondMax = 0x9F; // excludes UTF-16 surrogates
                }
            } else if (lead <= 0xF4) {
                trailing = 3;
                if (lead == 0xF0) {
                    secondMin = 0x90; // excludes overlong four-byte forms
                } else if (lead == 0xF4) {
                    secondMax = 0x8F; // excludes code points above U+10FFFF
                }
            } else {
                return false;
            }

            if (i + trailing > length) {
                return false; // sequence is cut off
            }
            int second = b[i++] & 0xff;
            if (second < secondMin || second > secondMax) {
                return false;
            }
            for (int j = 1; j < trailing; j++) {
                if ((b[i++] & 0xC0) != 0x80) {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Checks whether {@code text} starts with one well-formed three-byte UTF-8
     * percent escape ({@code %Ex%xx%xx}).
     *
     * @param text the candidate escape; may be {@code null}
     * @return {@code true} if the first nine characters form such an escape
     */
    private boolean Utf8codeCheck(String text) {
        return decodeThreeByteEscape(text, 0) >= 0;
    }

    /**
     * Heuristically reports whether a URL is UTF-8 percent-encoded: the first
     * {@code %} in the text must start a well-formed three-byte UTF-8 escape.
     *
     * @param text the URL to inspect; may be {@code null}
     * @return {@code true} if the first escape is a three-byte UTF-8 sequence
     */
    public boolean isUtf8Url(String text) {
        if (text == null) {
            return false;
        }
        int p = text.indexOf('%');
        return p != -1
                && text.length() - p >= ESCAPE_LENGTH
                && Utf8codeCheck(text.substring(p, p + ESCAPE_LENGTH));
    }

    /**
     * Decodes the three-byte escape starting at {@code offset}.
     *
     * @return the decoded code point (U+0800..U+FFFF, surrogates excluded), or -1
     *         when the nine characters are not a well-formed escape
     */
    private static int decodeThreeByteEscape(String text, int offset) {
        if (text == null || offset < 0 || text.length() - offset < ESCAPE_LENGTH) {
            return -1;
        }
        int b0 = escapedByte(text, offset);
        int b1 = escapedByte(text, offset + 3);
        int b2 = escapedByte(text, offset + 6);
        if (b0 < 0 || b1 < 0 || b2 < 0) {
            return -1;
        }
        if ((b0 & 0xF0) != 0xE0 || (b1 & 0xC0) != 0x80 || (b2 & 0xC0) != 0x80) {
            return -1;
        }
        int codePoint = ((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F);
        if (codePoint < 0x800 || (codePoint >= 0xD800 && codePoint <= 0xDFFF)) {
            return -1; // overlong form or encoded surrogate
        }
        return codePoint;
    }

    /** Returns the byte value of the {@code %XX} escape at {@code offset}, or -1 if malformed. */
    private static int escapedByte(String text, int offset) {
        if (text.charAt(offset) != '%') {
            return -1;
        }
        int hi = hexValue(text.charAt(offset + 1));
        int lo = hexValue(text.charAt(offset + 2));
        return (hi < 0 || lo < 0) ? -1 : (hi << 4) | lo;
    }

    /** Returns the value of an ASCII hex digit in either case, or -1 for any other character. */
    private static int hexValue(char ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        }
        if (ch >= 'A' && ch <= 'F') {
            return ch - 'A' + 10;
        }
        return -1;
    }

    /*
     * ============================ hashing / hex / Base64 ========================
     */

    /**
     * Computes the MD5 digest of the UTF-8 bytes of {@code data} and returns it as
     * 32 lower-case hex digits.
     *
     * <p>MD5 is not collision resistant; do not use this for passwords or signatures.
     *
     * @param data the String to compute the hash of
     * @return the hex-encoded MD5 digest
     * @throws NullPointerException  if {@code data} is {@code null}
     * @throws IllegalStateException if the JVM provides no MD5 implementation
     */
    public static final String hash(String data) {
        MessageDigest md5;
        try {
            // A fresh instance per call keeps the method thread-safe without a shared lock.
            md5 = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException ex) {
            throw new IllegalStateException("MD5 MessageDigest is not available", ex);
        }
        return encodeHex(md5.digest(toBytes(data, CHARSET_UTF8)));
    }

    /**
     * Turns an array of bytes into a String representing each byte as a two-digit,
     * lower-case, unsigned hex number.
     *
     * @param bytes an array of bytes to convert to a hex string
     * @return the generated hex string, twice as long as {@code bytes}
     */
    public static final String encodeHex(byte[] bytes) {
        StringBuilder buf = new StringBuilder(bytes.length * 2);
        for (int i = 0; i < bytes.length; i++) {
            buf.append(HEX_LOWER[(bytes[i] >> 4) & 0x0f]);
            buf.append(HEX_LOWER[bytes[i] & 0x0f]);
        }
        return buf.toString();
    }

    /**
     * Turns a hex encoded string into a byte array; the inverse of
     * {@link #encodeHex(byte[])}. Digits may be upper or lower case.
     *
     * @param hex a hex encoded String with an even number of digits
     * @return the bytes represented by {@code hex}
     * @throws IllegalArgumentException if {@code hex} has odd length or contains a
     *                                  character that is not a hex digit
     */
    public static final byte[] decodeHex(String hex) {
        int length = hex.length();
        if ((length & 1) != 0) {
            throw new IllegalArgumentException("Hex string must have an even length, got " + length);
        }
        byte[] bytes = new byte[length / 2];
        for (int i = 0; i < bytes.length; i++) {
            int hi = hexValue(hex.charAt(2 * i));
            int lo = hexValue(hex.charAt(2 * i + 1));
            if (hi < 0 || lo < 0) {
                throw new IllegalArgumentException("Invalid hex digit at index " + (2 * i));
            }
            bytes[i] = (byte) ((hi << 4) | lo);
        }
        return bytes;
    }

    //*********************************************************************
    //* Base64 - a simple base64 encoder and decoder.
    //*
    //*     Copyright (c) 1999, Bob Withers - bwit@pobox.com
    //*
    //* This code may be freely used for any purpose, either personal
    //* or commercial, provided the authors copyright notice remains
    //* intact.
    //*********************************************************************

    /**
     * Encodes the UTF-8 bytes of a String as Base64.
     *
     * @param data a String to encode
     * @return the Base64 encoded String
     */
    public static String encodeBase64(String data) {
        return encodeBase64(toBytes(data, CHARSET_UTF8));
    }

    /**
     * Encodes a byte array as a padded Base64 String.
     *
     * @param data a byte array to encode
     * @return the Base64 encoded String
     */
    public static String encodeBase64(byte[] data) {
        int len = data.length;
        StringBuilder ret = new StringBuilder(((len + 2) / 3) * 4);
        for (int i = 0; i < len; i += 3) {
            boolean hasSecond = i + 1 < len;
            boolean hasThird = i + 2 < len;
            int b0 = data[i] & 0xff;
            int b1 = hasSecond ? data[i + 1] & 0xff : 0;
            int b2 = hasThird ? data[i + 2] & 0xff : 0;

            ret.append(cvt.charAt(b0 >> 2));
            ret.append(cvt.charAt(((b0 & 0x03) << 4) | (b1 >> 4)));
            ret.append(hasSecond ? cvt.charAt(((b1 & 0x0f) << 2) | (b2 >> 6)) : (char) fillchar);
            ret.append(hasThird ? cvt.charAt(b2 & 0x3f) : (char) fillchar);
        }
        return ret.toString();
    }

    /**
     * Decodes a Base64 String and interprets the decoded bytes as UTF-8 text.
     *
     * @param data a Base64 encoded String to decode
     * @return the decoded String
     * @throws IllegalArgumentException if {@code data} is not valid Base64
     */
    public static String decodeBase64(String data) {
        // Base64 text is pure ASCII; any other character is rejected by the decoder.
        return decodeBase64(toBytes(data, CHARSET_LATIN1));
    }

    /**
     * Decodes Base64 bytes and interprets the decoded bytes as UTF-8 text.
     * Decoding stops at the first padding character; missing padding is accepted.
     *
     * @param data a Base64 encoded byte array to decode
     * @return the decoded String
     * @throws IllegalArgumentException if {@code data} contains a character outside
     *                                  the Base64 alphabet or ends in the middle of a byte
     */
    public static String decodeBase64(byte[] data) {
        int len = data.length;
        byte[] out = new byte[(len * 3) / 4];
        int outLen = 0;
        int bits = 0;      // pending bits not yet written out
        int bitCount = 0;  // how many bits of 'bits' are pending
        for (int i = 0; i < len; i++) {
            int ch = data[i] & 0xff;
            if (ch == fillchar) {
                break;
            }
            int value = cvt.indexOf(ch);
            if (value < 0) {
                throw new IllegalArgumentException("Illegal Base64 character at index " + i);
            }
            bits = (bits << 6) | value;
            bitCount += 6;
            if (bitCount >= 8) {
                bitCount -= 8;
                out[outLen++] = (byte) ((bits >> bitCount) & 0xff);
                bits &= (1 << bitCount) - 1;
            }
        }
        if (bitCount == 6) {
            // A single leftover character carries only 6 bits and cannot form a byte.
            throw new IllegalArgumentException("Truncated Base64 input");
        }
        return toText(out, outLen, CHARSET_UTF8);
    }

    /*
     * ============================ string operations =============================
     */

    /**
     * Truncates a string to at most {@code length} characters, preferring a natural
     * boundary: the text is cut at the first line break inside the limit, otherwise
     * at the last space inside the limit, otherwise exactly at the limit.
     *
     * @param string the String to chop; may be {@code null}
     * @param length the maximum number of characters to keep
     * @return {@code null} if {@code string} is {@code null}; an empty string if
     *         {@code length} is not positive; otherwise a prefix of {@code string}
     *         no longer than {@code length}
     */
    public static final String chopAtWord(String string, int length) {
        if (string == null) {
            return string;
        }
        int total = string.length();
        if (length <= 0 || total == 0) {
            return "";
        }

        // A line break inside the limit always wins; for CRLF the '\r' is dropped too.
        int limit = Math.min(length, total);
        for (int i = 0; i < limit; i++) {
            char c = string.charAt(i);
            if (c == '\n') {
                return string.substring(0, i);
            }
            if (c == '\r' && i + 1 < total && string.charAt(i + 1) == '\n') {
                return string.substring(0, i);
            }
        }

        // The whole string already fits.
        if (total <= length) {
            return string;
        }

        // No line break, so chop at the last space inside the limit.
        for (int i = length - 1; i > 0; i--) {
            if (string.charAt(i) == ' ') {
                return string.substring(0, i).trim();
            }
        }

        // No word boundary found: hard cut at the limit.
        return string.substring(0, length);
    }

    /*
     * ============================ demo ==========================================
     */

    /**
     * Small demonstration: decodes two GBK-encoded URLs and one UTF-8-encoded URL,
     * then truncates a sample sentence.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        charTools1 charTools = new charTools1();

        String[] urls = {
            "http://www.baidu.com/s?bs=indexof+%D3%C3%B7%A8&f=8&wd=java+%D7%D6%B7%FB%B4%A6%C0%ED%B9%A4%BE%DF%C0%E0",
            "http://www.baidu.com/baidu?word=%D6%D0%B9%FA%B4%F3%B0%D9%BF%C6%D4%DA%CF%DF%C8%AB%CE%C4%BC%EC%CB%F7&tn=myie2dg",
            "http://www.google.com/search?hl=zh-CN&newwindow=1&q=%E4%B8%AD%E5%9B%BD%E5%A4%A7%E7%99%BE%E7%A7%91%E5%9C%A8%E7%BA%BF%E5%85%A8%E6%96%87%E6%A3%80%E7%B4%A2&btnG=%E6%90%9C%E7%B4%A2&lr="
        };
        for (String url : urls) {
            System.out.println(decodeDemoUrl(charTools, url));
        }

        // Sample Chinese sentence (written with escapes so the source stays ASCII-only).
        String charT = "\u5218\u5947\u5ead\u662f\u4e0d\u9519\u7684\u4eba";
        String ct = charTools1.chopAtWord(charT, 5);
        System.out.println(ct);
    }

    /**
     * Decodes a demo URL: UTF-8 escapes via {@link #Utf8URLdecode(String)}, anything
     * else as GBK, which is what the sample Baidu URLs use.
     */
    private static String decodeDemoUrl(charTools1 tools, String url) {
        if (tools.isUtf8Url(url)) {
            return tools.Utf8URLdecode(url);
        }
        try {
            return URLDecoder.decode(url, "GBK");
        } catch (UnsupportedEncodingException ex) {
            // Demo only: without GBK support, show the URL undecoded rather than abort.
            System.err.println("GBK charset is not available: " + ex.getMessage());
            return url;
        }
    }
}


 
0
0
分享到:
评论

相关推荐

    Java自动识别文件字符编码工具类.rar

    Java自动识别文件字符编码工具类 参考博客 https://blog.csdn.net/superbeyone/article/details/103036914 使用方式: String encode = EncodingDetect.getFileEncode(geoJsonFile); log.info("系统检测到文件[ {}...

    字符编码工具类CharacterEncodingFilter.java

    字符编码工具类CharacterEncodingFilter.java 字符编码工具类CharacterEncodingFilter.java 字符编码工具类CharacterEncodingFilter.java

    Str字符串处理工具类

    1.字符串转换为其他类型 2.字符串首字母转换为大写 3.取得一个随机字符串,包含数字和字符 4.转码 5.替换特殊字符串 6.公式格式化

    javaweb项目常用工具包

    Base64工具类-字符编码工具类-数据类型转换-日期工具类-Escape中文转码工具类-fastjson工具类-文件工具类-Http工具类-http请求工具类-用于模拟HTTP请求中GET/POST方式 -图片处理工具类-Ip工具类-mail工具类-Map工具...

    字符转换工具,用于转换编码之类的

    字符转换工具,用于转换编码之类的字符转换工具,用于转换编码之类的字符转换工具,用于转换编码之类的字符转换工具,用于转换编码之类的字符转换工具,用于转换编码之类的字符转换工具,用于转换编码之类的字符转换...

    字符转码工具

    一个转换字符的工具,实用性强

    Java 所有字符串转UTF-8 万能工具类-GetEncode.java

    不需要关心接受的字符串编码是UTF_8还是GBK,还是ios-8859-1,自动转换为utf-8编码格式,无需判断字符串原有编码,用法://处理编码String newStr = GetEncode.transcode(oldStr);

    字符集编码工具

    支持各种字符集编码,ascii、utf-8、utf-16、GBK、GB2312等等,也支持各种进制之间的转换

    CharsetUtils.java

    * 字符编码工具类 , 主要方法:1)获取传入字符串的编码格式 2)根据新的编码格式生成新字符串 /** 7位ASCII字符,也叫作ISO646-US、Unicode字符集的基本拉丁块 */ US_ASCII("US-ASCII","位ASCII字符,也叫作ISO...

    多功能文件字符集编码转换工具

    NULL 博文链接:https://darkmasky.iteye.com/blog/600056

    base64编码工具类

    base64编码工具类,使用字节数组进行编码,使用十六进制表示的字符串进行编码

    JAVA 转换字符编码工具

    NULL 博文链接:https://sammyfun.iteye.com/blog/1662240

    常用编码工具类对一些常用的编码操作类

    常用编码工具类.对一些常用的编码操作类 快速检测算法 节数组效验 字符串效验 获取唯一特征串,可用于密码加密 获取文件的特征串 解码字符串 编码字符串

    字符编码过滤器

    java过滤器实现统一字符编码 封装好的工具类,可以直接使用

    计算机字符编码Unicode与Windows.pdf

    本书主要表现Unicode编码的平面数、编码方法、码位数、字符字节数、字符区域划分等重要概念 通过UTF32、UTF-16、UTF-8以及ANSI之间的对比...作为工具书 也是计算机编程技术的一部分 还可作为计算机类教材的辅助教才。

    hutool 工具类

    编码工具-16进制工具 编码工具-转义工具 编码工具-Hash工具 编码工具-URL工具 编码工具-Base32-64工具 编码工具-Unicode工具 常用类辅助工具-转换工具 常用类辅助工具-日期工具 常用类辅助工具-字符串工具...

    GBK字符集编码表

    标准的GBK字符集编码表,工具类文档必备

    进行字符操作的工具类

    很好的工具类,实现对各类字符编码的转换,介绍了Unicode编码转换

    java常用工具类

    文件工具类,Http请求工具类,图片处理工具类。Ip工具类。mail工具类,Map工具类,MD5编码工具类,数字工具类,随机数工具类,反射工具类,字符串处理工具类,URL工具类,XML工具类,常用的数据验证工具类

    Java开发常用Util工具类

    字符串工具类/数据类型转换类/集合工具类/数组工具类/Properties文件操作类/常用流操作工具类/编码工具类/Json工具类/日期工具类/下载文件工具类/解压ZIP工具类/文件编码转码

Global site tag (gtag.js) - Google Analytics