zoukankan      html  css  js  c++  java
  • Java源码学习(JDK 11)——java.lang.String

    定义

    package java.lang;
    
    /**
     * Declaration of {@code java.lang.String}; the members are elided in this excerpt.
     * The class is {@code final} and implements {@code java.io.Serializable},
     * {@code Comparable<String>} and {@code CharSequence}.
     */
    public final class String implements java.io.Serializable, Comparable<String>, CharSequence {
    	// ...
    }
    
    • final 类,不能被继承
    • 实现 Serializable 接口,可序列化
    • 实现 Comparable 接口,可比较大小
    • 实现 CharSequence 接口,StringBuffer和StringBuilder同样实现该接口

    属性

    // Backing storage of the string content; never null.
    @Stable
    private final byte[] value;	// JDK 11 stores the content in a byte array
    
    private final byte coder;	// encoding of value: LATIN1 or UTF16
    
    static final boolean COMPACT_STRINGS;	// string compaction switch
    
    static {
    	// Assigned in the static initializer rather than at the declaration.
    	COMPACT_STRINGS = true;	
    }
    
    // Possible values of coder.
    @Native static final byte LATIN1 = 0;
    @Native static final byte UTF16  = 1;
    
    private int hash; // caches the hash code
    
    // Shared comparator instance; CaseInsensitiveComparator is a nested class.
    public static final Comparator<String> CASE_INSENSITIVE_ORDER = new CaseInsensitiveComparator();
    // ...
    
    • hash:缓存hashcode,String经常被比较,将hashcode缓存,提高效率。
    • value:JDK 8及以前,value用char数组存储,然而很多时候,字符只需要1个字节来表示。因此从JDK 9以后,value使用byte数组存储,并添加了coder,COMPACT_STRINGS字段,帮助压缩字符串存储空间。
    • coder:LATIN1表示1个字符占用1个byte;UTF16表示1个字符占用2个byte。
    • COMPACT_STRINGS:默认值为true。当值为false时,字符串必然以UTF16的形式存储。
      因此,当COMPACT_STRINGS=true并且每个字符都可用1个字节表示时,coder=LATIN1;否则coder=UTF16

    内部类

    /**
     * Comparator behind {@code CASE_INSENSITIVE_ORDER}. It picks the Latin-1 or UTF-16
     * case-insensitive comparison helper according to the coders of the two operands.
     */
    private static class CaseInsensitiveComparator implements Comparator<String>, java.io.Serializable {
        // use serialVersionUID from JDK 1.2.2 for interoperability
        private static final long serialVersionUID = 8575799808933029326L;

        /**
         * Compares {@code s1} and {@code s2} through the {@code compareToCI*} helpers.
         *
         * @param s1 left operand
         * @param s2 right operand
         * @return the result of the helper selected by the operands' coders
         */
        public int compare(String s1, String s2) {
            final byte[] left = s1.value;
            final byte[] right = s2.value;
            final boolean sameCoder = s1.coder() == s2.coder();
            if (s1.isLatin1()) {
                if (sameCoder) {
                    return StringLatin1.compareToCI(left, right);
                }
                return StringLatin1.compareToCI_UTF16(left, right);
            }
            if (sameCoder) {
                return StringUTF16.compareToCI(left, right);
            }
            return StringUTF16.compareToCI_Latin1(left, right);
        }

        /** Replaces the de-serialized object. */
        private Object readResolve() {
            return CASE_INSENSITIVE_ORDER;
        }
    }
    

    实现忽略大小写的字符串比较。
    compareToIgnoreCase方法利用该内部类的方法实现。

    构造方法

    • 无参构造
    public String() {
    	this.value = "".value;
    	this.coder = "".coder;
    }
    
    • char[]
    /**
     * Creates a string from all characters of {@code value}.
     * The whole array is used, so the {@code Void} marker argument is passed as
     * {@code null} directly instead of going through a range check.
     *
     * @param value source characters
     */
    public String(char[] value) {
        this(value, 0, value.length, null);
    }
    
    /**
     * Creates a string from {@code count} characters of {@code value} starting at
     * {@code offset}. The range is validated by {@code rangeCheck} while the
     * arguments of the delegated constructor call are evaluated.
     *
     * @param value  source characters
     * @param offset index of the first character to use
     * @param count  number of characters to use
     */
    public String(char[] value, int offset, int count) {
        this(value, offset, count, rangeCheck(value, offset, count));
    }
    
    /**
     * Validates that {@code offset}/{@code count} lie inside {@code value} by delegating
     * to the static {@code checkBoundsOffCount} helper.
     *
     * @return always {@code null}; the {@code Void} result only serves as a constructor argument
     */
    private static Void rangeCheck(char[] value, int offset, int count) {
        final int capacity = value.length;
        checkBoundsOffCount(offset, count, capacity);
        return null;
    }
    
    String(char[] value, int off, int len, Void sig) {	// sig与public方法区别开
    	if (len == 0) {
    		this.value = "".value;
    		this.coder = "".coder;
    		return;
    	}
    	if (COMPACT_STRINGS) {
    		byte[] val = StringUTF16.compress(value, off, len);
    		if (val != null) {
    			this.value = val;
    			this.coder = LATIN1;
    			return;
    		}
    	}
    	this.coder = UTF16;
    	this.value = StringUTF16.toBytes(value, off, len);
    }
    
    // StringUTF16.compress
    /**
     * Tries to narrow {@code len} chars of {@code val}, starting at {@code off}, into a
     * freshly allocated byte array.
     *
     * @return the LATIN1-encoded bytes, or {@code null} when the five-argument
     *         {@code compress} did not report {@code len} copied chars
     */
    public static byte[] compress(char[] val, int off, int len) {
        final byte[] latin1 = new byte[len];
        final int copied = compress(val, off, latin1, 0, len);
        return (copied == len) ? latin1 : null;
    }
    
    // StringUTF16.compress
    // compressedCopy char[] -> byte[]
    @HotSpotIntrinsicCandidate
    /**
     * Copies {@code len} chars from {@code src} into {@code dst}, truncating each char to a byte.
     * Copying stops at the first char above {@code 0xFF}; bytes written before that point
     * remain in {@code dst}.
     *
     * @param src    source chars
     * @param srcOff index of the first char to read
     * @param dst    destination bytes
     * @param dstOff index of the first byte to write
     * @param len    number of chars to copy
     * @return {@code len} when every char fitted into one byte, otherwise {@code 0}
     */
    public static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
        for (int n = 0; n < len; n++) {
            final char ch = src[srcOff + n];
            if (ch > 0xFF) {
                return 0;
            }
            dst[dstOff + n] = (byte) ch;    // truncate char to byte
        }
        return len;
    }
    
    • byte[]
    // 与char[]类似 多了字符集的解码
    // 参数可以是String类型的charsetName 也可以是Charset类型
    /**
     * Creates a string by decoding {@code length} bytes of {@code bytes}, starting at
     * {@code offset}, with the charset named {@code charsetName}.
     *
     * @param bytes       encoded input
     * @param offset      index of the first byte to decode
     * @param length      number of bytes to decode
     * @param charsetName name of the charset handed to {@code StringCoding.decode}
     * @throws NullPointerException         if {@code charsetName} is {@code null}
     * @throws UnsupportedEncodingException declared for the charset lookup done by the decoder
     */
    public String(byte[] bytes, int offset, int length, String charsetName)
            throws UnsupportedEncodingException {
        if (charsetName == null) {
            throw new NullPointerException("charsetName");
        }
        checkBoundsOffCount(offset, length, bytes.length);
        final StringCoding.Result decoded = StringCoding.decode(charsetName, bytes, offset, length);
        this.value = decoded.value;
        this.coder = decoded.coder;
    }
    
    • StringBuffer/StringBuilder
    // 内容复制 StringBuffer/StringBuilder修改不影响String
    /**
     * Creates a string from {@code buffer} by delegating to the {@code String}-argument
     * constructor with the result of {@code buffer.toString()}.
     *
     * @param buffer source buffer
     */
    public String(StringBuffer buffer) {
    	this(buffer.toString());
    }
    /**
     * Creates a string from {@code builder} by delegating to a two-argument constructor
     * that is not shown in this excerpt.
     * NOTE(review): the {@code null} second argument is presumably a {@code Void} marker
     * like the one on the {@code char[]} constructor; confirm against the full source.
     *
     * @param builder source builder
     */
    public String(StringBuilder builder) {
    	this(builder, null);
    }
    

    方法

    • length:返回长度
    // UTF16编码的需要将长度/2
    /**
     * Returns the length of this string: the byte count of the backing array shifted
     * right by the coder value (0 for LATIN1, 1 for UTF16), i.e. halved for UTF16.
     */
    public int length() {
        final int byteCount = value.length;
        return byteCount >> coder();
    }
    /**
     * Returns the effective coder (LATIN1 = 0, UTF16 = 1): the stored {@code coder}
     * when compaction is enabled, otherwise always {@code UTF16}.
     */
    byte coder() {
        if (COMPACT_STRINGS) {
            return coder;
        }
        return UTF16;
    }
    
    • isEmpty:长度是否为0
    • charAt:某位置上的字符
    • getChars:获取char数组
    • getBytes:获取byte数组
    • equals:字符串相等
    /**
     * Compares this string with {@code anObject}.
     *
     * @param anObject object to compare against
     * @return {@code true} for the same reference, or for a {@code String} with the same
     *         coder whose bytes are equal according to the matching helper
     */
    public boolean equals(Object anObject) {
        if (this == anObject) {
            return true;
        }
        if (!(anObject instanceof String)) {
            return false;
        }
        final String other = (String) anObject;
        // Strings with different coders are treated as unequal without comparing bytes.
        if (coder() != other.coder()) {
            return false;
        }
        if (isLatin1()) {
            return StringLatin1.equals(value, other.value);
        }
        return StringUTF16.equals(value, other.value);
    }
    
    • contentEquals:内容相同
    /**
     * Tells whether {@code cs} holds the same char sequence as this string.
     *
     * @param cs sequence to compare against
     * @return {@code true} when the lengths match and every char is equal
     */
    public boolean contentEquals(CharSequence cs) {
        // StringBuffer or StringBuilder argument.
        if (cs instanceof AbstractStringBuilder) {
            final AbstractStringBuilder builder = (AbstractStringBuilder) cs;
            if (!(cs instanceof StringBuffer)) {
                return nonSyncContentEquals(builder);
            }
            // Hold the StringBuffer's monitor while its content is compared.
            synchronized (cs) {
                return nonSyncContentEquals(builder);
            }
        }
        // String argument.
        if (cs instanceof String) {
            return equals(cs);
        }
        // Any other CharSequence: compare char by char.
        final int n = cs.length();
        if (n != length()) {
            return false;
        }
        final byte[] bytes = this.value;
        if (!isLatin1()) {
            return StringUTF16.contentEquals(bytes, cs, n);
        }
        for (int i = 0; i < n; i++) {
            if ((bytes[i] & 0xff) != cs.charAt(i)) {
                return false;
            }
        }
        return true;
    }
    
    • equalsIgnoreCase:忽略大小写字符串相同
    • compareTo:字符串比较,按字典序
    • compareToIgnoreCase:忽略大小写的字符串比较
    • regionMatches:字符串范围内相等
    • startsWith:是否以字符串开头
    • endsWith:是否以字符串结尾
    /**
     * Tells whether this string ends with {@code suffix}, implemented as a
     * {@code startsWith} test at offset {@code length() - suffix.length()}.
     *
     * @param suffix candidate suffix
     */
    public boolean endsWith(String suffix) {
        final int offset = length() - suffix.length();
        return startsWith(suffix, offset);
    }
    
    • indexOf:返回第一次出现的下标,未出现返回-1
    // StringLatin1.indexOf
    /**
     * Finds the first occurrence of the first {@code strCount} bytes of {@code str}
     * within the first {@code valueCount} bytes of {@code value}.
     *
     * @param value      bytes to search in
     * @param valueCount number of bytes of {@code value} to consider
     * @param str        bytes to search for; {@code str[0]} is read unconditionally
     * @param strCount   number of bytes of {@code str} to match
     * @param fromIndex  index at which the search starts
     * @return index of the first match, or {@code -1} when there is none
     */
    public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
        final byte head = str[0];
        final int lastStart = valueCount - strCount;
        int start = fromIndex;
        while (start <= lastStart) {
            if (value[start] == head) {
                // First byte matches; count how many following bytes match as well.
                int matched = 1;
                while (matched < strCount && value[start + matched] == str[matched]) {
                    matched++;
                }
                if (matched == strCount) {
                    return start;
                }
            }
            start++;
        }
        return -1;
    }
    
    • lastIndexOf:返回最后一次出现的下标,未出现返回-1
    • substring:子串
    • subSequence:子CharSequence
    /**
     * Returns the requested range as a {@code CharSequence}; the result is the
     * {@code String} produced by {@code substring(beginIndex, endIndex)}.
     *
     * @param beginIndex start index passed to {@code substring}
     * @param endIndex   end index passed to {@code substring}
     */
    public CharSequence subSequence(int beginIndex, int endIndex) {
        final String slice = substring(beginIndex, endIndex);
        return slice;
    }
    
    • concat:字符串拼接
    /**
     * Returns this string followed by {@code str}.
     *
     * @param str string to append
     * @return {@code this} when {@code str} is empty, otherwise a new string
     */
    public String concat(String str) {
        if (str.isEmpty()) {
            return this;
        }
        if (coder() != str.coder()) {
            // Different coders: write both operands into one UTF16 buffer.
            final int thisLen = length();
            final int otherLen = str.length();
            final byte[] utf16 = StringUTF16.newBytesFor(thisLen + otherLen);
            getBytes(utf16, 0, UTF16);
            str.getBytes(utf16, thisLen, UTF16);
            return new String(utf16, UTF16);
        }
        // Same coder: join the two backing arrays directly.
        final byte[] head = this.value;
        final byte[] tail = str.value;
        final byte[] joined = Arrays.copyOf(head, head.length + tail.length);
        System.arraycopy(tail, 0, joined, head.length, tail.length);
        return new String(joined, coder);
    }
    
    • replace:字符(串)替换,替换所有出现
    • matches:正则匹配
    • contains:包含
    /**
     * Tells whether this string contains {@code s}, i.e. whether {@code indexOf}
     * of its {@code toString()} form is non-negative.
     *
     * @param s sequence to look for
     */
    public boolean contains(CharSequence s) {
        final String needle = s.toString();
        return indexOf(needle) >= 0;
    }
    
    • replaceFirst:字符串替换,替换第一次出现
    • replaceAll:字符串正则替换
    • split:字符串分割,可添加限制数量
    • join:静态方法,将元素用delimiter连接起来,元素可以是CharSequence,或是迭代器中的元素
    public static String join(CharSequence delimiter, CharSequence... elements);
    public static String join(CharSequence delimiter, Iterable<? extends CharSequence> elements);
    
    // Varargs overload: the elements are listed after the "," delimiter.
    System.out.println(String.join(",", "ab", "c"));
    // Iterable overload: the elements come from a List.
    System.out.println(String.join(",", Arrays.asList("ab", "c")));
    // each call prints: ab,c
    
    • toLowerCase:转为小写
    • toUpperCase:转为大写
    • trim:去掉开头结尾所有码点小于等于空格(U+0020)的字符;其他Unicode空白字符(如全角空格U+3000)不会被去掉
    /**
     * Strips leading and trailing bytes whose unsigned value is at most {@code ' '}.
     *
     * @param value LATIN1 bytes to trim
     * @return a new string built by {@code newString} from the remaining range, or
     *         {@code null} when nothing was stripped
     */
    public static String trim(byte[] value) {
        final int total = value.length;
        int start = 0;
        while (start < total && (value[start] & 0xff) <= ' ') {
            start++;
        }
        int end = total;
        while (start < end && (value[end - 1] & 0xff) <= ' ') {
            end--;
        }
        if (start == 0 && end == total) {
            return null;
        }
        return newString(value, start, end - start);
    }
    
    • strip:去掉开头结尾的所有空白字符
    • stripLeading:去掉开头空白
    • stripTrailing:去掉结尾空白
    • isBlank:是否只含有空白字符
    • lines:返回Stream
    System.out.println("1
    2
    3
    ".lines().count());
    // output:3
    
    • toCharArray:返回char数组
    • format:静态方法,字符串格式化
    • valueOf:静态方法,转化为字符串
    /**
     * Converts {@code obj} to a string.
     *
     * @param obj object to convert, may be {@code null}
     * @return the literal {@code "null"} for a {@code null} argument, otherwise {@code obj.toString()}
     */
    public static String valueOf(Object obj) {
        if (obj == null) {
            return "null";
        }
        return obj.toString();
    }
    
    • copyValueOf:静态方法,将char[]复制为字符串
    • intern:JDK7之后,可理解为:将首次遇到的字符串加载到常量池中,并返回常量池中的引用
      • 常量池中有该字符串的引用,则返回常量池中的引用
      • 常量池中没有字符串的引用,则将字符串加载到常量池中,并返回该字符串对象的引用
    • repeat:重复字符串
    /**
     * Returns the content of this string repeated {@code count} times.
     *
     * @param count number of repetitions
     * @return {@code this} for a count of 1, {@code ""} for an empty string or a count of 0,
     *         otherwise a new string with the repeated bytes
     * @throws IllegalArgumentException if {@code count} is negative
     * @throws OutOfMemoryError if the repeated byte length would exceed {@code Integer.MAX_VALUE}
     */
    public String repeat(int count) {
        if (count < 0) {
            throw new IllegalArgumentException("count is negative: " + count);
        }
        if (count == 1) {
            return this;
        }
        final int unit = value.length;
        if (unit == 0 || count == 0) {
            return "";
        }
        if (unit == 1) {
            // Single byte: fill the result directly.
            final byte[] filled = new byte[count];
            Arrays.fill(filled, value[0]);
            return new String(filled, coder);
        }
        // Reject results whose byte length would exceed Integer.MAX_VALUE.
        if (Integer.MAX_VALUE / count < unit) {
            throw new OutOfMemoryError("Repeating " + unit + " bytes String " + count +
                    " times will produce a String exceeding maximum size.");
        }
        final int total = unit * count;
        final byte[] out = new byte[total];
        System.arraycopy(value, 0, out, 0, unit);
        // Double the already-copied prefix until less than one more doubling fits.
        int done = unit;
        while (done < total - done) {
            System.arraycopy(out, 0, out, done, done);
            done <<= 1;
        }
        // Copy the remaining tail from the start of the buffer.
        System.arraycopy(out, 0, out, done, total - done);
        return new String(out, coder);
    }
    

    注意事项

    1. 内存分配
    • String s = "abc";
      • 当常量池中不存在"abc"这个字符串的引用,在堆内存中new一个新的String对象,将这个对象的引用加入常量池。
      • 当常量池中存在"abc"这个字符串的引用,s指向这个引用;
    • String s = new String("abc"):在堆上new一个对象
    • String s = a + b:在堆上new一个对象
    • String s = "a" + "b":相当于String s = "ab"
    2. equals

    推荐"常量字符串".equals(str)而不是str.equals("常量字符串")
    若str为null,则后者会报异常,而前者是安全的。

  • 相关阅读:
    Notes of Daily Scrum Meeting(12.18)
    Notes of Daily Scrum Meeting(12.17)
    Notes of Daily Scrum Meeting(12.16)
    Notes of Daily Scrum Meeting(12.8)
    Notes of Daily Scrum Meeting(12.5)
    Notes of Daily Scrum Meeting(12.3)
    Notes of Daily Scrum Meeting(11.12)
    Linux中profile、bashrc、bash_profile之间的区别和联系
    Linux GCC编译
    mysql 5.7.16 远程连接
  • 原文地址:https://www.cnblogs.com/JL916/p/12435349.html
Copyright © 2011-2022 走看看