一、背景
1.今天主要介绍的是String类中的split()方法(后面也会顺带看一下join()和toLowerCase()),主要从源码的角度去分析这些方法底层到底做了些什么,下面我们就开始吧。
2.其实看源码也能学到很多东西的,希望坚持下去。
二、源码解析split(String regex)
1.首先我们写一个str1.split(",")其中str1定义为"i,am,a,student",下面我们就开始跳进我们的源码,开始分析。
1.1.第一阶段
/**
 * Splits this string around matches of the given regular expression.
 *
 * <p>Convenience overload: forwards to {@code split(regex, limit)} with a
 * limit of zero.
 *
 * @param regex the delimiting regular expression
 * @return the array of pieces produced by {@code split(regex, 0)}
 */
public String[] split(String regex) {
    // A limit of 0 is the default used by this overload.
    final int defaultLimit = 0;
    return split(regex, defaultLimit);
}
1.2.第二阶段源码
/**
 * Splits this string around matches of {@code regex}.
 *
 * <p>When {@code regex} is effectively a single literal character (see the
 * fastpath comment below), the split is done with {@code indexOf} and
 * {@code substring}; otherwise it is delegated to
 * {@code Pattern.compile(regex).split(this, limit)}.
 *
 * <p>Fastpath handling of {@code limit}:
 * <ul>
 *   <li>{@code limit > 0}: at most {@code limit} pieces are produced; the last
 *       piece holds the unsplit remainder of the string.</li>
 *   <li>{@code limit == 0}: no cap on the number of pieces, and trailing empty
 *       strings are dropped from the result.</li>
 *   <li>{@code limit < 0}: no cap, and trailing empty strings are kept.</li>
 * </ul>
 *
 * @param regex the delimiting regular expression
 * @param limit the result threshold, as described above
 * @return the pieces of this string; a one-element array containing this
 *         string when the fastpath finds no delimiter
 */
public String[] split(String regex, int limit) {
/* fastpath if the regex is a
(1)one-char String and this character is not one of the
RegEx's meta characters ".$|()[{^?*+\\", or
(2)two-char String and the first char is the backslash and
the second is not the ascii digit or ascii letter.
*/
char ch = 0; // the literal delimiter char; assigned as a side effect of the fastpath test below
// Each ((ch-lo)|(hi-ch)) < 0 test is true exactly when ch lies outside [lo, hi]:
// one of the two differences is then negative, so the OR has its sign bit set.
// The final clause rejects surrogate chars, which cannot be matched one char at a time.
if (((regex.value.length == 1 &&
".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
(regex.length() == 2 &&
regex.charAt(0) == '\\' &&
(((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
((ch-'a')|('z'-ch)) < 0 &&
((ch-'A')|('Z'-ch)) < 0)) &&
(ch < Character.MIN_HIGH_SURROGATE ||
ch > Character.MAX_LOW_SURROGATE))
{
int off = 0;
int next = 0;
boolean limited = limit > 0;
// Collects the pieces cut out at each occurrence of the delimiter
ArrayList<String> list = new ArrayList<>();
while ((next = indexOf(ch, off)) != -1) { // indexOf yields -1 once no further delimiter exists
if (!limited || list.size() < limit - 1) {
list.add(substring(off, next));
off = next + 1;
} else { // last one
//assert (list.size() == limit - 1);
list.add(substring(off, value.length));
off = value.length;
break;
}
}
// If no match was found, return this
if (off == 0)
return new String[]{this};
// Add remaining segment (the text after the last delimiter)
if (!limited || list.size() < limit)
list.add(substring(off, value.length));
// Construct result; for limit == 0, first trim trailing empty strings
int resultSize = list.size();
if (limit == 0) {
while (resultSize > 0 && list.get(resultSize - 1).length() == 0) {
resultSize--;
}
}
String[] result = new String[resultSize];
return list.subList(0, resultSize).toArray(result);
}
// Not a single literal char: fall back to the regex engine
return Pattern.compile(regex).split(this, limit);
}
1.3.第三阶段
/**
 * Returns the index of the first occurrence of {@code ch} in this string,
 * searching forward from {@code fromIndex}.
 *
 * @param ch        the character (Unicode code point) to look for
 * @param fromIndex the index to start from; negative values are treated as 0
 * @return the index of the first match at or after {@code fromIndex}, or
 *         {@code -1} when there is none
 */
public int indexOf(int ch, int fromIndex) {
    final int length = value.length;

    // Starting at or past the end can never match.
    // Note: fromIndex might be near -1>>>1.
    if (fromIndex >= length) {
        return -1;
    }
    final int start = (fromIndex < 0) ? 0 : fromIndex;

    // Supplementary code points are handled by the dedicated helper.
    if (ch >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
        return indexOfSupplementary(ch, start);
    }

    // Common case: ch is a BMP code point or a negative (invalid) value,
    // so a plain char-by-char scan is enough.
    final char[] chars = this.value;
    int pos = start;
    while (pos < length) {
        if (chars[pos] == ch) {
            // Position of the first occurrence of the character
            return pos;
        }
        pos++;
    }
    return -1;
}
结果:
第1个字符串为[i]
第2个字符串为[am]
第3个字符串为[a]
第4个字符串为[student]
字符串共有:4个
三、源码解析String中的join()方法
1.首先我们写一个String.join(",", list),其中list定义为一个字符串集合(注意join()是静态方法,应通过String类名调用,而不是通过某个字符串实例),下面我们就开始跳进我们的源码,开始分析。
/**
 * Concatenates the given elements into one string, placing
 * {@code delimiter} between consecutive elements.
 *
 * @param delimiter the separator inserted between elements; must not be null
 * @param elements  the sequences to join; must not be null
 * @return the joined string
 * @throws NullPointerException if {@code delimiter} or {@code elements} is null
 */
public static String join(CharSequence delimiter,
        Iterable<? extends CharSequence> elements) {
    // Validate in the same order as before: delimiter first, then elements.
    Objects.requireNonNull(delimiter);
    Objects.requireNonNull(elements);

    final StringJoiner accumulator = new StringJoiner(delimiter);
    // Feed every element to the joiner in iteration order.
    elements.forEach(accumulator::add);
    return accumulator.toString();
}
2.joiner.add(cs)
/**
 * Appends {@code newElement} as the next element of this joiner.
 *
 * @param newElement the element to append
 * @return this joiner, so that calls can be chained
 */
public StringJoiner add(CharSequence newElement) {
// prepareBuilder() is not shown in this excerpt; presumably it returns the
// internal builder already positioned for the next element (delimiter or
// prefix written) — confirm against its source. The element is appended to it.
prepareBuilder().append(newElement);
return this;
}
四、源码解析String中的toLowerCase()方法
/**
 * Converts this string to lower case using the rules of the given locale.
 *
 * <p>The method first scans for the first character that actually changes
 * under lowercasing. If there is none, this same instance is returned.
 * Otherwise the unchanged prefix is copied and the rest is converted one
 * code point at a time.
 *
 * @param locale the locale whose case rules are applied
 * @return the lowercased string, or {@code this} when nothing needs to change
 * @throws NullPointerException if {@code locale} is null
 */
public String toLowerCase(Locale locale) {
// A null locale is rejected outright with a NullPointerException
if (locale == null) {
throw new NullPointerException();
}
int firstUpper; // index of the first char that needs converting; assigned by the scan loop below
final int len = value.length; // length of the source char array
// Skip over the leading characters that are already in lowercase form
scan: {
for (firstUpper = 0 ; firstUpper < len; ) {
char c = value[firstUpper];
// The current char is in the high-surrogate range
if ((c >= Character.MIN_HIGH_SURROGATE)
&& (c <= Character.MAX_HIGH_SURROGATE)) {
int supplChar = codePointAt(firstUpper);
if (supplChar != Character.toLowerCase(supplChar)) {
break scan;
}
// Advance by the number of chars this code point occupies
firstUpper += Character.charCount(supplChar);
} else {
// Character.toLowerCase(c) gives the lowercase form; a difference means c must be converted
if (c != Character.toLowerCase(c)) {
// Found the first char that changes: leave the scan block
break scan;
}
firstUpper++;
}
}
// The loop ran to the end: nothing needs converting
return this;
}
char[] result = new char[len];
int resultOffset = 0; // adjustment added to i to get the write position in result
/* Copy the leading run of characters that are already lowercase. */
System.arraycopy(value, 0, result, 0, firstUpper);
String lang = locale.getLanguage();
// NOTE(review): reference comparison with ==; presumably relies on
// getLanguage() returning interned strings — confirm.
boolean localeDependent =
(lang == "tr" || lang == "az" || lang == "lt");
char[] lowerCharArray;
int lowerChar;
int srcChar;
int srcCount;
// From firstUpper onward every code point goes through the conversion below
for (int i = firstUpper; i < len; i += srcCount) {
srcChar = (int)value[i];
// The current char is a high surrogate
if ((char)srcChar >= Character.MIN_HIGH_SURROGATE
&& (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
// Read the full Unicode code point
srcChar = codePointAt(i);
// Number of chars that code point occupies in the source
srcCount = Character.charCount(srcChar);
} else {
srcCount = 1;
}
// Special cases: the three languages flagged above, or one of two specific characters
if (localeDependent ||
srcChar == '\u03A3' || // GREEK CAPITAL LETTER SIGMA
srcChar == '\u0130') { // LATIN CAPITAL LETTER I WITH DOT ABOVE
lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
} else {
// General case: convert directly with Character.toLowerCase()
lowerChar = Character.toLowerCase(srcChar);
}
// Slow path: the mapping is Character.ERROR or a supplementary code point
if ((lowerChar == Character.ERROR)
|| (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
if (lowerChar == Character.ERROR) {
// Presumably ERROR signals a mapping that does not fit a single
// code point, so it is fetched as a char array — confirm.
lowerCharArray =
ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
} else if (srcCount == 2) {
// Source is a surrogate pair: write the result chars straight into
// result and adjust the offset by (chars written - chars consumed).
resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;
continue;
} else {
lowerCharArray = Character.toChars(lowerChar);
}
// Length of the lowercase mapping for this code point
int mapLen = lowerCharArray.length;
// The mapping is longer than the source chars it replaces
if (mapLen > srcCount) {
// Grow the result array by the difference
char[] result2 = new char[result.length + mapLen - srcCount];
// Copy what has been written so far: result --> result2
System.arraycopy(result, 0, result2, 0, i + resultOffset);
result = result2;
}
for (int x = 0; x < mapLen; ++x) {
result[i + resultOffset + x] = lowerCharArray[x];
}
resultOffset += (mapLen - srcCount);
} else {
// Simple case: store the single converted char at its write position
result[i + resultOffset] = (char)lowerChar;
}
}
return new String(result, 0, len + resultOffset);
}
五、结束
今天就看到这吧,String类中split()、join()和toLowerCase()这几个方法的源码基本上分析完了,天道酬勤!!!共勉