zoukankan      html  css  js  c++  java
  • 在R语言中使用Stringr进行字符串操作

    今天来学习下R中字符串处理操作,主要是stringr包中的字符串处理函数的用法。

    先导入stringr包,library(stringr),require(stringr),或者stringr::函数名;这几种方式都行。

    一、检测是否匹配

    我们先定义一个字符向量(animal)和两个字符串变量(str1、str2),在此基础上演示各个函数基本用法。

      1 library(stringr)
      2 animal<-c("cow","dog","sheep","goat","pig","monkey","cat","cat")
      3 str1<-"I love cat, cat cat !"
      4 str2<-"lovelovelove"
      5 
      6 str_detect(animal,"cow") #逐个元素检测,匹配到指定字符串返回TRUE,否则返回FALSE
      7 [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
      8 
      9 str_detect(str1,"love")
     10 [1] TRUE
     11 
     12 str_which(animal,"dog") #返回匹配到的元素在向量中的下标
     13 [1] 2
     14 
     15 str_which(animal,"cat")
     16 [1] 7 8
     17 
     18 str_which(str2,"love") #返回的是匹配元素的下标,与匹配次数无关;str2只有一个元素,所以返回1
     19 [1] 1
     20 
     21 str_count(animal,"cat") #返回匹配次数
     22 [1] 0 0 0 0 0 0 1 1
     23 
     24 str_count(str1,"cat")
     25 [1] 3
     26 
     27 str_locate(animal,"cat") #返回每个元素中第一个匹配的起止位置,未匹配到则为NA
     28      start end
     29 [1,]    NA  NA
     30 [2,]    NA  NA
     31 [3,]    NA  NA
     32 [4,]    NA  NA
     33 [5,]    NA  NA
     34 [6,]    NA  NA
     35 [7,]     1   3
     36 [8,]     1   3
     37 
     38 str_locate(str1,"cat")
     39     start end
     40 [1,]     8  10
     41 
     42 str_locate(str2,"love") #连续重复时只返回第一个匹配的起止位置(全部位置可用str_locate_all)
     43      start end
     44 [1,]     1   4
     45 

    二、子串提取

      1 str_sub(str1,1,3) # 后面两个参数为起始,结束位置
      2 [1] "I l"
      3 
      4 str_sub(str1,1) # 可以只跟起始位置,默认到结束位置
      5 [1] "I love cat, cat cat !"
      6 
      7 str_sub(str1,3)
      8 [1] "love cat, cat cat !"
      9 
     10 str_sub(str1,-5) #位置还可以为负数
     11 [1] "cat !"
     12 
     13 str_sub(str1,-5,-1)
     14 [1] "cat !"
     15 
     16 str_subset(str1,"a") #匹配到指定字符就返回整个字符串
     17 [1] "I love cat, cat cat !"
     18 
     19 str_subset(str1,"x") #匹配不到则返回空
     20 character(0)
     21 
     22 str_extract(str1,"cat") #返回第一个匹配到字符串
     23 [1] "cat"
     24 str_extract(str1,"ca")
     25 [1] "ca"
     26 
     27 str_extract_all(str1,"cat") #返回所有匹配到字符串  列表形式返回
     28 [[1]]
     29 [1] "cat" "cat" "cat"
     30 
     31 str_extract_all(str1,"[aoe]")   #返回所有匹配到字符串  列表形式返回
     32 [[1]]
     33 [1] "o" "e" "a" "a" "a"
     34 
     35 str_match(str1,"cat")  #返回第一个匹配到字符串  矩阵形式返回
     36      [,1]
     37 [1,] "cat"
     38 
     39 str_match_all(str1,"cat") #返回所有匹配到字符串  矩阵形式返回
     40 [[1]]
     41      [,1]
     42 [1,] "cat"
     43 [2,] "cat"
     44 [3,] "cat"
     45 
     46 str_match_all(str2,"love")
     47 [[1]]
     48      [,1]
     49 [1,] "love"
     50 [2,] "love"
     51 [3,] "love"
     52 
     53 str_match(str2,"love")
     54      [,1]
     55 [1,] "love"
     56 
     57 str_match_all(str1,"(I|cat)") #可以多个匹配;返回矩阵的第1列是完整匹配,第2列起依次是各个括号捕获组匹配到的内容,这里整个模式就是一个捕获组,所以两列相同
     58 [[1]]
     59      [,1]  [,2]
     60 [1,] "I"   "I"
     61 [2,] "cat" "cat"
     62 [3,] "cat" "cat"
     63 [4,] "cat" "cat"

    三、字符串长度处理

      1 str_length(str2) # 返回字符串长度
      2 [1] 12
      3 
      4 str_length("good job !") # 空格也算一个字符长度
      5 [1] 10
      6 
      7 str_trunc(str2,4) #将字符串截断到指定宽度,被截掉的部分用省略符(默认为"...")代替
      8 [1] "l..."
      9 
     10 str_trunc(str2,4,ellipsis = "*") #ellipsis 指定替换符
     11 [1] "lov*"
     12 
     13 str_trunc(str2,width = 8,ellipsis = "#") #width指定截断后的总长度(包含省略符),此处为前7个字符加1个省略符
     14 [1] "lovelov#"
     15 
     16 str_trunc(str2,width = 8,side = c("left"),ellipsis = "#") # side指定方向(right,center,left)
     17 [1] "#ovelove"
     18 
     19 str_trim("sssss
    ") # 去掉字符串首尾空字符,换行,空格等;字符串内部空字符无法去除
     20 [1] "sssss"
     21 str_trim(" sssss
    ")
     22 [1] "sssss"

    四、字符串替换

      1 str1
      2 [1] "I love cat, cat cat !"
      3 
      4 str_sub(str1,1,6) #提取子串
      5 [1] "I love"
      6 
      7 str_sub(str1,1,6)<-"she love" #子串替换
      8 str1
      9 [1] "she love cat, cat cat !"
     10 
     11 str_sub(animal,1,1)<-"F" #向量替换也可以
     12 animal
     13 [1] "Fow"    "Fog"    "Fheep"  "Foat"   "Fig"    "Fonkey" "Fat"
     14 [8] "Fat"
     15 
     16 str1<-"I love cat, cat cat !"
     17 
     18 str_replace(str1,"cat","dog") #替换第一个匹配项
     19 [1] "I love dog, cat cat !"
     20 
     21 str_replace_all(str1,"cat","dog") # 替换所有匹配项
     22 [1] "I love dog, dog dog !"
     23 
     24 str_to_lower(str1) # 全部转为小写字母
     25 [1] "i love cat, cat cat !"
     26 
     27 str_to_upper(str1) # 全部转为大写字母
     28 [1] "I LOVE CAT, CAT CAT !"
     29 
     30 str_to_title(str1) # 单词首字母转为大写
     31 [1] "I Love Cat, Cat Cat !"
     32 
     33 str_to_title(str2)
     34 [1] "Lovelovelove"
     35 


    五、字符串分割和连接

      1 str_c(str1,str2,sep="+") # 字符串连接
      2 [1] "I love cat, cat cat !+lovelovelove"
      3 
      4 str_c(animal,str2,sep="+") #向量中的每个元素依次与字符串连接
      5 [1] "Fow+lovelovelove"    "Fog+lovelovelove"    "Fheep+lovelovelove"
      6 [4] "Foat+lovelovelove"   "Fig+lovelovelove"    "Fonkey+lovelovelove"
      7 [7] "Fat+lovelovelove"    "Fat+lovelovelove"
      8 
      9 str_c(animal,sep="",collapse = "+") # 向量字符串连接
     10 [1] "Fow+Fog+Fheep+Foat+Fig+Fonkey+Fat+Fat"
     11 
     12 str_dup(str1,2) #字符串重复,数字代表次数
     13 [1] "I love cat, cat cat !I love cat, cat cat !"
     14 str_dup(str2,3)
     15 [1] "lovelovelovelovelovelovelovelovelove"
     16 
     17 str_split_fixed(animal,"",n=2) #分割字符串,分隔符,n=分割份数,返回矩阵
     18      [,1] [,2]
     19 [1,] "F"  "ow"
     20 [2,] "F"  "og"
     21 [3,] "F"  "heep"
     22 [4,] "F"  "oat"
     23 [5,] "F"  "ig"
     24 [6,] "F"  "onkey"
     25 [7,] "F"  "at"
     26 [8,] "F"  "at"
     27 
     28 str_split_fixed(str2,"",n=4)
     29      [,1] [,2] [,3] [,4]
     30 [1,] "l"  "o"  "v"  "elovelove"
     31 
     32 str_split(str2,"",4) #分割字符串,第2个参数为分隔符,第3个参数n为分割份数,返回列表
     33 [[1]]
     34 [1] "l"         "o"         "v"         "elovelove"
     35 
     36 str_glue("pi is {str1}") # 字符串中嵌入变量,{}花括号内可以是任意R变量或表达式,会被求值后插入
     37 pi is I love cat, cat cat !
     38 
     39 str_glue("pi is {pi}")
     40 pi is 3.14159265358979
     41 
     42 str_glue("log2(8) is {log2(8)}")
     43 log2(8) is 3
     44 
     45 str_glue_data(mtcars, "{rownames(mtcars)} has {hp} hp") #数据框或列表对应行连接字符串
     46 Mazda RX4 has 110 hp
     47 Mazda RX4 Wag has 110 hp
     48 Datsun 710 has 93 hp
     49 Hornet 4 Drive has 110 hp
     50 Hornet Sportabout has 175 hp
     51 Valiant has 105 hp
     52 
     53  str_glue_data(mtcars, "{rownames(mtcars)} has {hp*1000} hp") # 还可以做相应计算
     54 Mazda RX4 has 110000 hp
     55 Mazda RX4 Wag has 110000 hp
     56 Datsun 710 has 93000 hp
     57 Hornet 4 Drive has 110000 hp
     58 
     59 str_glue_data(mtcars, "{rownames(mtcars)} has {substr(wt,1,2)} wt") # 花括号内也可以调用函数,这里用substr截取子串
     60 Mazda RX4 has 2. wt
     61 Mazda RX4 Wag has 2. wt
     62 Datsun 710 has 2. wt
     63 Hornet 4 Drive has 3. wt


    六、字符串排序

      1 str2
      2 [1] "lovelovelove"
      3 str_order(str2,decreasing = T) # 返回排序后各元素在原向量中的下标;str2只有一个元素,所以返回1
      4 [1] 1
      5 
      6 animal
      7 [1] "Fow"    "Fog"    "Fheep"  "Foat"   "Fig"    "Fonkey" "Fat"
      8 [8] "Fat"
      9 animal[str_order(animal,decreasing = T)]
     10 [1] "Fow"    "Fonkey" "Fog"    "Foat"   "Fig"    "Fheep"  "Fat"
     11 [8] "Fat"
     12 
     13 animal
     14 [1] "Fow"    "Fog"    "Fheep"  "Foat"   "Fig"    "Fonkey" "Fat"
     15 [8] "Fat"
     16 str_sort(animal) #直接对向量字符串排序
     17 [1] "Fat"    "Fat"    "Fheep"  "Fig"    "Foat"   "Fog"    "Fonkey"
     18 [8] "Fow"
     19 
    
    本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文链接,否则保留追究法律责任的权利.
  • 相关阅读:
    sql常用语句
    java学习(东软睿道)2019-09-06(预课)《随堂笔记》
    Servlet和JSP学习总结
    由字符集的转换想到的问题
    mysql主从搭建
    CentOS 源码安装MySQL5.7
    Linux搭建FTP服务器
    连接MySQL报错误代码 ERROR 1045时的解决方案
    [js]使用百度编辑器uediter时遇到的一些问题(span,div等被过滤)
    [css]将textarea前的文字设置在左上角
  • 原文地址:https://www.cnblogs.com/mmtinfo/p/11975120.html
Copyright © 2011-2022 走看看