zoukankan      html  css  js  c++  java
  • Solr5 DataImport 处理1对多关系

    “问题” 和“派发处理” 是2个对象。 每个问题可以被派发N次, 最后结束问题。

    实际查询中数据量可能很大,同时既可以从问题查派发部门, 也可以从派发情况查询问题(如本部门处理的问题)。

    使用的技巧就是:增加doc_id, doc_type字段。 实现多Entity查询。

    schema.xml

      1 <?xml version="1.0" encoding="UTF-8" ?>
      2 <schema name="uum" version="1.2">
      3   <types> <!-- Field types referenced by the field declarations of this schema -->
      4     <fieldType name="boolean" class="solr.BoolField"/>
      5     <fieldType name="date" class="solr.TrieDateField"/>
      6     <fieldType name="float" class="solr.TrieFloatField"/>
      7     <fieldType name="int" class="solr.TrieIntField"/>
      8     <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
      9     <fieldType name="string" class="solr.StrField"/>
     10     <fieldType name="url" class="solr.StrField" indexed="false" stored="true" /> <!-- stored only, not indexed -->
     11     <!-- simpletext: TextField tokenized by StandardTokenizerFactory, then lower-cased -->
     12     <fieldType name="simpletext" 
     13                class="solr.TextField" 
     14                positionIncrementGap="100">
     15       <analyzer>
     16         <tokenizer class="solr.StandardTokenizerFactory"/>
     17         <filter class="solr.LowerCaseFilterFactory"/>
     18       </analyzer>
     19     </fieldType>
     20     <!-- ignored: neither indexed nor stored -->
     21     <fieldType name="ignored" class="solr.StrField" 
     22                indexed="false" stored="false" />
     23     <!-- date_l: declared with the same class and attributes as "long"; no field in this listing uses it -->
     24    <fieldType name="date_l" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
     25                
     26   </types>
     27     
     28   <fields>
     29 
     30     <!--
     31        FIELDS SHARED BY ALL DOCUMENT TYPES (PE = petition, DIS = dispatch)
     32       -->
     33 
     34     <!-- uniqueKey of the index, so it has to be distinct across all types
     35          of documents; db-data-config.xml builds it as PE_ + ID or DIS_ + ID
     36       -->
     37     <field name="doc_id" type="string" />
     38 
     39     <!-- the type (or domain) of our document: PE or DIS, set by a template in db-data-config.xml -->
     40     <field name="doc_type" type="string" />
     41 
     42     <!-- external URLs -->
     43     <dynamicField name="*_url" type="url" multiValued="false" />
     44     <dynamicField name="*_urls" type="url" multiValued="true"/>
     45 
     46     <!-- dates -->
     47     <dynamicField name="*_dt" type="date" />
     48 
     49     <!-- numeric values that might come in handy for relevancy biasing
     50          (they all relate to popularity)
     51       -->
     52     <dynamicField name="*_count" type="int" multiValued="false"/>
     53     <!-- NOTE(review): _version_ and _root_ look like Solr's reserved bookkeeping fields - confirm they are needed by solrconfig.xml -->
     54     <field name="_version_" type="long" indexed="true" stored="true"/>
     55     <field name="_root_" type="string" indexed="true" stored="false"/>
     56     <!-- Field used by Suggester for autocompletion; the copyField rules that would feed it are commented out in this schema -->
     57     <field name="autocomplete" 
     58            type="simpletext" 
     59            stored="false" 
     60            multiValued="true" />
     61 
     62     <!-- quick search field: destination of the copyField rules and the default search field; not stored -->
     63     <field name="catchall" 
     64            type="simpletext" 
     65            stored="false"
     66            omitNorms="true"
     67            multiValued="true" />
     68     <field name="ID" type="string" multiValued="false"/> <!-- NOTE(review): db-data-config.xml maps column ID to PetitionId, nothing targets this field by name - verify how it is populated -->
     69     
     70     <!-- 
     71        PETITION: columns of VW_HIS_EventInfo as mapped in db-data-config.xml (TenantId, PetitionId, Status and DeadLine are also mapped from VW_HIS_DispatchInfo for DIS documents)
     72       -->
     73     <field name="TenantId"  type="string" multiValued="false"/>
     74     <field name="PetitionId"  type="string" multiValued="false"/>
     75     <field name="PetitionNumber"  type="string" multiValued="false"/>
     76     <field name="Title"  type="simpletext" multiValued="false"/>
     77     <field name="Content"  type="simpletext" multiValued="false"/>
     78     <field name="Tel"  type="string" multiValued="false"/>
     79     <field name="EventAddress"  type="simpletext" multiValued="false"/>
     80     <field name="DutyGridName"  type="string" multiValued="false"/>
     81     <field name="ComplaintType"  type="string" multiValued="false"/>
     82     <field name="IsVoid"  type="boolean" multiValued="false"/>
     83     <field name="IsEnd"  type="boolean" multiValued="false"/>
     84     <field name="GridAddress"  type="simpletext" multiValued="false"/>
     85     <field name="CategoryName"  type="string" multiValued="false"/>
     86     <field name="Category"  type="string" multiValued="false"/>
     87     
     88     <field name="Status"  type="string" multiValued="false"/>
     89     <field name="RegisterOn"  type="date" multiValued="false"/>
     90     <field name="DeadLine"  type="date" multiValued="false"/>
     91     <field name="ReportOn"  type="date" multiValued="false"/>
     92     <field name="EndCaseOn"  type="date" multiValued="false"/>
     93     <field name="CreatedBy"  type="string" multiValued="false"/>
     94     <field name="SourceWay"  type="string" multiValued="false"/>
     95     <field name="ISWGXTSB"  type="string" multiValued="false"/>
     96     <field name="RegisterOffice"  type="string" multiValued="false"/>
     97     <field name="EventLevel"  type="string" multiValued="false"/>
     98     <field name="ImportantLevel"  type="string" multiValued="false"/>
     99     
    100         <!--
    101                 PETITION/DISPATCH: multiValued roll-up on the petition document, fed by the child entity petition_dispatch (VW_HIS_DispatchInfo rows of that petition)
    102         -->
    103           <field name="DispatchOffices" type="string" multiValued="true"/>
    104           <!--<field name="DispatchOfficeNames" />-->
    105           <field name="ReceiveOffices"  type="string" multiValued="true"/>
    106           <field name="ReceiveOfficeNames"  type="string" multiValued="true"/>
    107           
    108         
    109         <!--
    110                 PETITION/PARTICIPANT: multiValued roll-up on the petition document, fed by the child entity petiton_participant (vw_his_participant rows of that petition)
    111         -->
    112           <field name="OrgUnits" type="string" multiValued="true"/>
    113           <field name="Participants"  type="string" multiValued="true"/>
    114 
    114 
    115     <!-- 
    116        DISPATCH: single-valued fields of one dispatch record (doc_type DIS), mapped from VW_HIS_DispatchInfo by the dispatch entity in db-data-config.xml
    117       -->
    118 
    119       <field name="Dispatcher" type="string" multiValued="false"/>
    120       <field name="DispatchOn" type="date" multiValued="false"/>
    121       <field name="DispatchOffice" type="string" multiValued="false"/>
    122       <field name="DispatchOfficeName" type="string" multiValued="false"/>
    123       <field name="ReceiveOffice" type="string" multiValued="false"/>
    124       <field name="ReceiveOfficeName" type="string" multiValued="false"/>
    125       <field name="StartOn"  type="date" multiValued="false"/>
    126       <field name="DealWay"  type="string" multiValued="false"/>
    127       <field name="FeedBackType"  type="string" multiValued="false"/>
    128       <field name="FeedBackPeople"  type="string" multiValued="false"/>
    129       <field name="FeedBackOn"   type="date" multiValued="false"/>
    130       <field name="FeedBackMsg"   type="simpletext" multiValued="false"/>
    131       <field name="NoPublicOpinion"   type="simpletext" multiValued="false"/>
    132       <field name="IsPublic"  type="boolean" multiValued="false"/>
    133       <field name="IsAlreadyReply"  type="boolean" multiValued="false"/>
    134       <field name="IsAlreadyContact"  type="boolean" multiValued="false"/> <!-- mapped from column ISALREADYCONCAT in db-data-config.xml -->
    135 
    136   </fields>
    137 
    138 
    139   <!-- disabled sample: copy author names and titles to the autocomplete field
    140   <copyField source="canonical_name" dest="autocomplete"/>
    141   <copyField source="title" dest="autocomplete"/> -->
    142   
    143   <!-- copy the searchable petition fields into catchall for easy searching -->
    144   <copyField source="PetitionNumber" dest="catchall"/>
    145   <copyField source="Title" dest="catchall"/>
    146   <copyField source="Content" dest="catchall"/>
    147   <copyField source="Tel" dest="catchall"/>
    148   <copyField source="EventAddress" dest="catchall"/>
    149 
    150   <!-- A unique Key field isn't necessary, but it's the only way Solr -->
    151   <!-- can automatically replace docs when they change -->
    152   <!-- DataImportHandler is also very unhappy if you don't have one -->
    153   <uniqueKey>doc_id</uniqueKey>
    154   <!-- NOTE(review): defaultSearchField is reported as deprecated in favour of the df request parameter - confirm for the target Solr version -->
    155   <!-- It's a *very* good idea to have a default search field -->
    156   <defaultSearchField>catchall</defaultSearchField>
    157 
    158 </schema>
    schema.xml

    db-data-config.xml

      1 <dataConfig>
      2     <dataSource type="JdbcDataSource"
      3                 driver="oracle.jdbc.driver.OracleDriver"
      4                 url="jdbc:oracle:thin:@192.168.0.0:1521:test" 
      5                 user="user" 
      6                 password="pwd"
      7                 /> <!-- JDBC connection used by every entity below; NOTE(review): url, user and password are literal values in this file - replace the placeholders and keep real credentials out of version control -->
      8     <document>
      9         <entity name="petition" 
     10                 pk="ID"
     11                 transformer="TemplateTransformer"
     12                 query="select * from VW_HIS_EventInfo "
     13                 deltaImportQuery="select t.* from VW_HIS_EventInfo t where t.id='${dataimporter.delta.ID}'"
     14                 deltaQuery="select t.ID from VW_HIS_EventInfo t where  
     15                 RegistorOn> NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
     16                 or ReportOn> NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
     17                 or endcaseon> NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT') ">
     18           <!-- Root entity: one Solr document per VW_HIS_EventInfo row. The PE_ prefix keeps doc_id distinct from the DIS_ ids of the dispatch entity. deltaQuery only returns the pk, which deltaImportQuery reads as dataimporter.delta.ID. NOTE(review): NEW_TIME(.., 'PST', 'GMT') shifts last_index_time before comparing, presumably to offset a server/database time-zone difference - confirm. -->
     19           <field  column="doc_id" template="PE_${petition.ID}" />
     20           <field  column="doc_type"  template="PE" />
     21           <!-- column-to-field mappings for the petition itself -->
     22           <!--<field  column="LATLON" name="LatLon_p"/>-->
     23           <field  column="TENANTID" name="TenantId" />
     24           <field  column="ID" name="PetitionId" />
     25           <field  column="PETITIONNUMBER" name="PetitionNumber" />
     26           <field  column="TITLE" name="Title" />
     27           <field  column="CONTENT" name="Content" />
     28           <field  column="TEL" name="Tel" />
     29           <field  column="EVENTADDRESS" name="EventAddress" />
     30           <!--<field  column="AREANAME" name="AreaName" />-->
     31           <field  column="DUTYGRIDNAME" name="DutyGridName" />
     32           <field  column="GRIDADDRESS" name="GridAddress" />
     33           <field  column="COMPLAINTQUALITYNAME" name="ComplaintType" />
     34           <field  column="ISVOID" name="IsVoid" />
     35           <field  column="ISEND" name="IsEnd" />
     36           <field  column="CATEGORYNAME" name="CategoryName" />
     37           <field  column="CATEGORYCODE" name="Category" />
     38           <field  column="STATUS" name="Status" />
     39           <field  column="REGISTORON" name="RegisterOn" />
     40           <field  column="DEADLINE" name="DeadLine" />
     41           <field  column="CREATEDBY" name="CreatedBy" />
     42           <field  column="REPORTON" name="ReportOn" />
     43           <field  column="SOURCEWAY" name="SourceWay" />
     44           <field  column="ISWGXTSB" name="ISWGXTSB" />
     45           <field  column="REGISTOROFFICE" name="RegisterOffice" />
     46           <!--<field  column="TOOFFICENAME" name="ToOfficeName" />-->
     47           <field  column="EVENTLEVEL" name="EventLevel" />
     48           <field  column="IMPORTANTLEVEL" name="ImportantLevel" />
     49           <field  column="ENDCASEON" name="EndCaseOn" />
     50           <!--<field  column="ENDOPINION" name="EndOpinion" />-->
     51           <!-- Child entity: the dispatch rows of this petition are rolled up into the multiValued DispatchOffices / ReceiveOffices / ReceiveOfficeNames fields. Its deltaQuery has to return PETITIONID because parentDeltaQuery reads ${petition_dispatch.PETITIONID} to find the petition to re-index. -->
     52             <entity name="petition_dispatch"
     53                   pk="ID"
     54                   query="select * from VW_HIS_DispatchInfo where petitionid='${petition.ID}'"
     55                   deltaQuery="select ID, PETITIONID from VW_HIS_DispatchInfo where 
     56                             DISPATCHON > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
     57                         or  FEEDBACKON > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
     58                   parentDeltaQuery="select ID from VW_HIS_EventInfo where ID='${petition_dispatch.PETITIONID}'">
     59                   <!--<field  column="DISPATCHER" name="Dispatchers" />-->
     60                  
     61                   <field  column="DISPATCHOFFICE" name="DispatchOffices" />
     62                   <!--<field  column="DISPATCHOFFICENAME" name="DispatchOfficeNames" />-->
     63                   <field  column="RECEIVEOFFICE" name="ReceiveOffices" />
     64                   <field  column="RECEIVEOFFICENAME" name="ReceiveOfficeNames" />
     65             </entity>
     66             <!-- Child entity: distinct participants of this petition, rolled up into the multiValued OrgUnits / Participants fields. As above, deltaQuery returns PETITIONID so that parentDeltaQuery can resolve ${petiton_participant.PETITIONID}. -->
     67             <entity name="petiton_participant"
     68                   pk="PARTICIPANT"
     69                   query="select distinct ORGUNIT,PARTICIPANT from vw_his_participant where petitionid='${petition.ID}'"
     70                   deltaQuery="select PARTICIPANT, PETITIONID from vw_his_participant where HandleOn > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT') "
     71                   parentDeltaQuery="select ID from VW_HIS_EventInfo where ID='${petiton_participant.PETITIONID}'">
     72                     <!--<field column="HANDLEON" name="HandleOns"/>-->
     73                     <field column="ORGUNIT" name="OrgUnits"/>
     74                     <!--<field column="PARTICIPANTNAME" name="ParticipantNames"/>-->
     75                     <field column="PARTICIPANT" name="Participants"/>
     76                     <!--<field column="PARTICIPANTTYPE" name="ParticipantTypes"/>-->
     77             </entity>
     78                   
     79         </entity>
     80         
     81         <entity name="dispatch"
     82               pk="ID"
     83               transformer="TemplateTransformer"
     84               query="select * from VW_HIS_DispatchInfo"
     85               deltaImportQuery="select t.* from VW_HIS_DispatchInfo t where t.id='${dataimporter.delta.ID}'"
     86               deltaQuery="select ID from VW_HIS_DispatchInfo where DISPATCHON > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')
     87                     or  FEEDBACKON > NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')">
     88             <!-- Root entity: one Solr document per VW_HIS_DispatchInfo row; the DIS_ prefix keeps doc_id distinct from the PE_ ids of the petition entity -->
     89             <field  column="doc_id" template="DIS_${dispatch.ID}" />
     90             <field  column="doc_type" template="DIS" />
     91             
     92             <field  column="TENANTID" name="TenantId" />
     93             <field  column="PETITIONID" name="PetitionId" />
     94               <field  column="DISPATCHER" name="Dispatcher" />
     95               <field  column="DISPATCHON" name="DispatchOn" />
     96               <field  column="DISPATCHOFFICE" name="DispatchOffice" />
     97               <field  column="DISPATCHOFFICENAME" name="DispatchOfficeName" />
     98               <field  column="RECEIVEOFFICE" name="ReceiveOffice" />
     99               <field  column="RECEIVEOFFICENAME" name="ReceiveOfficeName" />
    100               <field  column="STARTON" name="StartOn" />
    101               <field  column="DEADLINE" name="DeadLine" />
    102               <field  column="DEALWAY" name="DealWay" />
    103               <field  column="STATUS" name="Status" />
    104               <field  column="FEEDBACKTYPE" name="FeedBackType" />
    105               <field  column="FEEDBACKPEOPLE" name="FeedBackPeople" />
    106               <field  column="FEEDBACKON" name="FeedBackOn" />
    107               <field  column="FEEDBACKMSG" name="FeedBackMsg" />
    108               <field  column="NOPUBLICOPINION" name="NoPublicOpinion" />
    109               <field  column="ISPUBLIC" name="IsPublic" />
    110               <field  column="ISALREADYREPLY" name="IsAlreadyReply" />
    111               <field  column="ISALREADYCONCAT" name="IsAlreadyContact" /> <!-- NOTE(review): column is spelled CONCAT while the field is Contact - verify against the view definition -->
    112               <!-- Child entity: looks up the owning petition by dispatch.PETITIONID and copies its descriptive columns onto the dispatch document. NOTE(review): its deltaQuery only watches endcaseon, so petition changes detected via RegistorOn / ReportOn in the petition entity do not re-index dispatch documents - confirm this is intended. -->
    113               <entity name="dispatch_petition"
    114                   pk="ID"
    115                   query="select * from VW_HIS_EventInfo where id='${dispatch.PETITIONID}'"
    116                   deltaQuery="select ID from VW_HIS_EventInfo where endcaseon> NEW_TIME(to_date('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss'), 'PST', 'GMT')"
    117                   parentDeltaQuery="select ID from VW_HIS_DispatchInfo where PETITIONID='${dispatch_petition.ID}'">
    118                   <field  column="PETITIONNUMBER" name="PetitionNumber" />
    119                   <field  column="TITLE" name="Title" />
    120                   <field  column="CONTENT" name="Content" />
    121                   <field  column="TEL" name="Tel" />
    122                   <field  column="EVENTADDRESS" name="EventAddress" />
    123                   <!--<field  column="AREANAME" name="AreaName" />-->
    124                   <field  column="DUTYGRIDNAME" name="DutyGridName" />
    125                   <field  column="GRIDADDRESS" name="GridAddress" />
    126                   <field  column="COMPLAINTQUALITYNAME" name="ComplaintType" />
    127                   <field  column="CATEGORYNAME" name="CategoryName" />
    128                   <field  column="CATEGORYCODE" name="Category" />
    129             </entity>
    130         </entity>
    131           
    132      </document>
    133 </dataConfig>
    db-data-config.xml
  • 相关阅读:
    Python属性、方法和类管理系列之----__slots__属性
    Python属性、方法和类管理系列之----属性初探
    解释型语言和编译型语言的不同以及Python如何运行
    Python的字符串操作和Unicode
    Python中异常(Exception)的总结
    leetcode经典动态规划题解题报告
    mybatis源码分析一
    ReentrantLock源码分析
    CopyOnWriteArrayList,CopyOnWriteArraySet源码分析
    CyclicBarrier源码分析
  • 原文地址:https://www.cnblogs.com/crabo/p/solr_dih_multi_entity.html
Copyright © 2011-2022 走看看