Skip to content

Commit

Permalink
Merge pull request #237 from tulibraries/BL-1924-solr-synonyms
Browse files Browse the repository at this point in the history
Bl 1924 solr synonyms
  • Loading branch information
cdoyle-temple authored Dec 16, 2024
2 parents 465cd73 + 6124f5d commit 05decf0
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 29 deletions.
42 changes: 13 additions & 29 deletions schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,16 @@
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<!-- General text field type. It declares one analyzer with no type attribute,
     so the chain below is not split into separate index and query analyzers.
     NOTE(review): this listing appears to come from a diff view whose +/- markers
     were lost, so lines removed and lines added by the commit are both shown.
     PorterStemFilterFactory and WordDelimiterGraphFilterFactory each occur twice
     below; confirm against the committed schema.xml which entries are live. -->
<analyzer>
<!-- Character-level rewrites run on the raw text before tokenization. -->
<!-- Applies the character mappings listed in char-filter-mapping.txt. -->
<charFilter class="solr.MappingCharFilterFactory" mapping="char-filter-mapping.txt"/>
<!-- A run of punctuation with whitespace on both sides becomes a single space. -->
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(\p{Punct}+)\s+" replacement=" " />
<!-- Whitespace followed by a double quote becomes a single space (the quote is dropped). -->
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(&quot;)" replacement=" " />
<!-- Trailing ; : . , or double-quote characters, plus any whitespace after them,
     are replaced by a single space at the end of the input. -->
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="([;:.,&quot;]+)\s*$" replacement=" " />
<!-- Tokens are split on whitespace only; remaining punctuation stays attached
     to its token until the word delimiter filter runs. -->
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ICUFoldingFilterFactory" /> <!-- NFKC, case folding, diacritics removed -->
<!-- NOTE(review): stemmer listed before word delimiting; presumably the removed
     (pre-commit) position, since the same filter also closes the chain. Confirm. -->
<filter class="solr.PorterStemFilterFactory" />
<!-- Word delimiter variant with explicit flags: splits on case changes but not on
     letter/digit boundaries, emits word and number parts plus their catenations,
     and does not keep the original token. NOTE(review): presumably the pre-commit
     configuration. Confirm. -->
<filter class="solr.WordDelimiterGraphFilterFactory"
splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
catenateAll="0" preserveOriginal="0" stemEnglishPossessive="1" />
<!-- Word delimiter variant that sets only catenateAll and preserveOriginal,
     leaving every other option at the factory default. -->
<filter class="solr.WordDelimiterGraphFilterFactory" catenateAll="1" preserveOriginal="1"/>
<!-- Flattens the token graph produced by the graph filter above. -->
<filter class="solr.FlattenGraphFilterFactory"/>
<!-- Drops duplicate tokens that share a position. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<!-- Porter stemming as the last step of the chain. -->
<filter class="solr.PorterStemFilterFactory" />
</analyzer>
</fieldType>

Expand Down Expand Up @@ -174,15 +175,10 @@
cross-language defaults: it tokenizes with StandardTokenizer
and lowercases the tokens. -->
<fieldType name="text_exact" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<charFilter class="solr.MappingCharFilterFactory" mapping="char-filter-mapping.txt"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- The LowerCaseFilterFactory gives us better results by normalizing case. -->
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<analyzer>
<charFilter class="solr.MappingCharFilterFactory" mapping="char-filter-mapping.txt"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- The LowerCaseFilterFactory gives us better results by normalizing case. -->
<filter class="solr.FlattenGraphFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
Expand All @@ -193,27 +189,15 @@
finally applies Porter's stemming. Synonyms from synonyms.txt are
applied at both index time and query time. -->
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<charFilter class="solr.MappingCharFilterFactory" mapping="char-filter-mapping.txt"/>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ICUFoldingFilterFactory" /> <!-- NFKC, case folding, diacritics removed -->
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.WordDelimiterGraphFilterFactory"
splitOnCaseChange="1" generateWordParts="1" catenateWords="1"
splitOnNumerics="0" generateNumberParts="1" catenateNumbers="1"
catenateAll="0" preserveOriginal="0" stemEnglishPossessive="1" />
<filter class="solr.FlattenGraphFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<analyzer>
<charFilter class="solr.MappingCharFilterFactory" mapping="char-filter-mapping.txt"/>
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(\p{Punct}+)\s+" replacement=" " />
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(&quot;)" replacement=" " />
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="([;:.,&quot;]+)\s*$" replacement=" " />
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.ICUFoldingFilterFactory" /> <!-- NFKC, case folding, diacritics removed -->
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.WordDelimiterGraphFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.WordDelimiterGraphFilterFactory" catenateAll="1" preserveOriginal="1"/>
<filter class="solr.FlattenGraphFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
Expand Down
4 changes: 4 additions & 0 deletions spec/fixtures/punctuation.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?xml version='1.0' encoding='UTF-8'?>
<collection xmlns='http://www.loc.gov/MARC21/slim' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd'>
<record xmlns='http://www.loc.gov/MARC21/slim'><leader>01159nam a2200277Ii 4500</leader><controlfield tag='005'>20240916104053.0</controlfield><controlfield tag='008'>240816s2020 bl a 000 p por d</controlfield><controlfield tag='001'>991039108455203811</controlfield><datafield ind1=' ' ind2=' ' tag='035'><subfield code='a'>(OCoLC)1455640620</subfield></datafield><datafield ind1=' ' ind2=' ' tag='040'><subfield code='a'>PPT</subfield><subfield code='b'>eng</subfield><subfield code='e'>rda</subfield><subfield code='c'>PPT</subfield></datafield><datafield ind1=' ' ind2='4' tag='050'><subfield code='a'>PQ9698.13.A554</subfield><subfield code='b'>P373 2020</subfield></datafield><datafield ind1='1' ind2=' ' tag='100'><subfield code='a'>Campos, Augusto de,</subfield><subfield code='e'>author.</subfield><subfield code='0'>https://id.loc.gov/authorities/names/n50030974</subfield></datafield><datafield ind1='1' ind2='0' tag='245'><subfield code='a'>Paraulas para Palau /</subfield><subfield code='c'>Augusto de Campos.</subfield></datafield><datafield ind1=' ' ind2=' ' tag='250'><subfield code='a'>1ª edição.</subfield></datafield><datafield ind1=' ' ind2='1' tag='264'><subfield code='a'>Londrina :</subfield><subfield code='b'>Galileu Edições,</subfield><subfield code='c'>2020.</subfield></datafield><datafield ind1=' ' ind2=' ' tag='300'><subfield code='a'>1 volume (unpaged) :</subfield><subfield code='b'>illustrations (some color) ;</subfield><subfield code='c'>21 cm</subfield></datafield><datafield ind1=' ' ind2=' ' tag='336'><subfield code='a'>text</subfield><subfield code='b'>txt</subfield><subfield code='2'>rdacontent</subfield></datafield><datafield ind1=' ' ind2=' ' tag='337'><subfield code='a'>unmediated</subfield><subfield code='b'>n</subfield><subfield code='2'>rdamedia</subfield></datafield><datafield ind1=' ' ind2=' ' tag='338'><subfield code='a'>volume</subfield><subfield code='b'>nc</subfield><subfield code='2'>rdacarrier</subfield></datafield><datafield ind1=' ' 
ind2='7' tag='655'><subfield code='a'>Concrete poetry.</subfield><subfield code='2'>lcgft</subfield><subfield code='0'>https://id.loc.gov/authorities/genreForms/gf2014026270</subfield></datafield><datafield ind1=' ' ind2='7' tag='655'><subfield code='a'>Concrete poetry.</subfield><subfield code='2'>fast</subfield><subfield code='0'>https://id.worldcat.org/fast/1726570</subfield></datafield><datafield ind1=' ' ind2='7' tag='655'><subfield code='a'>Poetry.</subfield><subfield code='2'>lcgft</subfield><subfield code='0'>https://id.loc.gov/authorities/genreForms/gf2014026481</subfield></datafield><datafield ind1=' ' ind2='7' tag='655'><subfield code='a'>Poetry.</subfield><subfield code='2'>fast</subfield><subfield code='0'>https://id.worldcat.org/fast/1423828</subfield></datafield><datafield ind1=' ' ind2=' ' tag='904'><subfield code='a'>MARCIVE-TEUM 20240906</subfield></datafield><datafield ind1=' ' ind2=' ' tag='979'><subfield code='a'>(OCoLC)1455640620</subfield></datafield><datafield ind1=' ' ind2=' ' tag='997'><subfield code='a'>20240822064047.0</subfield><subfield code='c'>CDC</subfield><subfield code='d'>TEU</subfield><subfield code='s'>ORIG</subfield><subfield code='e'>WCAT</subfield></datafield><datafield ind1=' ' ind2=' ' tag='ADM'><subfield code='b'>2024-09-16 14:40:53</subfield><subfield code='e'>991039108455203811</subfield><subfield code='d'>OCLC</subfield><subfield code='f'>20240822064123.0</subfield><subfield code='a'>2024-08-16 19:23:59</subfield><subfield code='c'>false</subfield></datafield><datafield ind1='0' ind2=' ' tag='HLD'><subfield code='b'>ASRS</subfield><subfield code='c'>ASRS</subfield><subfield code='h'>PQ9698.13.A554</subfield><subfield code='i'>P373 2020</subfield><subfield code='8'>22668600640003811</subfield><subfield code='updated'>2024-08-22 19:20:54</subfield><subfield code='created'>2024-08-22 19:20:54</subfield></datafield><datafield ind1=' ' ind2=' ' tag='ITM'><subfield code='r'>22668600640003811</subfield><subfield 
code='b'>1</subfield><subfield code='h'>0</subfield><subfield code='g'>ASRS</subfield><subfield code='t'>BOOK</subfield><subfield code='9'>39074024406001</subfield><subfield code='e'>ASRS</subfield><subfield code='8'>23668602720003811</subfield><subfield code='a'>0</subfield><subfield code='i'>PQ9698.13.A554 P373 2020</subfield><subfield code='updated'>2024-11-21 19:29:21</subfield><subfield code='q'>2024-08-16 19:25:14</subfield><subfield code='d'>ASRS</subfield><subfield code='f'>ASRS</subfield></datafield></record>
</collection>

0 comments on commit 05decf0

Please sign in to comment.