<?xml version="1.0" encoding="UTF-8" ?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

<xsl:template match="/FIELDS">

<!--
  textFieldType: name of the Solr field type assigned to tokenizable text fields and to the
  built-in text fields (__result, __fulltext, __all, cql.serverchoice).
  The default has to be a quoted XPath string literal. An unquoted text_common is a location
  path selecting text_common child elements of the current FIELDS node; unless such a child
  exists it evaluates to an empty string and every generated field ends up with type="".
  NOTE(review): being declared inside the template, this parameter can only be overridden with
  xsl:with-param; parameters set on the transformer bind to top-level xsl:param elements only.
  Confirm how callers are expected to supply it.
-->
<xsl:param name="textFieldType" select="'text_common'" />
<!-- Alphabets used with translate() to lower-case FIELD names (ASCII letters only). -->
<xsl:variable name="smallcase" select="'abcdefghijklmnopqrstuvwxyz'" />
<xsl:variable name="uppercase" select="'ABCDEFGHIJKLMNOPQRSTUVWXYZ'" />

<!-- 
D-Net index schema template

CHANGELOG

0.1 : first release
0.2 : added preserveOriginal="1" for text field type in the index analyzer and catenateWords="1" for the query analyzer
0.3 : changed language for SnowballPorterFilterFactory to language="German2" (index/query) in the text field type
0.4 : added solr.ASCIIFoldingFilterFactory filter (index/query) in the text field type
0.5 : added long_keyword field type, to be used for objIdentifiers
0.6 : added field types for spellchecking
0.7 : added parameter for text field type

 -->
<schema name="dnet" version="0.7">

  <types>

    <!-- Basic non-analyzed types: verbatim string, boolean and binary. -->
    <fieldType name="string"
               class="solr.StrField"
               sortMissingLast="true"
               omitNorms="true"/>
    <fieldType name="boolean"
               class="solr.BoolField"
               sortMissingLast="true"
               omitNorms="true"/>
    <!-- Element name spelled fieldType (was fieldtype) to match every other type declaration in this schema. -->
    <fieldType name="binary"
               class="solr.BinaryField"/>

    <!--
      Default numeric types, all declared with precisionStep="0".
      When range-query speed matters, the tint/tfloat/tlong/tdouble types are the alternative.
    -->
    <fieldType name="int"
               class="solr.TrieIntField"
               precisionStep="0"
               omitNorms="true"
               positionIncrementGap="0"/>
    <fieldType name="float"
               class="solr.TrieFloatField"
               precisionStep="0"
               omitNorms="true"
               positionIncrementGap="0"/>
    <fieldType name="long"
               class="solr.TrieLongField"
               precisionStep="0"
               omitNorms="true"
               positionIncrementGap="0"/>
    <fieldType name="double"
               class="solr.TrieDoubleField"
               precisionStep="0"
               omitNorms="true"
               positionIncrementGap="0"/>

    <!--
      Trie numeric types indexing each value at several precision levels, which speeds up
      range queries spanning many distinct values (see the NumericRangeQuery javadoc for
      the implementation details).

      precisionStep is expressed in bits: a smaller step produces more tokens per value,
      a slightly larger index and faster range queries; a step of 0 turns the
      multi-precision indexing off.
    -->
    <fieldType name="tint"
               class="solr.TrieIntField"
               precisionStep="8"
               omitNorms="true"
               positionIncrementGap="0"/>
    <fieldType name="tfloat"
               class="solr.TrieFloatField"
               precisionStep="8"
               omitNorms="true"
               positionIncrementGap="0"/>
    <fieldType name="tlong"
               class="solr.TrieLongField"
               precisionStep="8"
               omitNorms="true"
               positionIncrementGap="0"/>
    <fieldType name="tdouble"
               class="solr.TrieDoubleField"
               precisionStep="8"
               omitNorms="true"
               positionIncrementGap="0"/>

    <!-- Date type declared with precisionStep="0". -->
    <fieldType name="date"
               class="solr.TrieDateField"
               omitNorms="true"
               precisionStep="0"
               positionIncrementGap="0"/>

    <!-- Trie date type (precisionStep="6") intended for faster date range queries and date faceting. -->
    <fieldType name="tdate"
               class="solr.TrieDateField"
               omitNorms="true"
               precisionStep="6"
               positionIncrementGap="0"/>

    <!--
      Plain legacy types: the textual value is stored and indexed verbatim, so range
      queries are not supported (lexicographic order differs from numeric order).

      Keep these only for compatibility with indexes created by older Solr versions, or
      when sortMissingFirst / sortMissingLast is required; otherwise prefer the Trie types.
    -->
    <fieldType name="pint"
               class="solr.IntField"
               omitNorms="true"/>
    <fieldType name="plong"
               class="solr.LongField"
               omitNorms="true"/>
    <fieldType name="pfloat"
               class="solr.FloatField"
               omitNorms="true"/>
    <fieldType name="pdouble"
               class="solr.DoubleField"
               omitNorms="true"/>
    <fieldType name="pdate"
               class="solr.DateField"
               sortMissingLast="true"
               omitNorms="true"/>

    <!--
      Sortable legacy types: values are rewritten into an internal string form that is not
      human-readable but sorts lexicographically in numeric order, so range queries work.

      Keep these only for compatibility with indexes created by older Solr versions, or
      when sortMissingFirst / sortMissingLast is required; otherwise prefer the Trie types.
    -->
    <fieldType name="sint"
               class="solr.SortableIntField"
               sortMissingLast="true"
               omitNorms="true"/>
    <fieldType name="slong"
               class="solr.SortableLongField"
               sortMissingLast="true"
               omitNorms="true"/>
    <fieldType name="sfloat"
               class="solr.SortableFloatField"
               sortMissingLast="true"
               omitNorms="true"/>
    <fieldType name="sdouble"
               class="solr.SortableDoubleField"
               sortMissingLast="true"
               omitNorms="true"/>

    <!--
      text_common: whitespace-tokenized text run through WordDelimiterFilter, which splits and
      matches words on alphanumeric boundaries and non-alphanumeric characters, so that a query
      for "wifi" or "wi fi" can match a document containing "Wi-Fi".
      Stopwords come from stopwords.txt (index and query time); synonyms from synonyms.txt are
      applied at query time only. This type has no stemming filter; tokens are lower-cased and
      ASCII-folded.
    -->
    <fieldType name="text_common" class="solr.TextField" positionIncrementGap="100">

      <!-- Index time: original token preserved, word and number parts generated and catenated. -->
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                preserveOriginal="1" generateWordParts="1" generateNumberParts="1"
                catenateWords="1" catenateNumbers="1" catenateAll="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ASCIIFoldingFilterFactory"/>
      </analyzer>

      <!-- Query time: synonym expansion first, same splitting as above but without catenation. -->
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                preserveOriginal="1" generateWordParts="1" generateNumberParts="1"
                catenateWords="0" catenateNumbers="0" catenateAll="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ASCIIFoldingFilterFactory"/>
      </analyzer>

    </fieldType>
    
    <!--
      text_EFG: stemmed text type. Same whitespace tokenizer, stopword and word-delimiter
      pipeline as a plain text field, plus splitting on case change and Snowball stemming
      (language "German2", protected words listed in protwords.txt) before ASCII folding.
    -->
    <fieldType name="text_EFG" class="solr.TextField" positionIncrementGap="100">

      <!-- Index time: original token preserved, parts generated and catenated. -->
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                preserveOriginal="1" generateWordParts="1" generateNumberParts="1"
                catenateWords="1" catenateNumbers="1" catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="German2" protected="protwords.txt"/>
        <filter class="solr.ASCIIFoldingFilterFactory"/>
      </analyzer>

      <!-- Query time: synonyms expanded; word parts are not generated, words are catenated instead. -->
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="0" generateNumberParts="1"
                catenateWords="1" catenateNumbers="0" catenateAll="0"
                splitOnCaseChange="1"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.SnowballPorterFilterFactory" language="German2" protected="protwords.txt"/>
        <filter class="solr.ASCIIFoldingFilterFactory"/>
      </analyzer>

    </fieldType>

    <!--
      text_rev: general unstemmed text type. At index time every token is emitted both as-is
      and reversed (ReversedWildcardFilterFactory), which makes leading-wildcard queries
      more efficient.
    -->
    <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">

      <!-- Index time: split and catenate without case-change splitting, then add reversed tokens. -->
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1" generateNumberParts="1"
                catenateWords="1" catenateNumbers="1" catenateAll="0"
                splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.ReversedWildcardFilterFactory"
                withOriginal="true" maxPosAsterisk="3"
                maxPosQuestion="2" maxFractionAsterisk="0.33"/>
      </analyzer>

      <!-- Query time: synonyms expanded, no catenation and no reversal filter. -->
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1" generateNumberParts="1"
                catenateWords="0" catenateNumbers="0" catenateAll="0"
                splitOnCaseChange="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>

    </fieldType>

	<!--
	  spelltext: type of the __spell field. Standard tokenizer, stopword removal
	  (stopwords.txt), StandardFilter and duplicate-token removal; synonyms (synonyms.txt)
	  are expanded at query time only.
	-->
	<fieldType name="spelltext" class="solr.TextField" positionIncrementGap="100">

		<analyzer type="index">
			<tokenizer class="solr.StandardTokenizerFactory"/>
			<filter class="solr.StopFilterFactory"
			        ignoreCase="true"
			        words="stopwords.txt"/>
			<filter class="solr.StandardFilterFactory"/>
			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
		</analyzer>

		<analyzer type="query">
			<tokenizer class="solr.StandardTokenizerFactory"/>
			<filter class="solr.SynonymFilterFactory"
			        synonyms="synonyms.txt"
			        ignoreCase="true"
			        expand="true"/>
			<filter class="solr.StopFilterFactory"
			        ignoreCase="true"
			        words="stopwords.txt"/>
			<filter class="solr.StandardFilterFactory"/>
			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
		</analyzer>

	</fieldType>
    
    <!-- long_keyword: used for objIdentifiers. The keyword tokenizer is the only analysis step. -->
    <fieldType name="long_keyword"
               class="solr.TextField">
      <analyzer>
        <tokenizer class="solr.KeywordTokenizerFactory"/>
      </analyzer>
    </fieldType>

 </types>

 <fields>

 	<xsl:for-each select="FIELD">
		<!-- One <field> per FIELD of the layout. The Solr field name is the lower-cased @name. -->
		<xsl:variable name="solrName" select="translate(@name, $uppercase, $smallcase)" />

		<!--
		  Solr type, first match wins: @type int / date / long_keyword map to
		  sint / date / long_keyword, non-tokenizable fields become plain strings,
		  and everything else uses the configured text field type.
		-->
		<xsl:variable name="solrType">
			<xsl:choose>
				<xsl:when test="@type = 'int'">sint</xsl:when>
				<xsl:when test="@type = 'date'">date</xsl:when>
				<xsl:when test="@type = 'long_keyword'">long_keyword</xsl:when>
				<xsl:when test="@tokenizable = 'false'">string</xsl:when>
				<xsl:otherwise><xsl:value-of select="$textFieldType" /></xsl:otherwise>
			</xsl:choose>
		</xsl:variable>

		<!--
		  Both indexed and stored are taken from @indexable.
		  Fields are multi-valued unless the layout states multivalued="false" explicitly.
		-->
		<field name="{$solrName}"
		       type="{$solrType}"
		       indexed="{@indexable}"
		       stored="{@indexable}"
		       multiValued="{not(@multivalued = 'false')}" />
	</xsl:for-each>
   
   <!-- Built-in fields that are always emitted, independently of the FIELD list. -->

   <!-- Record identifier; declared as the uniqueKey of this schema. -->
   <field name="__indexrecordidentifier"
          type="string"
          indexed="true"
          stored="true"
          multiValued="false"
          required="true" />

   <!-- Boolean flag, false when not supplied. -->
   <field name="__deleted"
          type="boolean"
          indexed="true"
          stored="true"
          default="false" />

   <field name="__dsid"
          type="string"
          indexed="true"
          stored="true" />

   <field name="__dsversion"
          type="tdate"
          indexed="true"
          stored="true" />

   <!-- Stored-only fields using the configured text field type. -->
   <field name="__result"
          type="{$textFieldType}"
          indexed="false"
          stored="true"
          compressed="true"
          termVectors="on"
          termPositions="on"
          termOffsets="on" />

   <field name="__fulltext"
          type="{$textFieldType}"
          indexed="false"
          stored="true"
          compressed="true"
          default="" />

   <!-- Indexed-only catch-all; destination of the copyField rules and default search field. -->
   <field name="__all"
          type="{$textFieldType}"
          indexed="true"
          stored="false"
          multiValued="true"/>

   <!-- Indexed-only spellcheck source; destination of the spellcheck copyField rules. -->
   <field name="__spell"
          type="spelltext"
          indexed="true"
          stored="false"
          omitNorms="true"
          omitTermFreqAndPositions="true" />

   <field name="cql.serverchoice"
          type="{$textFieldType}"
          indexed="true"
          stored="false"
          multiValued="true"/>

   <!-- Catch-all text field indexing tokens both normally and reversed, for efficient
        leading wildcard queries. -->
   <field name="text_rev"
          type="text_rev"
          indexed="true"
          stored="false"
          multiValued="true"/>

 </fields>

 <!--
   Document uniqueness is determined and enforced on this field.
   It is a required field unless it is marked with required="false".
 -->
 <uniqueKey>__indexrecordidentifier</uniqueKey>

 <!-- Field searched by the QueryParser when the query names no field explicitly. -->
 <defaultSearchField>__all</defaultSearchField>

 <!-- SolrQueryParser configuration; defaultOperator accepts AND or OR. -->
 <solrQueryParser defaultOperator="OR"/>

  	<!-- Every layout FIELD is copied into the __all catch-all field. -->
	<xsl:for-each select="FIELD">
		<xsl:variable name="sourceField" select="translate(@name, $uppercase, $smallcase)" />
		<copyField source="{$sourceField}" dest="__all" />
	</xsl:for-each>

	<!-- FIELDs flagged with spellcheck="true" are additionally copied into __spell. -->
	<xsl:for-each select="FIELD[@spellcheck = 'true']">
		<xsl:variable name="sourceField" select="translate(@name, $uppercase, $smallcase)" />
		<copyField source="{$sourceField}" dest="__spell" />
	</xsl:for-each>

	<!-- The built-in cql.serverchoice and __fulltext fields also feed __all. -->
	<copyField source="cql.serverchoice" dest="__all"/>
	<copyField source="__fulltext" dest="__all"/>
	

 <!-- Similarity is the scoring routine for each document vs. a query.
      A custom similarity may be specified here, but the default is fine
      for most applications.  -->
 <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
 <!-- ... OR ...
      Specify a SimilarityFactory class name implementation
      allowing parameters to be used.
 -->
 <!--
 <similarity class="com.example.solr.CustomSimilarityFactory">
   <str name="paramkey">param value</str>
 </similarity>
 -->


</schema>
</xsl:template>
</xsl:stylesheet>