<?xml version="1.0"?>
<!--
 ! Copyright 2006-2011 The FLWOR Foundation.
 ! 
 ! Licensed under the Apache License, Version 2.0 (the "License");
 ! you may not use this file except in compliance with the License.
 ! You may obtain a copy of the License at
 ! 
 ! http://www.apache.org/licenses/LICENSE-2.0
 ! 
 ! Unless required by applicable law or agreed to in writing, software
 ! distributed under the License is distributed on an "AS IS" BASIS,
 ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ! See the License for the specific language governing permissions and
 ! limitations under the License.
-->

<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
  targetNamespace="http://www.zorba-xquery.com/modules/full-text"
  xmlns="http://www.zorba-xquery.com/modules/full-text"
  elementFormDefault="qualified"
  attributeFormDefault="unqualified">

  <!--======================================================================-->

  <!--
   ! Element carrying comparison options.  Its content model is empty: the
   ! options are expressed solely through the attributes declared in the
   ! compare-attributes group.
  -->
  <xs:element name="compare-options">
    <xs:complexType>
      <xs:attributeGroup ref="compare-attributes"/>
    </xs:complexType>
  </xs:element>

  <!--
   ! The three comparison settings.  Every attribute is optional; when one is
   ! left out, the default given in its declaration applies.
  -->
  <xs:attributeGroup name="compare-attributes">
    <xs:attribute name="case"
                  type="case-type"
                  use="optional"
                  default="case insensitive"/>
    <xs:attribute name="diacritics"
                  type="diacritics-type"
                  use="optional"
                  default="diacritics insensitive"/>
    <xs:attribute name="stemming"
                  type="stemming-type"
                  use="optional"
                  default="no stemming"/>
  </xs:attributeGroup>

  <!--
   ! Allowed values for the "case" attribute: a string restricted to exactly
   ! one of the four literals enumerated below.
  -->
  <xs:simpleType name="case-type">
    <xs:restriction base="xs:string">

      <!--
       ! Tokens are matched regardless of the case of characters.
      -->
      <xs:enumeration value="case insensitive"/>

      <!--
       ! Tokens are matched only if the case of their characters is the same.
      -->
      <xs:enumeration value="case sensitive"/>

      <!--
       ! Tokens are matched only if they match without regard to character
       ! case, but contain only lowercase characters.
      -->
      <xs:enumeration value="lower"/>

      <!--
       ! Tokens are matched only if they match without regard to character
       ! case, but contain only uppercase characters.
      -->
      <xs:enumeration value="upper"/>

    </xs:restriction>
  </xs:simpleType>

  <!--
   ! Allowed values for the "diacritics" attribute: a string restricted to
   ! exactly one of the two literals enumerated below.
  -->
  <xs:simpleType name="diacritics-type">
    <xs:restriction base="xs:string">

      <!--
       ! Matches tokens with and without diacritics.
      -->
      <xs:enumeration value="diacritics insensitive"/>

      <!--
       ! Matches tokens only if they contain the same diacritics.
      -->
      <xs:enumeration value="diacritics sensitive"/>

    </xs:restriction>
  </xs:simpleType>

  <!--
   ! Allowed values for the "stemming" attribute: a string restricted to
   ! exactly one of the two literals enumerated below.
  -->
  <xs:simpleType name="stemming-type">
    <xs:restriction base="xs:string">

      <!--
       ! Tokens are not stemmed.
      -->
      <xs:enumeration value="no stemming"/>

      <!--
       ! Matches may contain tokens that have the same stem.
      -->
      <xs:enumeration value="stemming"/>

    </xs:restriction>
  </xs:simpleType>

  <!--
   ! Type with empty content whose single, mandatory "value" attribute holds
   ! an xs:boolean.  Used by the boolean child elements of
   ! tokenizer-properties.
  -->
  <xs:complexType name="boolean-value">
    <xs:attribute name="value" type="xs:boolean" use="required"/>
  </xs:complexType>

  <!--======================================================================-->

  <!--
   ! A single token.  The element has no content; everything about the token
   ! is expressed through its attributes.
  -->
  <xs:element name="token">
    <xs:complexType>

      <!-- Language of the token (may be omitted). -->
      <xs:attribute name="lang"
                    type="xs:language"
                    use="optional"/>

      <!-- Number of the sentence (mandatory). -->
      <xs:attribute name="sentence"
                    type="xs:nonNegativeInteger"
                    use="required"/>

      <!-- Number of the paragraph (mandatory). -->
      <xs:attribute name="paragraph"
                    type="xs:nonNegativeInteger"
                    use="required"/>

      <!-- String value of the token (mandatory). -->
      <xs:attribute name="value"
                    type="xs:string"
                    use="required"/>

      <!--
       ! Reference to the node the token came from (may be omitted).  For a
       ! token found inside an element this refers to the text node; for a
       ! token found inside an attribute it refers to the attribute node.
      -->
      <xs:attribute name="node-ref"
                    type="xs:anyURI"
                    use="optional"/>

    </xs:complexType>
  </xs:element>

  <!--======================================================================-->

  <!--
   ! Description of a tokenizer.  Each of the four child elements must appear
   ! exactly once, in any order; the identifying URI attribute is optional.
  -->
  <xs:element name="tokenizer-properties">
    <xs:complexType>
      <xs:all>

        <!--
         ! Whether XML comments act as token separators.  (An example cannot
         ! be shown here since an XML comment may not be nested inside another
         ! XML comment.)
        -->
        <xs:element name="comments-separate-tokens"
                    type="boolean-value"
                    minOccurs="1"
                    maxOccurs="1"/>

        <!--
         ! Whether XML elements act as token separators.  When true,
         ! <b>B</b>old yields 2 tokens rather than 1.
        -->
        <xs:element name="elements-separate-tokens"
                    type="boolean-value"
                    minOccurs="1"
                    maxOccurs="1"/>

        <!--
         ! Whether XML processing instructions act as token separators.  When
         ! true, net<?PI pi?>work yields 2 tokens rather than 1.
        -->
        <xs:element name="processing-instructions-separate-tokens"
                    type="boolean-value"
                    minOccurs="1"
                    maxOccurs="1"/>

        <!--
         ! The languages the tokenizer is able to tokenize: one or more
         ! "lang" children.
        -->
        <xs:element name="supported-languages"
                    minOccurs="1"
                    maxOccurs="1">
          <xs:complexType>
            <xs:sequence>
              <xs:element name="lang"
                          type="xs:language"
                          minOccurs="1"
                          maxOccurs="unbounded"/>
            </xs:sequence>
          </xs:complexType>
        </xs:element>

      </xs:all>

      <!--
       ! URI identifying the tokenizer (may be omitted).
      -->
      <xs:attribute name="uri"
                    type="xs:anyURI"
                    use="optional"/>

    </xs:complexType>
  </xs:element>

  <!--======================================================================-->

</xs:schema>
<!-- vim:set et sw=2 ts=2: -->