61ce29b17f
* java/text/CollationElementIterator.java (text_decomposition): Changed type to RuleBasedCollator.CollationElement[] (setText): Use ArrayList instead of Vector. From-SVN: r84857
467 lines
13 KiB
Java
467 lines
13 KiB
Java
/* CollationElementIterator.java -- Walks through collation elements
|
|
Copyright (C) 1998, 1999, 2001, 2002, 2003, 2004 Free Software Foundation
|
|
|
|
This file is part of GNU Classpath.
|
|
|
|
GNU Classpath is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2, or (at your option)
|
|
any later version.
|
|
|
|
GNU Classpath is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GNU Classpath; see the file COPYING. If not, write to the
|
|
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
|
02111-1307 USA.
|
|
|
|
Linking this library statically or dynamically with other modules is
|
|
making a combined work based on this library. Thus, the terms and
|
|
conditions of the GNU General Public License cover the whole
|
|
combination.
|
|
|
|
As a special exception, the copyright holders of this library give you
|
|
permission to link this library with independent modules to produce an
|
|
executable, regardless of the license terms of these independent
|
|
modules, and to copy and distribute the resulting executable under
|
|
terms of your choice, provided that you also meet, for each linked
|
|
independent module, the terms and conditions of the license of that
|
|
module. An independent module is a module which is not derived from
|
|
or based on this library. If you modify this library, you may extend
|
|
this exception to your version of the library, but you are not
|
|
obligated to do so. If you do not wish to do so, delete this
|
|
exception statement from your version. */
|
|
|
|
|
|
package java.text;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
/* Written using "Java Class Libraries", 2nd edition, plus online
|
|
* API docs for JDK 1.2 from http://www.javasoft.com.
|
|
* Status: Believed complete and correct to JDK 1.1.
|
|
*/
|
|
|
|
/**
|
|
* This class walks through the character collation elements of a
|
|
* <code>String</code> as defined by the collation rules in an instance of
|
|
* <code>RuleBasedCollator</code>. There is no public constructor for
|
|
* this class. An instance is created by calling the
|
|
* <code>getCollationElementIterator</code> method on
|
|
* <code>RuleBasedCollator</code>.
|
|
*
|
|
* @author Aaron M. Renn <arenn@urbanophile.com>
|
|
* @author Tom Tromey <tromey@cygnus.com>
|
|
* @author Guilhem Lavaux <guilhem.lavaux@free.fr>
|
|
*/
|
|
public final class CollationElementIterator
|
|
{
|
|
/**
|
|
* This is a constant value that is returned to indicate that the end of
|
|
* the string was encountered.
|
|
*/
|
|
public static final int NULLORDER = -1;
|
|
|
|
/**
|
|
* This is the RuleBasedCollator this object was created from.
|
|
*/
|
|
RuleBasedCollator collator;
|
|
|
|
/**
|
|
* This is the String that is being iterated over.
|
|
*/
|
|
String text;
|
|
|
|
/**
|
|
* This is the index into the collation decomposition where we are currently scanning.
|
|
*/
|
|
int index;
|
|
|
|
/**
|
|
* This is the index into the String where we are currently scanning.
|
|
*/
|
|
int textIndex;
|
|
|
|
/**
|
|
* Array containing the collation decomposition of the
|
|
* text given to the constructor.
|
|
*/
|
|
private RuleBasedCollator.CollationElement[] text_decomposition;
|
|
|
|
/**
|
|
* Array containing the index of the specified block.
|
|
*/
|
|
private int[] text_indexes;
|
|
|
|
/**
|
|
* This method initializes a new instance of <code>CollationElementIterator</code>
|
|
* to iterate over the specified <code>String</code> using the rules in the
|
|
* specified <code>RuleBasedCollator</code>.
|
|
*
|
|
* @param collator The <code>RuleBasedCollation</code> used for calculating collation values
|
|
* @param text The <code>String</code> to iterate over.
|
|
*/
|
|
CollationElementIterator(RuleBasedCollator collator, String text)
|
|
{
|
|
this.collator = collator;
|
|
|
|
setText (text);
|
|
}
|
|
|
|
RuleBasedCollator.CollationElement nextBlock()
|
|
{
|
|
if (index >= text_decomposition.length)
|
|
return null;
|
|
|
|
RuleBasedCollator.CollationElement e = text_decomposition[index];
|
|
|
|
textIndex = text_indexes[index+1];
|
|
|
|
index++;
|
|
|
|
return e;
|
|
}
|
|
|
|
RuleBasedCollator.CollationElement previousBlock()
|
|
{
|
|
if (index == 0)
|
|
return null;
|
|
|
|
index--;
|
|
RuleBasedCollator.CollationElement e = text_decomposition[index];
|
|
|
|
textIndex = text_indexes[index+1];
|
|
|
|
return e;
|
|
}
|
|
|
|
/**
|
|
* This method returns the collation ordering value of the next character sequence
|
|
* in the string (it may be an extended character following collation rules).
|
|
* This method will return <code>NULLORDER</code> if the
|
|
* end of the string was reached.
|
|
*
|
|
* @return The collation ordering value.
|
|
*/
|
|
public int next()
|
|
{
|
|
RuleBasedCollator.CollationElement e = nextBlock();
|
|
|
|
if (e == null)
|
|
return NULLORDER;
|
|
|
|
return e.getValue();
|
|
}
|
|
|
|
/**
|
|
* This method returns the collation ordering value of the previous character
|
|
* in the string. This method will return <code>NULLORDER</code> if the
|
|
* beginning of the string was reached.
|
|
*
|
|
* @return The collation ordering value.
|
|
*/
|
|
public int previous()
|
|
{
|
|
RuleBasedCollator.CollationElement e = previousBlock();
|
|
|
|
if (e == null)
|
|
return NULLORDER;
|
|
|
|
return e.getValue();
|
|
}
|
|
|
|
/**
|
|
* This method returns the primary order value for the given collation
|
|
* value.
|
|
*
|
|
* @param value The collation value returned from <code>next()</code> or <code>previous()</code>.
|
|
*
|
|
* @return The primary order value of the specified collation value. This is the high 16 bits.
|
|
*/
|
|
public static final int primaryOrder(int order)
|
|
{
|
|
// From the JDK 1.2 spec.
|
|
return order >>> 16;
|
|
}
|
|
|
|
/**
|
|
* This method resets the internal position pointer to read from the
|
|
* beginning of the <code>String</code> again.
|
|
*/
|
|
public void reset()
|
|
{
|
|
index = 0;
|
|
textIndex = 0;
|
|
}
|
|
|
|
/**
|
|
* This method returns the secondary order value for the given collation
|
|
* value.
|
|
*
|
|
* @param value The collation value returned from <code>next()</code> or <code>previous()</code>.
|
|
*
|
|
* @return The secondary order value of the specified collation value. This is the bits 8-15.
|
|
*/
|
|
public static final short secondaryOrder(int order)
|
|
{
|
|
// From the JDK 1.2 spec.
|
|
return (short) ((order >>> 8) & 255);
|
|
}
|
|
|
|
/**
|
|
* This method returns the tertiary order value for the given collation
|
|
* value.
|
|
*
|
|
* @param value The collation value returned from <code>next()</code> or <code>previous()</code>.
|
|
*
|
|
* @return The tertiary order value of the specified collation value. This is the low eight bits.
|
|
*/
|
|
public static final short tertiaryOrder(int order)
|
|
{
|
|
// From the JDK 1.2 spec.
|
|
return (short) (order & 255);
|
|
}
|
|
|
|
/**
|
|
* This method sets the <code>String</code> that it is iterating over
|
|
* to the specified <code>String</code>.
|
|
*
|
|
* @param text The new <code>String</code> to iterate over.
|
|
*
|
|
* @since 1.2
|
|
*/
|
|
public void setText(String text)
|
|
{
|
|
int idx = 0;
|
|
int idx_idx = 0;
|
|
int alreadyExpanded = 0;
|
|
int idxToMove = 0;
|
|
|
|
this.text = text;
|
|
this.index = 0;
|
|
|
|
String work_text = text.intern();
|
|
|
|
ArrayList a_element = new ArrayList();
|
|
ArrayList a_idx = new ArrayList();
|
|
|
|
// Build element collection ordered as they come in "text".
|
|
while (idx < work_text.length())
|
|
{
|
|
String key, key_old;
|
|
|
|
Object object = null;
|
|
int p = 1;
|
|
|
|
// IMPROVE: use a TreeMap with a prefix-ordering rule.
|
|
key_old = key = null;
|
|
do
|
|
{
|
|
if (object != null)
|
|
key_old = key;
|
|
key = work_text.substring (idx, idx+p);
|
|
object = collator.prefix_tree.get (key);
|
|
if (object != null && idx < alreadyExpanded)
|
|
{
|
|
RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
|
|
if (prefix.expansion != null &&
|
|
prefix.expansion.startsWith(work_text.substring(0, idx)))
|
|
{
|
|
object = null;
|
|
key = key_old;
|
|
}
|
|
}
|
|
p++;
|
|
}
|
|
while (idx+p <= work_text.length());
|
|
|
|
if (object == null)
|
|
key = key_old;
|
|
|
|
RuleBasedCollator.CollationElement prefix =
|
|
(RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
|
|
|
|
/*
|
|
* First case: There is no such sequence in the database.
|
|
* We will have to build one from the context.
|
|
*/
|
|
if (prefix == null)
|
|
{
|
|
/*
|
|
* We are dealing with sequences in an expansion. They
|
|
* are treated as accented characters (tertiary order).
|
|
*/
|
|
if (alreadyExpanded > 0)
|
|
{
|
|
RuleBasedCollator.CollationElement e =
|
|
collator.getDefaultAccentedElement (work_text.charAt (idx));
|
|
|
|
a_element.add (e);
|
|
a_idx.add (new Integer(idx_idx));
|
|
idx++;
|
|
alreadyExpanded--;
|
|
if (alreadyExpanded == 0)
|
|
{
|
|
/* There is not any characters left in the expansion set.
|
|
* We can increase the pointer in the source string.
|
|
*/
|
|
idx_idx += idxToMove;
|
|
idxToMove = 0;
|
|
}
|
|
else
|
|
idx_idx++;
|
|
}
|
|
else
|
|
{
|
|
/* This is a normal character. */
|
|
RuleBasedCollator.CollationElement e =
|
|
collator.getDefaultElement (work_text.charAt (idx));
|
|
Integer i_ref = new Integer(idx_idx);
|
|
|
|
/* Don't forget to mark it as a special sequence so the
|
|
* string can be ordered.
|
|
*/
|
|
a_element.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
|
|
a_idx.add (i_ref);
|
|
a_element.add (e);
|
|
a_idx.add (i_ref);
|
|
idx_idx++;
|
|
idx++;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Second case: Here we have found a matching sequence.
|
|
* Here we have an expansion string prepend it to the "work text" and
|
|
* add the corresponding sorting element. We must also mark
|
|
*/
|
|
if (prefix.expansion != null)
|
|
{
|
|
work_text = prefix.expansion
|
|
+ work_text.substring (idx+prefix.key.length());
|
|
idx = 0;
|
|
a_element.add (prefix);
|
|
a_idx.add (new Integer(idx_idx));
|
|
if (alreadyExpanded == 0)
|
|
idxToMove = prefix.key.length();
|
|
alreadyExpanded += prefix.expansion.length()-prefix.key.length();
|
|
}
|
|
else
|
|
{
|
|
/* Third case: the simplest. We have got the prefix and it
|
|
* has not to be expanded.
|
|
*/
|
|
a_element.add (prefix);
|
|
a_idx.add (new Integer(idx_idx));
|
|
idx += prefix.key.length();
|
|
/* If the sequence is in an expansion, we must decrease the
|
|
* counter.
|
|
*/
|
|
if (alreadyExpanded > 0)
|
|
{
|
|
alreadyExpanded -= prefix.key.length();
|
|
if (alreadyExpanded == 0)
|
|
{
|
|
idx_idx += idxToMove;
|
|
idxToMove = 0;
|
|
}
|
|
} else
|
|
idx_idx += prefix.key.length();
|
|
}
|
|
}
|
|
|
|
text_decomposition = (RuleBasedCollator.CollationElement[])
|
|
a_element.toArray(new RuleBasedCollator.CollationElement[a_element.size()]);
|
|
text_indexes = new int[a_idx.size()+1];
|
|
for (int i = 0; i < a_idx.size(); i++)
|
|
{
|
|
text_indexes[i] = ((Integer)a_idx.get(i)).intValue();
|
|
}
|
|
text_indexes[a_idx.size()] = text.length();
|
|
}
|
|
|
|
/**
|
|
* This method sets the <code>String</code> that it is iterating over
|
|
* to the <code>String</code> represented by the specified
|
|
* <code>CharacterIterator</code>.
|
|
*
|
|
* @param source The <code>CharacterIterator</code> containing the new
|
|
* <code>String</code> to iterate over.
|
|
*/
|
|
public void setText(CharacterIterator source)
|
|
{
|
|
StringBuffer expand = new StringBuffer();
|
|
|
|
// For now assume we read from the beginning of the string.
|
|
for (char c = source.first();
|
|
c != CharacterIterator.DONE;
|
|
c = source.next())
|
|
expand.append(c);
|
|
|
|
setText(expand.toString());
|
|
}
|
|
|
|
/**
|
|
* This method returns the current offset into the <code>String</code>
|
|
* that is being iterated over.
|
|
*
|
|
* @return The iteration index position.
|
|
*
|
|
* @since 1.2
|
|
*/
|
|
public int getOffset()
|
|
{
|
|
return textIndex;
|
|
}
|
|
|
|
/**
|
|
* This method sets the iteration index position into the current
|
|
* <code>String</code> to the specified value. This value must not
|
|
* be negative and must not be greater than the last index position
|
|
* in the <code>String</code>.
|
|
*
|
|
* @param offset The new iteration index position.
|
|
*
|
|
* @exception IllegalArgumentException If the new offset is not valid.
|
|
*/
|
|
public void setOffset(int offset)
|
|
{
|
|
if (offset < 0)
|
|
throw new IllegalArgumentException("Negative offset: " + offset);
|
|
|
|
if (offset > (text.length() - 1))
|
|
throw new IllegalArgumentException("Offset too large: " + offset);
|
|
|
|
for (index = 0; index < text_decomposition.length; index++)
|
|
{
|
|
if (offset <= text_indexes[index])
|
|
break;
|
|
}
|
|
/*
|
|
* As text_indexes[0] == 0, we should not have to take care whether index is
|
|
* greater than 0. It is always.
|
|
*/
|
|
if (text_indexes[index] == offset)
|
|
textIndex = offset;
|
|
else
|
|
textIndex = text_indexes[index-1];
|
|
}
|
|
|
|
/**
|
|
* This method returns the maximum length of any expansion sequence that
|
|
* ends with the specified collation order value. (Whatever that means).
|
|
*
|
|
* @param value The collation order value
|
|
*
|
|
* @param The maximum length of an expansion sequence.
|
|
*/
|
|
public int getMaxExpansion(int value)
|
|
{
|
|
return 1;
|
|
}
|
|
}
|