# Copyright (C) 2008, IWAMURO Motonori
# All rights reserved.
#
# License: BSD License (revised)
# see http://vmi.jp/software/ruby/COPYING

module UTF8Sec
  # Make the helper callable as UTF8Sec.is_invalid(...) as well as through
  # `include UTF8Sec`.
  extend self

  # Byte patterns that must be rejected ("NG") in UTF-8 input: overlong
  # encodings and the obsolete 5- and 6-byte forms.
  #
  # 2-byte form: 110xxxxx 10xxxxxx (8-11 bit code points)
  #   1100000x 10xxxxxx is an overlong encoding of a 1-byte sequence.
  #   NG: [\xC0-\xC1]
  #
  # 3-byte form: 1110xxxx 10xxxxxx 10xxxxxx (12-16 bit code points)
  #   11100000 100xxxxx 10xxxxxx is an overlong encoding of a 2-byte sequence.
  #   NG: \xE0[\x80-\x9F]
  #
  # 4-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (17-21 bit code points)
  #   11110000 1000xxxx 10xxxxxx 10xxxxxx is an overlong encoding of a
  #   3-byte sequence.
  #   NG: \xF0[\x80-\x8F]
  #
  # Every sequence of the following forms is NG:
  # 5-byte form: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (22-26 bit)
  #   NG: [\xF8-\xFB]
  #
  # 6-byte form: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx (27-31 bit)
  #   NG: [\xFC-\xFD]
  #
  # The /n flag makes this a byte-oriented (ASCII-8BIT) pattern.
  INVALID_UTF8 = /[\xC0\xC1\xF8-\xFD]|\xE0[\x80-\x9F]|\xF0[\x80-\x8F]/n

  # Scans +seq+ for any byte pattern listed in INVALID_UTF8.
  #
  # @param seq [String, nil] the byte sequence to inspect; it is not modified
  # @return [Integer, nil] byte offset of the first rejected pattern, or nil
  #   when none is found (so the result can be used directly as a condition)
  def is_invalid(seq)
    # On Ruby 1.9+ an ASCII-8BIT regexp cannot be matched against a string
    # tagged with another encoding once it holds non-ASCII or malformed bytes;
    # the match raises instead of answering. Compare against a binary-tagged
    # copy so the check is done on raw bytes. Objects without force_encoding
    # (nil, or strings on Ruby 1.8) are passed through unchanged.
    bytes = seq.respond_to?(:force_encoding) ? seq.dup.force_encoding(Encoding::BINARY) : seq
    INVALID_UTF8 =~ bytes
  end
end
