module MARC::REXMLReader

The REXMLReader is the default parser, since REXML is almost always available. It uses REXML's PullParser to handle large documents without consuming excessive amounts of memory, but it is still REXML (read: slow), so it is a good idea to use an alternative parser if one is available. If you don't know which parser is the best one available, you can use the MagicReader or set:

MARC::XMLReader.parser=MARC::XMLReader::USE_BEST_AVAILABLE

or

MARC::XMLReader.parser = "magic"

or

reader = MARC::XMLReader.new(fh, :parser => "magic") (or the constant)

which will cascade down to REXML if nothing better is found.

Public Class Methods

extended(receiver) click to toggle source
# File lib/marc/xml_parsers.rb, line 159
# Hook run when this module is mixed into a reader via Object#extend.
# Loads the REXML libraries on demand (so they are only required when this
# parser is actually selected) and then asks the receiver to set up its
# parser through #init. Returns whatever receiver.init returns.
def self.extended(receiver)
  %w[rexml/document rexml/parsers/pullparser].each { |library| require library }
  receiver.init
end

Public Instance Methods

each() { |build_record| ... } click to toggle source

Loop through the MARC records in the XML document

# File lib/marc/xml_parsers.rb, line 171
# Walks the pull parser's event stream and yields one record (as produced by
# build_record) for every opening record element encountered.
# When called without a block, returns an Enumerator for #each instead.
def each
  return enum_for(:each) unless block_given?

  while @parser.has_next?
    pulled = @parser.pull
    # Only the start tag of a record element begins a new record; every
    # other event seen at this level is skipped.
    next unless pulled.start_element? && strip_ns(pulled[0]) == 'record'

    yield build_record
  end
end
init() click to toggle source

Sets our parser

# File lib/marc/xml_parsers.rb, line 166
# Creates a REXML pull parser over this reader's @handle and stores it in
# @parser, where #each and #build_record read their events from.
def init
  pull_parser_class = REXML::Parsers::PullParser
  @parser = pull_parser_class.new(@handle)
end

Private Instance Methods

build_record() click to toggle source

Accepts parse events until a complete record has been built up, then returns that record.

# File lib/marc/xml_parsers.rb, line 192
# Consumes parse events from @parser until one complete record has been
# assembled and returns it.
#
# Two event sources are supported:
# * a Nokogiri::XML::Reader, handled node by node, and
# * anything else, treated as a pull parser answering has_next?/pull
#   (this is what #init installs).
#
# Nokogiri is detected with defined? instead of searching Module.constants
# for the String 'Nokogiri': on Rubies where constant names are Symbols that
# lookup never matches, which made the Nokogiri path unreachable.
#
# Returns the MARC::Record once the closing record element is seen. If the
# pull parser runs out of events first, returns nil.
def build_record
  if defined?(::Nokogiri::XML::Reader) && @parser.is_a?(::Nokogiri::XML::Reader)
    build_record_from_nokogiri_reader
  else
    build_record_from_pull_parser
  end
end

# Builds a record from Nokogiri::XML::Reader nodes. Only nodes whose
# namespace equals @ns are treated as MARC elements.
# Raises RuntimeError ("No datafield to add to") when a subfield element
# appears while no datafield is open.
# If the nodes are exhausted before a closing record element is seen, the
# result is whatever @parser.each returns.
def build_record_from_nokogiri_reader
  record = MARC::Record.new
  datafield = nil
  cursor = nil # :leader, or the field/subfield waiting for its text value
  open_elements = []

  @parser.each do |node|
    # A value-bearing node supplies the content of whichever element was
    # opened last (tracked in cursor).
    if node.value? && cursor
      if cursor.is_a?(Symbol) && cursor == :leader
        record.leader = node.value
      else
        cursor.value = node.value
      end
      cursor = nil
    end
    next unless node.namespace_uri == @ns

    name = node.local_name.downcase
    # The same element name is seen once when it opens and once when it
    # closes; the second sighting only clears the bookkeeping entry.
    if open_elements.include?(name)
      open_elements.delete(name)
      next
    end
    open_elements << name

    case name
    when 'leader'
      cursor = :leader
    when 'controlfield'
      record << datafield if datafield
      datafield = nil
      control_field = MARC::ControlField.new(node.attribute('tag'))
      record << control_field
      cursor = control_field
    when 'datafield'
      # The previous datafield (if any) is flushed when the next one starts.
      record << datafield if datafield
      datafield = MARC::DataField.new(node.attribute('tag'), node.attribute('ind1'), node.attribute('ind2'))
    when 'subfield'
      raise "No datafield to add to" unless datafield
      subfield = MARC::Subfield.new(node.attribute('code'))
      datafield.append(subfield)
      cursor = subfield
    when 'record'
      # The opening record tag is not seen here, so the first sighting of
      # 'record' is the closing tag: flush the last datafield and finish.
      record << datafield if datafield
      return record
    end
  end
end

# Builds a record from pull-parser events (has_next?/pull). Text is
# accumulated between a start tag and its end tag and assigned when the end
# tag arrives. Returns nil if the events run out before the record closes.
def build_record_from_pull_parser
  record = MARC::Record.new
  control_field = nil
  data_field = nil
  subfield = nil
  text = ''

  while @parser.has_next?
    event = @parser.pull

    if event.text?
      # += rather than <<: the string previously handed to record.leader or
      # a field value must not be mutated by text that follows its end tag.
      text += REXML::Text.unnormalize(event[0])
      next
    end

    if event.start_element?
      # Every opening tag starts a fresh text buffer.
      text = ''
      attrs = event[1]
      case strip_ns(event[0])
      when 'controlfield'
        control_field = MARC::ControlField.new(attrs['tag'])
      when 'datafield'
        data_field = MARC::DataField.new(attrs['tag'], attrs['ind1'], attrs['ind2'])
      when 'subfield'
        subfield = MARC::Subfield.new(attrs['code'])
      end
    elsif event.end_element?
      case strip_ns(event[0])
      when 'leader'
        record.leader = text
      when 'record'
        return record
      when 'controlfield'
        control_field.value = text
        record.append(control_field)
      when 'datafield'
        record.append(data_field)
      when 'subfield'
        subfield.value = text
        data_field.append(subfield)
      end
    end
  end

  # Ran out of events without seeing the end of the record.
  nil
end
strip_ns(str) click to toggle source
# File lib/marc/xml_parsers.rb, line 186
# Drops a namespace prefix from an element name: everything from the start
# of a line through the last colon on that line is removed (first match
# only). Names without a colon come back unchanged. Returns a new String.
def strip_ns(str)
  str.sub(/^.*:/, '')
end