public class HtmlModule extends ModuleBase
Modifier and Type | Field and Description |
---|---|
protected ChecksumInputStream |
_cstream
PRIVATE INSTANCE FIELDS.
|
protected java.lang.String |
_doctype |
protected java.io.DataInputStream |
_dstream |
protected TextMDMetadata |
_textMD |
protected boolean |
_withTextMD |
static int |
HTML_3_2 |
static int |
HTML_4_0_FRAMESET |
static int |
HTML_4_0_STRICT |
static int |
HTML_4_0_TRANSITIONAL |
static int |
HTML_4_01_FRAMESET |
static int |
HTML_4_01_STRICT |
static int |
HTML_4_01_TRANSITIONAL |
static int |
XHTML_1_0_FRAMESET |
static int |
XHTML_1_0_STRICT |
static int |
XHTML_1_0_TRANSITIONAL |
static int |
XHTML_1_1 |
_app, _bigEndian, _checksumFinished, _countStream, _coverage, _crc32, _date, _defaultParams, _features, _format, _init, _isRandomAccess, _je, _logger, _md5, _mimeType, _name, _nByte, _note, _param, _release, _repInfoNote, _rights, _sha1, _signature, _specification, _validityNote, _vendor, _verbosity, _wellFormedNote
MAXIMUM_VERBOSITY, MINIMUM_VERBOSITY
Constructor and Description |
---|
HtmlModule()
Instantiate an HtmlModule object.
|
Modifier and Type | Method and Description |
---|---|
protected int |
checkDoctype(java.util.List elements) |
void |
checkSignatures(java.io.File file,
java.io.InputStream stream,
RepInfo info)
Check if the digital object conforms to this Module's
internal signature information.
|
protected static boolean |
isXmlAvailable() |
int |
parse(java.io.InputStream stream,
RepInfo info,
int parseIndex)
Parse the content of a purported HTML stream digital object and store the
results in RepInfo.
|
protected int |
seemsToBeXHTML(java.util.List elements) |
protected java.lang.String |
stripQuotes(java.lang.String str) |
addIntegerProperty, addIntegerProperty, applyDefaultParams, calcRAChecksum, checkSignatures, getApp, getBase, getBufferedDataStream, getCoverage, getCRC32, getDate, getDefaultParams, getFeatures, getFormat, getMimeType, getName, getNByte, getNote, getRelease, getRepInfoNote, getRights, getSignature, getSpecification, getValidityNote, getVendor, getWellFormedNote, hasFeature, init, initFeatures, initParse, isBigEndian, isRandomAccess, param, parse, readByteBuf, readDouble, readDouble, readDouble, readFloat, readFloat, readSignedByte, readSignedByte, readSignedByte, readSignedInt, readSignedInt, readSignedInt, readSignedLong, readSignedRational, readSignedRational, readSignedShort, readSignedShort, readSignedShort, readUnsignedByte, readUnsignedByte, readUnsignedByte, readUnsignedInt, readUnsignedInt, readUnsignedInt, readUnsignedRational, readUnsignedRational, readUnsignedRational, readUnsignedShort, readUnsignedShort, readUnsignedShort, resetParams, setApp, setBase, setChecksums, setCRC32, setDefaultParams, setMD5, setNByte, setSHA1, setValidityNote, setVerbosity, show, skipBytes, skipBytes, vectorToPropArray
protected ChecksumInputStream _cstream
protected java.io.DataInputStream _dstream
protected java.lang.String _doctype
public static final int HTML_3_2
public static final int HTML_4_0_STRICT
public static final int HTML_4_0_FRAMESET
public static final int HTML_4_0_TRANSITIONAL
public static final int HTML_4_01_STRICT
public static final int HTML_4_01_FRAMESET
public static final int HTML_4_01_TRANSITIONAL
public static final int XHTML_1_0_STRICT
public static final int XHTML_1_0_TRANSITIONAL
public static final int XHTML_1_0_FRAMESET
public static final int XHTML_1_1
protected boolean _withTextMD
protected TextMDMetadata _textMD
public int parse(java.io.InputStream stream, RepInfo info, int parseIndex) throws java.io.IOException
parse
in interface Module
parse
in class ModuleBase
stream
- An InputStream, positioned at its beginning,
which is generated from the object to be parsed.
If multiple calls to parse
are made
on the basis of a nonzero value being returned,
a new InputStream must be provided each time.
info
- A fresh (on the first call) RepInfo object
which will be modified
to reflect the results of the parsing.
If multiple calls to parse
are made
on the basis of a nonzero value being returned,
the same RepInfo object should be passed with each
call.
parseIndex
- Must be 0 in the first call to parse. If parse
returns a nonzero value, it must be
called again with parseIndex
equal to that return value.
Throws: java.io.IOException
public void checkSignatures(java.io.File file, java.io.InputStream stream, RepInfo info) throws java.io.IOException
checkSignatures
in interface Module
checkSignatures
in class ModuleBase
file
- A File object for the object being parsed
stream
- An InputStream, positioned at its beginning,
which is generated from the object to be parsed
info
- A fresh RepInfo object which will be modified
to reflect the results of the test
Throws: java.io.IOException
protected int checkDoctype(java.util.List elements)
protected int seemsToBeXHTML(java.util.List elements)
protected java.lang.String stripQuotes(java.lang.String str)
protected static boolean isXmlAvailable()