|
Data Structures |
| class | bvector |
| | bitvector with runtime compression of bits. More...
|
| struct | gap_len_table |
| | Default GAP lengths table. More...
|
| struct | gap_len_table_min |
| | Alternative GAP lengths table. Good for memory saver mode and very sparse bitsets. More...
|
| struct | bit_count_table |
| | Structure to aid in counting bits. The table contains the count of bits for each number in the 0-255 range. More...
|
| struct | block_set_table |
| | Structure keeps all-left/right ON bits masks. More...
|
| struct | first_bit_table |
| | Structure keeps index of first ON bit for every byte. More...
|
| struct | all_set |
| | Structure carries a pointer to a bit block with all bits set to 1. More...
|
| struct | _copyright |
| | Internal structure. More...
|
| struct | globals |
| | Internal structure. More...
|
| class | miniset |
| | Template class implements memory saving set functionality. More...
|
| class | bvmini |
| | Mini bitvector used in bvector template to keep block type flags. More...
|
| class | bvector_mini |
| | Bitvector class with very limited functionality. More...
|
| class | encoder |
| | Memory encoding. More...
|
| class | decoder_base |
| | Base class for all decoding functionality. More...
|
| class | decoder |
| | Class for decoding data from memory buffer. More...
|
| class | decoder_little_endian |
| | Class for decoding data from memory buffer. More...
|
| class | sse2_empty_guard |
| | SSE2 reinitialization guard class. More...
|
| struct | distance_metric_descriptor |
| | Distance metric descriptor, holds metric code and result. More...
|
Typedefs |
typedef bm::miniset< bm::block_allocator,
bm::set_total_blocks > | mem_save_set |
| typedef decoder | decoder_big_endian |
| | Class for decoding data from memory buffer.
|
| typedef unsigned long long | id64_t |
| typedef unsigned int | id_t |
| typedef unsigned int | word_t |
| typedef unsigned short | short_t |
| typedef unsigned short | gap_word_t |
| typedef id64_t | wordop_t |
Enumerations |
| enum | operation { BM_AND = 0,
BM_OR,
BM_SUB,
BM_XOR
} |
| | Bit operations enumeration. More...
|
| enum | ByteOrder { BigEndian = 0,
LittleEndian = 1
} |
| | Byte orders recognized by the library. More...
|
| enum | strategy { BM_BIT = 0,
BM_GAP = 1
} |
| | Block allocation strategies. More...
|
| enum | distance_metric {
COUNT_AND,
COUNT_XOR,
COUNT_OR,
COUNT_SUB_AB,
COUNT_SUB_BA,
COUNT_A,
COUNT_B
} |
| | Distance metrics codes defined for vectors A and B. More...
|
Functions |
| template<class Alloc, class MS> |
| bvector< Alloc, MS > | operator & (const bvector< Alloc, MS > &v1, const bvector< Alloc, MS > &v2) |
| template<class Alloc, class MS> |
| bvector< Alloc, MS > | operator| (const bvector< Alloc, MS > &v1, const bvector< Alloc > &v2) |
| template<class Alloc, class MS> |
| bvector< Alloc, MS > | operator^ (const bvector< Alloc, MS > &v1, const bvector< Alloc, MS > &v2) |
| template<class Alloc, class MS> |
| bvector< Alloc, MS > | operator- (const bvector< Alloc, MS > &v1, const bvector< Alloc, MS > &v2) |
| bm::id_t | word_bitcount64 (bm::id64_t w) |
| template<typename W> |
| void | xor_swap (W &x, W &y) |
| | XOR swap two scalar variables.
|
| template<typename T> |
| int | wordcmp0 (T w1, T w2) |
| | Lexicographical comparison of two words as bit strings. Auxiliary implementation for testing and reference purposes.
|
| template<typename T> |
| int | wordcmp (T a, T b) |
| | Lexicographical comparison of two words as bit strings. Auxiliary implementation for testing and reference purposes.
|
| template<typename T> |
| unsigned | gap_bfind (const T *buf, unsigned pos, unsigned *is_set) |
| template<typename T> |
| unsigned | gap_test (const T *buf, unsigned pos) |
| | Tests if bit = pos is true.
|
| template<class T, class F> |
| void | for_each_nzblock (T ***root, unsigned size1, unsigned size2, F &f) |
| template<class T, class F> |
| bool | for_each_nzblock_if (T ***root, unsigned size1, unsigned size2, F &f) |
| template<class T, class F> |
| void | for_each_block (T ***root, unsigned size1, unsigned size2, F &f) |
| template<class T, class F> |
| F | bmfor_each (T first, T last, F f) |
| template<class T> |
| T | sum_arr (T *first, T *last) |
| template<typename T> |
| unsigned | gap_bit_count (const T *buf) |
| | Calculates number of bits ON in GAP buffer.
|
| template<typename T> |
| unsigned | gap_bit_count_range (const T *buf, T left, T right) |
| | Counts 1 bits in GAP buffer in the closed [left, right] range.
|
| template<typename T> |
| int | gapcmp (const T *buf1, const T *buf2) |
| | Lexicographical comparison of GAP buffers.
|
| template<typename T, class F> |
| void | gap_buff_op (T *BMRESTRICT dest, const T *BMRESTRICT vect1, unsigned vect1_mask, const T *BMRESTRICT vect2, unsigned vect2_mask, F f) |
| | Abstract operation for GAP buffers. Receives functor F as a template argument.
|
| template<typename T> |
| unsigned | gap_set_value (unsigned val, T *BMRESTRICT buf, unsigned pos, unsigned *BMRESTRICT is_set) |
| | Abstract distance (similarity) operation for GAP buffers. Receives functor F as a template argument. Sets or clears bit in the GAP buffer.
|
| template<typename T> |
| int | gap_find_in_block (const T *buf, unsigned nbit, bm::id_t *prev) |
| | Searches for the next 1 bit in the GAP block.
|
| void | or_bit_block (unsigned *dest, unsigned bitpos, unsigned bitcount) |
| | Sets bits to 1 in the bitblock.
|
| void | sub_bit_block (unsigned *dest, unsigned bitpos, unsigned bitcount) |
| | SUB (AND NOT) bit interval to 1 in the bitblock.
|
| void | xor_bit_block (unsigned *dest, unsigned bitpos, unsigned bitcount) |
| | XOR bit interval to 1 in the bitblock.
|
| template<typename T> |
| void | gap_sub_to_bitset (unsigned *dest, const T *buf) |
| | SUB (AND NOT) GAP block to bitblock.
|
| template<typename T> |
| void | gap_xor_to_bitset (unsigned *dest, const T *buf) |
| | XOR GAP block to bitblock.
|
| template<typename T> |
| void | gap_add_to_bitset (unsigned *dest, const T *buf) |
| | Adds(OR) GAP block to bitblock.
|
| template<typename T> |
| void | gap_and_to_bitset (unsigned *dest, const T *buf) |
| | ANDs GAP block to bitblock.
|
| template<typename T> |
| bm::id_t | gap_bitset_and_count (const unsigned *block, const T *buf) |
| | Compute bitcount of bit block AND masked by GAP block.
|
| template<typename T> |
| bm::id_t | gap_bitset_sub_count (const unsigned *block, const T *buf) |
| | Compute bitcount of bit block SUB masked by GAP block.
|
| template<typename T> |
| bm::id_t | gap_bitset_xor_count (const unsigned *block, const T *buf) |
| | Compute bitcount of bit block XOR masked by GAP block.
|
| template<typename T> |
| bm::id_t | gap_bitset_or_count (const unsigned *block, const T *buf) |
| | Compute bitcount of bit block OR masked by GAP block.
|
| void | bit_block_set (bm::word_t *BMRESTRICT dst, bm::word_t value) |
| | Bitblock memset operation.
|
| template<typename T> |
| void | gap_convert_to_bitset (unsigned *dest, const T *buf) |
| | GAP block to bitblock conversion.
|
| template<typename T> |
| void | gap_convert_to_bitset (unsigned *dest, const T *buf, unsigned dest_len) |
| | GAP block to bitblock conversion.
|
| template<typename T> |
| unsigned * | gap_convert_to_bitset_smart (unsigned *dest, const T *buf, id_t set_max) |
| | Smart GAP block to bitblock conversion.
|
| template<typename T> |
| unsigned | gap_control_sum (const T *buf) |
| | Calculates sum of all words in GAP block. (For debugging purposes).
|
| template<class T> |
| void | gap_set_all (T *buf, unsigned set_max, unsigned value) |
| | Sets all bits to 0 or 1 (GAP).
|
| template<class T> |
| void | gap_init_range_block (T *buf, unsigned from, unsigned to, unsigned value, unsigned set_max) |
| | Initializes GAP block so that it contains a single range of bits (the range can span the whole block).
|
| template<typename T> |
| void | gap_invert (T *buf) |
| | Inverts all bits in the GAP buffer.
|
| template<typename T> |
| bool | gap_is_all_zero (const T *buf, unsigned set_max) |
| | Temporarily inverts all bits in the GAP buffer. Checks if GAP block is all-zero.
|
| template<typename T> |
| bool | gap_is_all_one (const T *buf, unsigned set_max) |
| | Checks if GAP block is all-one.
|
| template<typename T> |
| unsigned | gap_length (const T *buf) |
| | Returns GAP block length.
|
| template<typename T> |
| unsigned | gap_capacity (const T *buf, const T *glevel_len) |
| | Returns GAP block capacity.
|
| template<typename T> |
| unsigned | gap_limit (const T *buf, const T *glevel_len) |
| | Returns GAP block capacity limit.
|
| template<typename T> |
| unsigned | gap_level (const T *buf) |
| | Returns GAP block's capacity level.
|
| template<typename T> |
| void | set_gap_level (T *buf, unsigned level) |
| | Sets GAP block capacity level.
|
| template<typename T> |
| int | gap_calc_level (int len, const T *glevel_len) |
| | Calculates GAP block capacity level.
|
| template<typename T> |
| unsigned | gap_free_elements (const T *buf, const T *glevel_len) |
| | Returns number of free elements in GAP block array. Difference between GAP block capacity on this level and actual GAP length.
|
| template<typename T> |
| int | bitcmp (const T *buf1, const T *buf2, unsigned len) |
| | Lexicographical comparison of BIT buffers.
|
| template<typename T> |
| unsigned | bit_convert_to_gap (T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bm::id_t bits, unsigned dest_len) |
| | Converts bit block to GAP.
|
| template<typename D, typename T> |
| D | gap_convert_to_arr (D *BMRESTRICT dest, const T *BMRESTRICT buf, unsigned dest_len) |
| | Convert gap block into array of ints corresponding to 1 bits.
|
| template<typename T> |
| T | bit_convert_to_arr (T *BMRESTRICT dest, const unsigned *BMRESTRICT src, bm::id_t bits, unsigned dest_len) |
| | Convert bit block into an array of ints corresponding to 1 bits.
|
| bm::id_t | bit_block_calc_count (const bm::word_t *block, const bm::word_t *block_end) |
| | Bitcount for bit string.
|
| bm::id_t | bit_count_change (bm::word_t w) |
| bm::id_t | bit_block_calc_count_change (const bm::word_t *block, const bm::word_t *block_end) |
| bm::id_t | bit_block_calc_count_range (const bm::word_t *block, bm::word_t left, bm::word_t right) |
| template<typename T> |
| void | bit_invert (T *start, T *end) |
| bool | is_bits_one (const bm::wordop_t *start, const bm::wordop_t *end) |
| | Returns "true" if all bits in the block are 1.
|
| bool | bit_is_all_zero (const bm::wordop_t *start, const bm::wordop_t *end) |
| | Returns "true" if all bits in the block are 0.
|
| unsigned | and_op (unsigned v1, unsigned v2) |
| | GAP and functor.
|
| unsigned | xor_op (unsigned v1, unsigned v2) |
| | GAP xor functor.
|
| gap_word_t * | gap_operation_and (const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf) |
| | GAP AND operation.
|
| gap_word_t * | gap_operation_xor (const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf) |
| | GAP XOR operation.
|
| gap_word_t * | gap_operation_or (const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf) |
| | GAP OR operation.
|
| gap_word_t * | gap_operation_sub (const gap_word_t *BMRESTRICT vect1, const gap_word_t *BMRESTRICT vect2, gap_word_t *BMRESTRICT tmp_buf) |
| | GAP SUB (AND NOT) operation.
|
| void | bit_block_copy (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| | Bitblock copy operation.
|
| void | bit_block_and (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| | Plain bitblock AND operation. Function does not analyse availability of source and destination blocks.
|
| unsigned | bit_block_and_count (const bm::word_t *src1, const bm::word_t *src1_end, const bm::word_t *src2) |
| | Function ANDs two bitblocks and computes the bitcount. Function does not analyse availability of source blocks.
|
| unsigned | bit_block_xor_count (const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2) |
| | Function XORs two bitblocks and computes the bitcount. Function does not analyse availability of source blocks.
|
| unsigned | bit_block_sub_count (const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2) |
| | Function SUBs two bitblocks and computes the bitcount. Function does not analyse availability of source blocks.
|
| unsigned | bit_block_or_count (const bm::word_t *src1, const bm::word_t *src1_end, const bm::word_t *src2) |
| | Function ORs two bitblocks and computes the bitcount. Function does not analyse availability of source blocks.
|
| bm::word_t * | bit_operation_and (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| | bitblock AND operation.
|
| bm::id_t | bit_operation_and_count (const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2) |
| | Performs bitblock AND operation and calculates bitcount of the result.
|
| bm::id_t | bit_operation_sub_count (const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2) |
| | Performs bitblock SUB operation and calculates bitcount of the result.
|
| bm::id_t | bit_operation_or_count (const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2) |
| | Performs bitblock OR operation and calculates bitcount of the result.
|
| void | bit_block_or (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| | Plain bitblock OR operation. Function does not analyse availability of source and destination blocks.
|
| bm::word_t * | bit_operation_or (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| | Block OR operation. Makes analysis if block is 0 or FULL.
|
| void | bit_block_sub (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| | Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destination blocks.
|
| bm::word_t * | bit_operation_sub (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| | bitblock SUB operation.
|
| void | bit_block_xor (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| | Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks.
|
| bm::word_t * | bit_operation_xor (bm::word_t *BMRESTRICT dst, const bm::word_t *BMRESTRICT src) |
| | bitblock XOR operation.
|
| bm::id_t | bit_operation_xor_count (const bm::word_t *BMRESTRICT src1, const bm::word_t *BMRESTRICT src1_end, const bm::word_t *BMRESTRICT src2) |
| | Performs bitblock XOR operation and calculates bitcount of the result.
|
| void | bit_find_head_tail (const bm::word_t *data, unsigned *head_idx, unsigned *tail_idx) |
| | Inspects bit block for zero words at the head and at the end.
|
| int | bit_find_in_block (const bm::word_t *data, unsigned nbit, bm::id_t *prev) |
| | Searches for the next 1 bit in the BIT block.
|
| template<typename T, typename B> |
| unsigned | bit_list (T w, B *bits) |
| | Unpacks word into list of ON bit indexes.
|
| template<typename T> |
| unsigned | gap_overhead (const T *length, const T *length_end, const T *glevel_len) |
| | Calculates memory overhead for number of gap blocks sharing the same memory allocation table (level lengths table).
|
| template<typename T> |
| bool | improve_gap_levels (const T *length, const T *length_end, T *glevel_len) |
| | Finds optimal gap blocks lengths.
|
| BMFORCEINLINE void | sse2_xor_arr_2_mask (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask) |
| | XOR array elements to specified mask dst = *src ^ mask.
|
| BMFORCEINLINE void | sse2_andnot_arr_2_mask (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask) |
| | Inverts array elements and ANDs them with the specified mask: dst = ~*src & mask.
|
| BMFORCEINLINE void | sse2_and_arr (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) |
| | AND array elements against another array dst &= *src.
|
| BMFORCEINLINE void | sse2_or_arr (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) |
| | OR array elements against another array dst |= *src.
|
| BMFORCEINLINE void | sse2_xor_arr (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) |
| | XOR array elements against another array dst ^= *src.
|
| BMFORCEINLINE void | sse2_sub_arr (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) |
| | AND-NOT (SUB) array elements against another array dst &= ~*src.
|
| BMFORCEINLINE void | sse2_set_block (__m128i *BMRESTRICT dst, __m128i *BMRESTRICT dst_end, bm::word_t value) |
| | SSE2 block memset dst = value.
|
| BMFORCEINLINE void | sse2_copy_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) |
| | SSE2 block copy dst = *src.
|
| BMFORCEINLINE void | sse2_invert_arr (bm::word_t *first, bm::word_t *last) |
| | Invert array elements dst = ~*dst (equivalently, dst ^= all-ones mask).
|
| bm::id_t | sse2_bit_count (const __m128i *block, const __m128i *block_end) |
| BMFORCEINLINE __m128i | sse2_and (__m128i a, __m128i b) |
| BMFORCEINLINE __m128i | sse2_or (__m128i a, __m128i b) |
| BMFORCEINLINE __m128i | sse2_xor (__m128i a, __m128i b) |
| BMFORCEINLINE __m128i | sse2_sub (__m128i a, __m128i b) |
| template<class Func> |
| bm::id_t | sse2_bit_count_op (const __m128i *BMRESTRICT block, const __m128i *BMRESTRICT block_end, const __m128i *BMRESTRICT mask_block, Func sse2_func) |
| void | combine_count_operation_with_block (const bm::word_t *blk, unsigned gap, const bm::word_t *arg_blk, int arg_gap, bm::word_t *temp_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) |
| | Internal function computes different distance metrics.
|
| template<class BV> |
| void | distance_operation (const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) |
| | Distance computing template function.
|
| template<class BV> |
| bm::id_t | count_and (const BV &bv1, const BV &bv2) |
| | Computes bitcount of AND operation of two bitsets.
|
| template<class BV> |
| bm::id_t | count_xor (const BV &bv1, const BV &bv2) |
| | Computes bitcount of XOR operation of two bitsets.
|
| template<class BV> |
| bm::id_t | count_sub (const BV &bv1, const BV &bv2) |
| | Computes bitcount of SUB operation of two bitsets.
|
| template<class BV> |
| bm::id_t | count_or (const BV &bv1, const BV &bv2) |
| | Computes bitcount of OR operation of two bitsets.
|
| template<class It> |
| It | block_range_scan (It first, It last, unsigned nblock, unsigned *max_id) |
| | Internal algorithm scans the input for the block range limit.
|
| template<class BV, class It> |
| void | combine_or (BV &bv, It first, It last) |
| | OR Combine bitvector and the iterable sequence.
|
| template<class BV, class It> |
| void | combine_xor (BV &bv, It first, It last) |
| | XOR Combine bitvector and the iterable sequence.
|
| template<class BV, class It> |
| void | combine_sub (BV &bv, It first, It last) |
| | SUB Combine bitvector and the iterable sequence.
|
| template<class BV, class It> |
| void | combine_and (BV &bv, It first, It last) |
| | AND Combine bitvector and the iterable sequence.
|
| template<class BV> |
| bm::id_t | count_intervals (const BV &bv) |
| | Compute number of bit intervals (GAPs) in the bitvector.
|
| template<class BV, class It> |
| void | export_array (BV &bv, It first, It last) |
| | Export bitset from an array of binary data representing the bit vector.
|
Variables |
| const unsigned | id_max = 0xFFFFFFFF |
| const unsigned | set_block_size = 2048u |
| const unsigned | set_block_shift = 16u |
| const unsigned | set_block_mask = 0xFFFFu |
| const unsigned | set_blkblk_mask = 0xFFFFFFu |
| const unsigned | set_word_shift = 5u |
| const unsigned | set_word_mask = 0x1Fu |
| const unsigned | gap_max_buff_len = 1280 |
| const unsigned | gap_max_bits = 65536 |
| const unsigned | gap_equiv_len |
| const unsigned | gap_levels = 4 |
| const unsigned | gap_max_level = bm::gap_levels - 1 |
| const unsigned | set_array_size = 256u |
| const unsigned | set_array_shift = 8u |
| const unsigned | set_array_mask = 0xFFu |
| const unsigned | set_total_blocks = (bm::set_array_size * bm::set_array_size) |
| const unsigned | bits_in_block = bm::set_block_size * sizeof(bm::word_t) * 8 |
| const unsigned | bits_in_array = bm::bits_in_block * bm::set_array_size |
| const id64_t | all_bits_mask = 0xffffffffffffffff |
| const unsigned | set_block_size_op = bm::set_block_size / 2 |