librsync  2.0.2
sumset.c
1 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*-
2  *
3  * librsync -- library for network deltas
4  *
5  * Copyright (C) 1999, 2000, 2001 by Martin Pool <mbp@sourcefrog.net>
6  * Copyright (C) 1999 by Andrew Tridgell
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU Lesser General Public License as published by
10  * the Free Software Foundation; either version 2.1 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21  */
22 
23 #include "config.h"
24 
25 #include <assert.h>
26 #include <stddef.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <string.h>
30 
31 #include "librsync.h"
32 #include "sumset.h"
33 #include "util.h"
34 #include "trace.h"
35 
36 const int RS_MD4_SUM_LENGTH = 16;
37 const int RS_BLAKE2_SUM_LENGTH = 32;
38 
39 static void rs_block_sig_init(rs_block_sig_t *sig, rs_weak_sum_t weak_sum,
40  rs_strong_sum_t *strong_sum, int strong_len)
41 {
42  sig->weak_sum = weak_sum;
43  if (strong_sum)
44  memcpy(sig->strong_sum, strong_sum, strong_len);
45 }
46 
47 static inline unsigned rs_block_sig_hash(const rs_block_sig_t *sig)
48 {
49  return (unsigned)sig->weak_sum;
50 }
51 
52 typedef struct rs_block_match {
53  rs_block_sig_t block_sig;
54  rs_signature_t *signature;
55  const void *buf;
56  size_t len;
58 
59 static void rs_block_match_init(rs_block_match_t *match, rs_signature_t *sig,
60  rs_weak_sum_t weak_sum,
61  rs_strong_sum_t *strong_sum, const void *buf,
62  size_t len)
63 {
64  rs_block_sig_init(&match->block_sig, weak_sum, strong_sum,
65  sig->strong_sum_len);
66  match->signature = sig;
67  match->buf = buf;
68  match->len = len;
69 }
70 
71 static inline int rs_block_match_cmp(rs_block_match_t *match,
72  const rs_block_sig_t *block_sig)
73 {
74  /* If buf is not NULL, the strong sum is yet to be calculated. */
75  if (match->buf) {
76 #ifndef HASHTABLE_NSTATS
77  match->signature->calc_strong_count++;
78 #endif
79  rs_signature_calc_strong_sum(match->signature, match->buf, match->len,
80  &(match->block_sig.strong_sum));
81  match->buf = NULL;
82  }
83  return memcmp(&match->block_sig.strong_sum, &block_sig->strong_sum,
84  match->signature->strong_sum_len);
85 }
86 
87 /* Instantiate hashtable for rs_block_sig and rs_block_match. */
88 #define ENTRY rs_block_sig
89 #define MATCH rs_block_match
90 #define NAME hashtable
91 #include "hashtable.h"
92 
93 /* Get the size of a packed rs_block_sig_t. */
94 static inline size_t rs_block_sig_size(const rs_signature_t *sig)
95 {
96  /* Round up to next multiple of sizeof(weak_sum) to align memory correctly.
97  */
98  return offsetof(rs_block_sig_t,
99  strong_sum) + ((sig->strong_sum_len +
100  sizeof(rs_weak_sum_t)-
101  1) / sizeof(rs_weak_sum_t)) *
102  sizeof(rs_weak_sum_t);
103 }
104 
105 /* Get the pointer to the block_sig_t from a block index. */
106 static inline rs_block_sig_t *rs_block_sig_ptr(const rs_signature_t *sig,
107  int block_idx)
108 {
109  return (rs_block_sig_t *)((char *)sig->block_sigs +
110  block_idx * rs_block_sig_size(sig));
111 }
112 
113 /* Get the index of a block from a block_sig_t pointer. */
114 static inline int rs_block_sig_idx(const rs_signature_t *sig,
115  rs_block_sig_t *block_sig)
116 {
117  return ((char *)block_sig -
118  (char *)sig->block_sigs) / rs_block_sig_size(sig);
119 }
120 
121 rs_result rs_signature_init(rs_signature_t *sig, int magic, int block_len,
122  int strong_len, rs_long_t sig_fsize)
123 {
124  int max_strong_len;
125 
126  /* Check and set default arguments. */
127  magic = magic ? magic : RS_BLAKE2_SIG_MAGIC;
128  switch (magic) {
129  case RS_BLAKE2_SIG_MAGIC:
130  max_strong_len = RS_BLAKE2_SUM_LENGTH;
131  break;
132  case RS_MD4_SIG_MAGIC:
133  max_strong_len = RS_MD4_SUM_LENGTH;
134  break;
135  default:
136  rs_error("invalid magic %#x", magic);
137  return RS_BAD_MAGIC;
138  }
139  strong_len = strong_len ? strong_len : max_strong_len;
140  if (strong_len < 1 || max_strong_len < strong_len) {
141  rs_error("invalid strong_sum_len %d for magic %#x", strong_len, magic);
142  return RS_PARAM_ERROR;
143  }
144  /* Set attributes from args. */
145  sig->magic = magic;
146  sig->block_len = block_len;
147  sig->strong_sum_len = strong_len;
148  sig->count = 0;
149  /* Calculate the number of blocks if we have the signature file size. */
150  /* Magic+header is 12 bytes, each block thereafter is 4 bytes
151  weak_sum+strong_sum_len bytes */
152  sig->size = (int)(sig_fsize ? (sig_fsize - 12) / (4 + strong_len) : 0);
153  if (sig->size)
154  sig->block_sigs =
155  rs_alloc(sig->size * rs_block_sig_size(sig),
156  "signature->block_sigs");
157  else
158  sig->block_sigs = NULL;
159  sig->hashtable = NULL;
160 #ifndef HASHTABLE_NSTATS
161  sig->calc_strong_count = 0;
162 #endif
163  rs_signature_check(sig);
164  return RS_DONE;
165 }
166 
167 void rs_signature_done(rs_signature_t *sig)
168 {
169  hashtable_free(sig->hashtable);
170  rs_bzero(sig, sizeof(*sig));
171 }
172 
173 rs_block_sig_t *rs_signature_add_block(rs_signature_t *sig,
174  rs_weak_sum_t weak_sum,
175  rs_strong_sum_t *strong_sum)
176 {
177  rs_signature_check(sig);
178  /* If block_sigs is full, allocate more space. */
179  if (sig->count == sig->size) {
180  sig->size = sig->size ? sig->size * 2 : 16;
181  sig->block_sigs =
182  rs_realloc(sig->block_sigs, sig->size * rs_block_sig_size(sig),
183  "signature->block_sigs");
184  }
185  rs_block_sig_t *b = rs_block_sig_ptr(sig, sig->count++);
186  rs_block_sig_init(b, weak_sum, strong_sum, sig->strong_sum_len);
187  return b;
188 }
189 
190 rs_long_t rs_signature_find_match(rs_signature_t *sig, rs_weak_sum_t weak_sum,
191  void const *buf, size_t len)
192 {
194  rs_block_sig_t *b;
195 
196  rs_signature_check(sig);
197  rs_block_match_init(&m, sig, weak_sum, NULL, buf, len);
198  if ((b = hashtable_find(sig->hashtable, &m))) {
199  return (rs_long_t)rs_block_sig_idx(sig, b) * sig->block_len;
200  }
201  return -1;
202 }
203 
204 void rs_signature_log_stats(rs_signature_t const *sig)
205 {
206 #ifndef HASHTABLE_NSTATS
207  hashtable_t *t = sig->hashtable;
208 
209  rs_log(RS_LOG_INFO | RS_LOG_NONAME,
210  "match statistics: signature[%ld searches, %ld (%.3f%%) matches, "
211  "%ld (%.3fx) weak sum compares, %ld (%.3f%%) strong sum compares, "
212  "%ld (%.3f%%) strong sum calcs]", t->find_count, t->match_count,
213  100.0 * (double)t->match_count / t->find_count, t->hashcmp_count,
214  (double)t->hashcmp_count / t->find_count, t->entrycmp_count,
215  100.0 * (double)t->entrycmp_count / t->find_count,
216  sig->calc_strong_count,
217  100.0 * (double)sig->calc_strong_count / t->find_count);
218 #endif
219 }
220 
222 {
224  rs_block_sig_t *b;
225  int i;
226 
227  rs_signature_check(sig);
228  sig->hashtable = hashtable_new(sig->count);
229  if (!sig->hashtable)
230  return RS_MEM_ERROR;
231  for (i = 0; i < sig->count; i++) {
232  b = rs_block_sig_ptr(sig, i);
233  rs_block_match_init(&m, sig, b->weak_sum, &b->strong_sum, NULL, 0);
234  if (!hashtable_find(sig->hashtable, &m))
235  hashtable_add(sig->hashtable, b);
236  }
237  hashtable_stats_init(sig->hashtable);
238  return RS_DONE;
239 }
240 
242 {
243  rs_signature_done(psums);
244  free(psums);
245 }
246 
248 {
249  int i;
250  rs_block_sig_t *b;
251  char strong_hex[RS_MAX_STRONG_SUM_LENGTH * 3];
252 
253  rs_log(RS_LOG_INFO | RS_LOG_NONAME,
254  "sumset info: magic=%#x, block_len=%d, block_num=%d", sums->magic,
255  sums->block_len, sums->count);
256 
257  for (i = 0; i < sums->count; i++) {
258  b = rs_block_sig_ptr(sums, i);
259  rs_hexify(strong_hex, b->strong_sum, sums->strong_sum_len);
260  rs_log(RS_LOG_INFO | RS_LOG_NONAME,
261  "sum %6d: weak=" FMT_WEAKSUM ", strong=%s", i, b->weak_sum,
262  strong_hex);
263  }
264 }
int size
Total number of blocks allocated.
Definition: sumset.h:42
hashtable_t * hashtable
The hashtable for finding matches.
Definition: sumset.h:44
rs_result rs_build_hash_table(rs_signature_t *sums)
Call this after loading a signature to index it.
Definition: sumset.c:221
logging functions.
int count
Total number of blocks.
Definition: sumset.h:41
long hashcmp_count
The count of hash compares done.
Definition: hashtable.h:133
int block_len
The block length.
Definition: sumset.h:39
A signature file using the BLAKE2 hash.
Definition: librsync.h:92
Bad value passed in to library, probably an application bug.
Definition: librsync.h:181
Out of memory.
Definition: librsync.h:170
long match_count
The count of matches found.
Definition: hashtable.h:132
Bad magic number at start of stream.
Definition: librsync.h:174
void rs_hexify(char *to_buf, void const *from_buf, int from_len)
Convert from_len bytes at from_buf into a hex representation in to_buf, which must be twice as long p...
Definition: hex.c:32
rs_weak_sum_t weak_sum
Block&#39;s weak checksum.
Definition: sumset.h:29
int strong_sum_len
The block strong sum length.
Definition: sumset.h:40
void rs_free_sumset(rs_signature_t *)
Deep deallocation of checksums.
Definition: sumset.c:241
Don&#39;t show function name in message.
Definition: trace.h:90
A signature file with MD4 signatures.
Definition: librsync.h:85
long entrycmp_count
The count of entry compares done.
Definition: hashtable.h:134
void * block_sigs
The packed block_sigs for all blocks.
Definition: sumset.h:43
Public header for librsync.
Signature of a whole file.
Definition: sumset.h:37
Informational.
Definition: librsync.h:107
rs_result
Return codes from nonblocking rsync operations.
Definition: librsync.h:161
int magic
The signature magic value.
Definition: sumset.h:38
long calc_strong_count
The count of strongsum calcs done.
Definition: sumset.h:47
void rs_sumset_dump(rs_signature_t const *)
Dump signatures to the log.
Definition: sumset.c:247
Signature of a single block.
Definition: sumset.h:28
long find_count
The count of finds tried.
Definition: hashtable.h:131
The hashtable type.
Definition: hashtable.h:126
Completed successfully.
Definition: librsync.h:162
A generic open addressing hashtable.
rs_strong_sum_t strong_sum
Block&#39;s strong checksum.
Definition: sumset.h:30