/*
   Unix SMB/CIFS implementation.

   trivial database library

   Copyright (C) Volker Lendecke 2012,2013
   Copyright (C) Stefan Metzmacher 2013,2014
   Copyright (C) Michael Adam 2014

     ** NOTE! The following LGPL license applies to the tdb
     ** library. This does NOT imply that all of Samba is released
     ** under the LGPL

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 3 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "tdb_private.h"
#include "system/threads.h"

#ifdef USE_TDB_MUTEX_LOCKING

/*
 * If we run with mutexes, we store the "struct tdb_mutexes" at the
 * beginning of the file. We store an additional tdb_header right
 * beyond the mutex area, page aligned. All the offsets within the tdb
 * are relative to the area behind the mutex area. tdb->map_ptr points
 * behind the mmap area as well, so the read and write path in the
 * mutex case can remain unchanged.
 *
 * Early in the mutex development the mutexes were placed between the hash
 * chain pointers and the real tdb data. This had two drawbacks: First, it
 * made pointer calculations more complex. Second, we had to mmap the mutex
 * area twice. One mapping was the normal map_ptr in the tdb, which
 * frequently changed from within tdb_oob. At least the Linux glibc robust
 * mutex code assumes constant pointers in memory, so a constantly changing
 * mmap area destroys the mutex list. Thus we had to mmap the first bytes of
 * the file with a second mmap call. With that scheme, very weird errors
 * happened that went away once the mutex mmap was done on a separate file:
 * apparently mapping the same memory area twice does not reliably end up
 * accessing the same physical page, and looking at the mutexes in gdb,
 * stale data showed up after some re-mapping. To avoid a separate mutex
 * file, the code now puts the real content of the tdb file after the mutex
 * area. This way we do not have overlapping mmap areas: the mutex area is
 * mmapped once and not changed, while the tdb data area's mmap is
 * constantly changed but does not overlap.
 */
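
/*
 * A sketch of the resulting file layout (offsets are illustrative; the
 * real mutex area size comes from tdb_mutex_size() below):
 *
 *   file offset 0:           struct tdb_mutexes (header copy, allrecord
 *                            mutex, freelist + hash chain mutexes)
 *   ... padding up to TDB_ALIGN(..., tdb->page_size) ...
 *   file offset mutex_size:  struct tdb_header (the "real" tdb starts here)
 *   mutex_size + N:          what a plain tdb file would have at offset N
 *
 * tdb->map_ptr points at file offset mutex_size, so all tdb_off_t based
 * code keeps working unchanged.
 */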

struct tdb_mutexes {
        struct tdb_header hdr;

        /* protect allrecord_lock */
        pthread_mutex_t allrecord_mutex;

        /*
         * F_UNLCK: free,
         * F_RDLCK: shared,
         * F_WRLCK: exclusive
         */
        short int allrecord_lock;

        /*
         * Index 0 is the freelist mutex, followed by
         * one mutex per hashchain.
         */
        pthread_mutex_t hashchains[1];
};
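
/*
 * Note: hashchains[1] is the pre-C99 idiom for a flexible array member.
 * The area is always mapped with tdb->hash_size extra mutexes behind the
 * struct (see tdb_mutex_size() below), so the valid indexes are 0 for the
 * freelist and 1..hash_size for the hash chains.
 */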

bool tdb_have_mutexes(struct tdb_context *tdb)
{
        return ((tdb->feature_flags & TDB_FEATURE_FLAG_MUTEX) != 0);
}

size_t tdb_mutex_size(struct tdb_context *tdb)
{
        size_t mutex_size;

        if (!tdb_have_mutexes(tdb)) {
                return 0;
        }

        mutex_size = sizeof(struct tdb_mutexes);
        mutex_size += tdb->hash_size * sizeof(pthread_mutex_t);

        return TDB_ALIGN(mutex_size, tdb->page_size);
}
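
/*
 * Worked example with illustrative numbers only: assuming a 40 byte
 * pthread_mutex_t, hash_size = 10000 and a 4096 byte page size, the raw
 * size is sizeof(struct tdb_mutexes) + 10000 * 40, roughly 400k, which
 * TDB_ALIGN() then rounds up to the next 4096 byte boundary.
 */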

/*
 * Get the index for a chain mutex
 */
static bool tdb_mutex_index(struct tdb_context *tdb, off_t off, off_t len,
                            unsigned *idx)
{
        /*
         * Weird but true: We fcntl lock 1 byte at an offset 4 bytes before
         * the 4 bytes of the freelist start and the hash chain that is about
         * to be locked. See lock_offset() where the freelist is -1 vs the
         * "+1" in TDB_HASH_TOP(). Because the mutex array is represented in
         * the tdb file itself as data, we need to adjust the offset here.
         */
        const off_t freelist_lock_ofs = FREELIST_TOP - sizeof(tdb_off_t);

        if (!tdb_have_mutexes(tdb)) {
                return false;
        }
        if (len != 1) {
                /* Possibly the allrecord lock */
                return false;
        }
        if (off < freelist_lock_ofs) {
                /* One of the special locks */
                return false;
        }
        if (tdb->hash_size == 0) {
                /* tdb not initialized yet, called from tdb_open_ex() */
                return false;
        }
        if (off >= TDB_DATA_START(tdb->hash_size)) {
                /* Single record lock from traverses */
                return false;
        }

        /*
         * Now we know it's a freelist or hash chain lock. Those are always 4
         * byte aligned. Paranoia check.
         */
        if ((off % sizeof(tdb_off_t)) != 0) {
                abort();
        }

        /*
         * Re-index the fcntl offset into an offset into the mutex array
         */
        off -= freelist_lock_ofs; /* rebase to index 0 */
        off /= sizeof(tdb_off_t); /* 0 for freelist 1-n for hashchain */

        *idx = off;
        return true;
}
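
/*
 * Worked example of the re-indexing above, assuming (as the comment in
 * tdb_mutex_index() implies) that
 * TDB_HASH_TOP(h) == FREELIST_TOP + (h+1) * sizeof(tdb_off_t):
 *
 *   freelist lock:  off = FREELIST_TOP - 4   ->  idx = 0
 *   hash chain 0:   off = FREELIST_TOP       ->  idx = 1
 *   hash chain h:   off = FREELIST_TOP + 4h  ->  idx = h + 1
 */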

static bool tdb_have_mutex_chainlocks(struct tdb_context *tdb)
{
        int i;

        for (i=0; i < tdb->num_lockrecs; i++) {
                bool ret;
                unsigned idx;

                ret = tdb_mutex_index(tdb,
                                      tdb->lockrecs[i].off,
                                      tdb->lockrecs[i].count,
                                      &idx);
                if (!ret) {
                        continue;
                }

                if (idx == 0) {
                        /* this is the freelist mutex */
                        continue;
                }

                return true;
        }

        return false;
}

static int chain_mutex_lock(pthread_mutex_t *m, bool waitflag)
{
        int ret;

        if (waitflag) {
                ret = pthread_mutex_lock(m);
        } else {
                ret = pthread_mutex_trylock(m);
        }
        if (ret != EOWNERDEAD) {
                return ret;
        }

        /*
         * For chainlocks, we don't do any cleanup (yet?)
         */
        return pthread_mutex_consistent(m);
}
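
/*
 * Background on the EOWNERDEAD branch above: with PTHREAD_MUTEX_ROBUST,
 * pthread_mutex_lock()/_trylock() return EOWNERDEAD (with the mutex
 * acquired) when the previous owner died while holding it. The new owner
 * must call pthread_mutex_consistent() before unlocking, otherwise the
 * mutex becomes permanently unusable (ENOTRECOVERABLE). A minimal generic
 * sketch of the pattern, where repair_state() is a hypothetical helper:
 *
 *      ret = pthread_mutex_lock(&m);
 *      if (ret == EOWNERDEAD) {
 *              repair_state();
 *              ret = pthread_mutex_consistent(&m);
 *      }
 */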

static int allrecord_mutex_lock(struct tdb_mutexes *m, bool waitflag)
{
        int ret;

        if (waitflag) {
                ret = pthread_mutex_lock(&m->allrecord_mutex);
        } else {
                ret = pthread_mutex_trylock(&m->allrecord_mutex);
        }
        if (ret != EOWNERDEAD) {
                return ret;
        }

        /*
         * The allrecord lock holder died. We need to reset the allrecord_lock
         * to F_UNLCK. This should also be the indication for
         * tdb_needs_recovery.
         */
        m->allrecord_lock = F_UNLCK;

        return pthread_mutex_consistent(&m->allrecord_mutex);
}

bool tdb_mutex_lock(struct tdb_context *tdb, int rw, off_t off, off_t len,
                    bool waitflag, int *pret)
{
        struct tdb_mutexes *m = tdb->mutexes;
        pthread_mutex_t *chain;
        int ret;
        unsigned idx;
        bool allrecord_ok;

        if (!tdb_mutex_index(tdb, off, len, &idx)) {
                return false;
        }
        chain = &m->hashchains[idx];

again:
        ret = chain_mutex_lock(chain, waitflag);
        if (ret == EBUSY) {
                ret = EAGAIN;
        }
        if (ret != 0) {
                errno = ret;
                goto fail;
        }

        if (idx == 0) {
                /*
                 * This is a freelist lock, which is independent of
                 * the allrecord lock. So we're done once we got the
                 * freelist mutex.
                 */
                *pret = 0;
                return true;
        }

        if (tdb_have_mutex_chainlocks(tdb)) {
                /*
                 * We can only check the allrecord lock once. If we do it with
                 * one chain mutex locked, we will deadlock with the allrecord
                 * locker process in the following way: We lock the first hash
                 * chain, we check for the allrecord lock. We keep the hash
                 * chain locked. Then the allrecord locker locks the
                 * allrecord_mutex. It walks the list of chain mutexes,
                 * locking them all in sequence. Meanwhile, we have the chain
                 * mutex locked, so the allrecord locker blocks trying to lock
                 * our chain mutex. Then we come in and try to lock the second
                 * chain lock, which in most cases will be the freelist. We
                 * see that the allrecord lock is locked and put ourselves on
                 * the allrecord_mutex. This will never be signalled though
                 * because the allrecord locker waits for us to give up the
                 * chain lock.
                 */

                *pret = 0;
                return true;
        }

        /*
         * Check if someone has the allrecord lock: queue if so.
         */

        allrecord_ok = false;

        if (m->allrecord_lock == F_UNLCK) {
                /*
                 * allrecord lock not taken
                 */
                allrecord_ok = true;
        }

        if ((m->allrecord_lock == F_RDLCK) && (rw == F_RDLCK)) {
                /*
                 * allrecord shared lock taken, but we only want to read
                 */
                allrecord_ok = true;
        }

        if (allrecord_ok) {
                *pret = 0;
                return true;
        }

        ret = pthread_mutex_unlock(chain);
        if (ret != 0) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
                         "(chain_mutex) failed: %s\n", strerror(ret)));
                errno = ret;
                goto fail;
        }
        ret = allrecord_mutex_lock(m, waitflag);
        if (ret == EBUSY) {
                ret = EAGAIN;
        }
        if (ret != 0) {
                if (waitflag || (ret != EAGAIN)) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_%slock"
                                 "(allrecord_mutex) failed: %s\n",
                                 waitflag ? "" : "try_", strerror(ret)));
                }
                errno = ret;
                goto fail;
        }
        ret = pthread_mutex_unlock(&m->allrecord_mutex);
        if (ret != 0) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
                         "(allrecord_mutex) failed: %s\n", strerror(ret)));
                errno = ret;
                goto fail;
        }
        goto again;

fail:
        *pret = -1;
        return true;
}
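
/*
 * Calling convention of tdb_mutex_lock()/tdb_mutex_unlock(): a true return
 * means the mutex path handled the request and *pret carries the 0/-1
 * result; a false return means "not covered by a mutex, fall back to
 * fcntl". A sketch of a hypothetical caller:
 *
 *      if (tdb_mutex_lock(tdb, rw, off, len, waitflag, &ret)) {
 *              return ret;
 *      }
 *      ... do the fcntl based locking ...
 */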

bool tdb_mutex_unlock(struct tdb_context *tdb, int rw, off_t off, off_t len,
                      int *pret)
{
        struct tdb_mutexes *m = tdb->mutexes;
        pthread_mutex_t *chain;
        int ret;
        unsigned idx;

        if (!tdb_mutex_index(tdb, off, len, &idx)) {
                return false;
        }
        chain = &m->hashchains[idx];

        ret = pthread_mutex_unlock(chain);
        if (ret == 0) {
                *pret = 0;
                return true;
        }
        errno = ret;
        *pret = -1;
        return true;
}

int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
                             enum tdb_lock_flags flags)
{
        struct tdb_mutexes *m = tdb->mutexes;
        int ret;
        uint32_t i;
        bool waitflag = (flags & TDB_LOCK_WAIT);
        int saved_errno;

        if (tdb->flags & TDB_NOLOCK) {
                return 0;
        }

        if (flags & TDB_LOCK_MARK_ONLY) {
                return 0;
        }

        ret = allrecord_mutex_lock(m, waitflag);
        if (!waitflag && (ret == EBUSY)) {
                errno = EAGAIN;
                tdb->ecode = TDB_ERR_LOCK;
                return -1;
        }
        if (ret != 0) {
                if (!(flags & TDB_LOCK_PROBE)) {
                        TDB_LOG((tdb, TDB_DEBUG_TRACE,
                                 "allrecord_mutex_lock() failed: %s\n",
                                 strerror(ret)));
                }
                tdb->ecode = TDB_ERR_LOCK;
                return -1;
        }

        if (m->allrecord_lock != F_UNLCK) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
                         (int)m->allrecord_lock));
                goto fail_unlock_allrecord_mutex;
        }
        m->allrecord_lock = (ltype == F_RDLCK) ? F_RDLCK : F_WRLCK;

        for (i=0; i<tdb->hash_size; i++) {

                /* ignore hashchains[0], the freelist */
                pthread_mutex_t *chain = &m->hashchains[i+1];

                ret = chain_mutex_lock(chain, waitflag);
                if (!waitflag && (ret == EBUSY)) {
                        errno = EAGAIN;
                        goto fail_unroll_allrecord_lock;
                }
                if (ret != 0) {
                        if (!(flags & TDB_LOCK_PROBE)) {
                                TDB_LOG((tdb, TDB_DEBUG_TRACE,
                                         "chain_mutex_lock() failed: %s\n",
                                         strerror(ret)));
                        }
                        errno = ret;
                        goto fail_unroll_allrecord_lock;
                }

                ret = pthread_mutex_unlock(chain);
                if (ret != 0) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
                                 "(chainlock) failed: %s\n", strerror(ret)));
                        errno = ret;
                        goto fail_unroll_allrecord_lock;
                }
        }
        /*
         * We leave this routine with m->allrecord_mutex locked
         */
        return 0;

fail_unroll_allrecord_lock:
        m->allrecord_lock = F_UNLCK;

fail_unlock_allrecord_mutex:
        saved_errno = errno;
        ret = pthread_mutex_unlock(&m->allrecord_mutex);
        if (ret != 0) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
                         "(allrecord_mutex) failed: %s\n", strerror(ret)));
        }
        errno = saved_errno;
        tdb->ecode = TDB_ERR_LOCK;
        return -1;
}
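
/*
 * Why the brief lock/unlock walk above is sufficient: m->allrecord_lock
 * is set before the walk, so every new tdb_mutex_lock() caller sees the
 * flag and queues on allrecord_mutex, while every pre-existing chain
 * holder blocks the walk until it drops its chain mutex. Once the walk
 * completes, no conflicting chain lock remains.
 */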

int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
        struct tdb_mutexes *m = tdb->mutexes;
        int ret;
        uint32_t i;

        if (tdb->flags & TDB_NOLOCK) {
                return 0;
        }

        /*
         * Our only caller tdb_allrecord_upgrade()
         * guarantees that we already own the allrecord lock.
         *
         * Which means m->allrecord_mutex is still locked by us.
         */

        if (m->allrecord_lock != F_RDLCK) {
                tdb->ecode = TDB_ERR_LOCK;
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
                         (int)m->allrecord_lock));
                return -1;
        }

        m->allrecord_lock = F_WRLCK;

        for (i=0; i<tdb->hash_size; i++) {

                /* ignore hashchains[0], the freelist */
                pthread_mutex_t *chain = &m->hashchains[i+1];

                ret = chain_mutex_lock(chain, true);
                if (ret != 0) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_lock"
                                 "(chainlock) failed: %s\n", strerror(ret)));
                        goto fail_unroll_allrecord_lock;
                }

                ret = pthread_mutex_unlock(chain);
                if (ret != 0) {
                        TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
                                 "(chainlock) failed: %s\n", strerror(ret)));
                        goto fail_unroll_allrecord_lock;
                }
        }

        return 0;

fail_unroll_allrecord_lock:
        m->allrecord_lock = F_RDLCK;
        tdb->ecode = TDB_ERR_LOCK;
        return -1;
}

void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
        struct tdb_mutexes *m = tdb->mutexes;

        /*
         * Our only caller tdb_allrecord_upgrade() (in the error case)
         * guarantees that we already own the allrecord lock.
         *
         * Which means m->allrecord_mutex is still locked by us.
         */

        if (m->allrecord_lock != F_WRLCK) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
                         (int)m->allrecord_lock));
                return;
        }

        m->allrecord_lock = F_RDLCK;
        return;
}


int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
        struct tdb_mutexes *m = tdb->mutexes;
        short old;
        int ret;

        if (tdb->flags & TDB_NOLOCK) {
                return 0;
        }

        /*
         * Our only callers tdb_allrecord_unlock() and
         * tdb_allrecord_lock() (in the error path)
         * guarantee that we already own the allrecord lock.
         *
         * Which means m->allrecord_mutex is still locked by us.
         */

        if ((m->allrecord_lock != F_RDLCK) && (m->allrecord_lock != F_WRLCK)) {
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "allrecord_lock == %d\n",
                         (int)m->allrecord_lock));
                return -1;
        }

        old = m->allrecord_lock;
        m->allrecord_lock = F_UNLCK;

        ret = pthread_mutex_unlock(&m->allrecord_mutex);
        if (ret != 0) {
                m->allrecord_lock = old;
                TDB_LOG((tdb, TDB_DEBUG_FATAL, "pthread_mutex_unlock"
                         "(allrecord_mutex) failed: %s\n", strerror(ret)));
                return -1;
        }
        return 0;
}

int tdb_mutex_init(struct tdb_context *tdb)
{
        struct tdb_mutexes *m;
        pthread_mutexattr_t ma;
        uint32_t i;
        int ret;

        ret = tdb_mutex_mmap(tdb);
        if (ret == -1) {
                return -1;
        }
        m = tdb->mutexes;

        ret = pthread_mutexattr_init(&ma);
        if (ret != 0) {
                goto fail_munmap;
        }
        ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
        if (ret != 0) {
                goto fail;
        }
        ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
        if (ret != 0) {
                goto fail;
        }
        ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
        if (ret != 0) {
                goto fail;
        }

        for (i=0; i<tdb->hash_size+1; i++) {
                pthread_mutex_t *chain = &m->hashchains[i];

                ret = pthread_mutex_init(chain, &ma);
                if (ret != 0) {
                        goto fail;
                }
        }

        m->allrecord_lock = F_UNLCK;

        ret = pthread_mutex_init(&m->allrecord_mutex, &ma);
        if (ret != 0) {
                goto fail;
        }
        ret = 0;
fail:
        pthread_mutexattr_destroy(&ma);
fail_munmap:

        if (ret == 0) {
                return 0;
        }

        tdb_mutex_munmap(tdb);

        errno = ret;
        return -1;
}
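
/*
 * Note on the mutexattr choices in tdb_mutex_init():
 * PTHREAD_PROCESS_SHARED makes the mutexes usable from all processes that
 * map the file, PTHREAD_MUTEX_ROBUST makes a dead owner's lock recoverable
 * via EOWNERDEAD, and PTHREAD_MUTEX_ERRORCHECK turns a relock by the owner
 * into EDEADLK instead of a silent self-deadlock.
 */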

int tdb_mutex_mmap(struct tdb_context *tdb)
{
        size_t len;
        void *ptr;

        len = tdb_mutex_size(tdb);
        if (len == 0) {
                return 0;
        }

        if (tdb->mutexes != NULL) {
                return 0;
        }

        ptr = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FILE,
                   tdb->fd, 0);
        if (ptr == MAP_FAILED) {
                return -1;
        }
        tdb->mutexes = (struct tdb_mutexes *)ptr;

        return 0;
}

int tdb_mutex_munmap(struct tdb_context *tdb)
{
        size_t len;
        int ret;

        len = tdb_mutex_size(tdb);
        if (len == 0) {
                return 0;
        }

        ret = munmap(tdb->mutexes, len);
        if (ret == -1) {
                return -1;
        }
        tdb->mutexes = NULL;

        return 0;
}

static bool tdb_mutex_locking_cached;

static bool tdb_mutex_locking_supported(void)
{
        pthread_mutexattr_t ma;
        pthread_mutex_t m;
        int ret;
        static bool initialized;

        if (initialized) {
                return tdb_mutex_locking_cached;
        }

        initialized = true;

        ret = pthread_mutexattr_init(&ma);
        if (ret != 0) {
                return false;
        }
        ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
        if (ret != 0) {
                goto cleanup_ma;
        }
        ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
        if (ret != 0) {
                goto cleanup_ma;
        }
        ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
        if (ret != 0) {
                goto cleanup_ma;
        }
        ret = pthread_mutex_init(&m, &ma);
        if (ret != 0) {
                goto cleanup_ma;
        }
        ret = pthread_mutex_lock(&m);
        if (ret != 0) {
                goto cleanup_m;
        }
        /*
         * This makes sure we have real mutexes
         * from a threading library instead of just
         * stubs from libc.
         */
        ret = pthread_mutex_lock(&m);
        if (ret != EDEADLK) {
                goto cleanup_lock;
        }
        ret = pthread_mutex_unlock(&m);
        if (ret != 0) {
                goto cleanup_m;
        }

        tdb_mutex_locking_cached = true;
        goto cleanup_m;

cleanup_lock:
        pthread_mutex_unlock(&m);
cleanup_m:
        pthread_mutex_destroy(&m);
cleanup_ma:
        pthread_mutexattr_destroy(&ma);
        return tdb_mutex_locking_cached;
}
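
/*
 * The double pthread_mutex_lock() above relies on the
 * PTHREAD_MUTEX_ERRORCHECK type: a real implementation returns EDEADLK
 * when the owner relocks, while the no-op stubs some libcs provide when
 * no thread library is linked in return 0 and are thereby detected.
 */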

static void (*tdb_robust_mutex_old_handler)(int) = SIG_ERR;
static pid_t tdb_robust_mutex_pid = -1;

static bool tdb_robust_mutex_setup_sigchild(void (*handler)(int),
                                            void (**p_old_handler)(int))
{
#ifdef HAVE_SIGACTION
        struct sigaction act;
        struct sigaction oldact;

        memset(&act, '\0', sizeof(act));

        act.sa_handler = handler;
#ifdef SA_RESTART
        act.sa_flags = SA_RESTART;
#endif
        sigemptyset(&act.sa_mask);
        sigaddset(&act.sa_mask, SIGCHLD);
        sigaction(SIGCHLD, &act, &oldact);
        if (p_old_handler) {
                *p_old_handler = oldact.sa_handler;
        }
        return true;
#else /* !HAVE_SIGACTION */
        return false;
#endif
}

static void tdb_robust_mutex_handler(int sig)
{
        pid_t child_pid = tdb_robust_mutex_pid;

        if (child_pid != -1) {
                pid_t pid;

                pid = waitpid(child_pid, NULL, WNOHANG);
                if (pid == -1) {
                        switch (errno) {
                        case ECHILD:
                                tdb_robust_mutex_pid = -1;
                                return;

                        default:
                                return;
                        }
                }
                if (pid == child_pid) {
                        tdb_robust_mutex_pid = -1;
                        return;
                }
        }

        if (tdb_robust_mutex_old_handler == SIG_DFL) {
                return;
        }
        if (tdb_robust_mutex_old_handler == SIG_IGN) {
                return;
        }
        if (tdb_robust_mutex_old_handler == SIG_ERR) {
                return;
        }

        tdb_robust_mutex_old_handler(sig);
}

static void tdb_robust_mutex_wait_for_child(pid_t *child_pid)
{
        int options = WNOHANG;

        if (*child_pid == -1) {
                return;
        }

        while (tdb_robust_mutex_pid > 0) {
                pid_t pid;

                /*
                 * First we try with WNOHANG, as the process might not exist
                 * anymore. Once we've sent SIGKILL we block waiting for the
                 * exit.
                 */
                pid = waitpid(*child_pid, NULL, options);
                if (pid == -1) {
                        if (errno == EINTR) {
                                continue;
                        } else if (errno == ECHILD) {
                                break;
                        } else {
                                abort();
                        }
                }
                if (pid == *child_pid) {
                        break;
                }

                kill(*child_pid, SIGKILL);
                options = 0;
        }

        tdb_robust_mutex_pid = -1;
        *child_pid = -1;
}
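
/*
 * Overview of the self-test below, which checks once per process that
 * robust mutexes actually work end to end (the steps summarize the code
 * that follows):
 *
 *   1. mmap an anonymous shared robust mutex and fork a child
 *   2. the child locks the mutex and reports the result via pipe_up
 *   3. the parent verifies pthread_mutex_trylock() == EBUSY, then tells
 *      the child via pipe_down to exit while still holding the lock
 *   4. the parent reaps the child and expects trylock() == EOWNERDEAD
 *   5. pthread_mutex_consistent() must then succeed, and a relock must
 *      fail with EDEADLK/EBUSY before the final unlock
 */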

_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
        void *ptr = NULL;
        pthread_mutex_t *m = NULL;
        pthread_mutexattr_t ma;
        int ret = 1;
        int pipe_down[2] = { -1, -1 };
        int pipe_up[2] = { -1, -1 };
        ssize_t nread;
        char c = 0;
        bool ok;
        static bool initialized;
        pid_t saved_child_pid = -1;
        bool cleanup_ma = false;

        if (initialized) {
                return tdb_mutex_locking_cached;
        }

        initialized = true;

        ok = tdb_mutex_locking_supported();
        if (!ok) {
                return false;
        }

        tdb_mutex_locking_cached = false;

        ptr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE,
                   MAP_SHARED|MAP_ANON, -1 /* fd */, 0);
        if (ptr == MAP_FAILED) {
                return false;
        }

        ret = pipe(pipe_down);
        if (ret != 0) {
                goto cleanup;
        }
        ret = pipe(pipe_up);
        if (ret != 0) {
                goto cleanup;
        }

        ret = pthread_mutexattr_init(&ma);
        if (ret != 0) {
                goto cleanup;
        }
        cleanup_ma = true;
        ret = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK);
        if (ret != 0) {
                goto cleanup;
        }
        ret = pthread_mutexattr_setpshared(&ma, PTHREAD_PROCESS_SHARED);
        if (ret != 0) {
                goto cleanup;
        }
        ret = pthread_mutexattr_setrobust(&ma, PTHREAD_MUTEX_ROBUST);
        if (ret != 0) {
                goto cleanup;
        }
        ret = pthread_mutex_init(ptr, &ma);
        if (ret != 0) {
                goto cleanup;
        }
        m = (pthread_mutex_t *)ptr;

        if (tdb_robust_mutex_setup_sigchild(tdb_robust_mutex_handler,
                        &tdb_robust_mutex_old_handler) == false) {
                goto cleanup;
        }

        tdb_robust_mutex_pid = fork();
        saved_child_pid = tdb_robust_mutex_pid;
        if (tdb_robust_mutex_pid == 0) {
                size_t nwritten;
                close(pipe_down[1]);
                close(pipe_up[0]);
                ret = pthread_mutex_lock(m);
                nwritten = write(pipe_up[1], &ret, sizeof(ret));
                if (nwritten != sizeof(ret)) {
                        _exit(1);
                }
                if (ret != 0) {
                        _exit(1);
                }
                nread = read(pipe_down[0], &c, 1);
                if (nread != 1) {
                        _exit(1);
                }
                /* leave locked */
                _exit(0);
        }
        if (tdb_robust_mutex_pid == -1) {
                goto cleanup;
        }
        close(pipe_down[0]);
        pipe_down[0] = -1;
        close(pipe_up[1]);
        pipe_up[1] = -1;

        nread = read(pipe_up[0], &ret, sizeof(ret));
        if (nread != sizeof(ret)) {
                goto cleanup;
        }

        ret = pthread_mutex_trylock(m);
        if (ret != EBUSY) {
                if (ret == 0) {
                        pthread_mutex_unlock(m);
                }
                goto cleanup;
        }

        if (write(pipe_down[1], &c, 1) != 1) {
                goto cleanup;
        }

        nread = read(pipe_up[0], &c, 1);
        if (nread != 0) {
                goto cleanup;
        }

        tdb_robust_mutex_wait_for_child(&saved_child_pid);

        ret = pthread_mutex_trylock(m);
        if (ret != EOWNERDEAD) {
                if (ret == 0) {
                        pthread_mutex_unlock(m);
                }
                goto cleanup;
        }

        ret = pthread_mutex_consistent(m);
        if (ret != 0) {
                goto cleanup;
        }

        ret = pthread_mutex_trylock(m);
        if (ret != EDEADLK && ret != EBUSY) {
                pthread_mutex_unlock(m);
                goto cleanup;
        }

        ret = pthread_mutex_unlock(m);
        if (ret != 0) {
                goto cleanup;
        }

        tdb_mutex_locking_cached = true;

cleanup:
        /*
         * Note that we don't reset the signal handler; we just reset
         * tdb_robust_mutex_pid to -1. This is ok as this code path is only
         * called once per process.
         *
         * Leaving our signal handler avoids races with other threads
         * potentially setting up their SIGCHLD handlers.
         *
         * The worst thing that can happen is that the other newer signal
         * handler will get the SIGCHLD signal for our child and/or reap the
         * child with a wait() function. tdb_robust_mutex_wait_for_child()
         * handles the case where waitpid returns ECHILD.
         */
        tdb_robust_mutex_wait_for_child(&saved_child_pid);

        if (m != NULL) {
                pthread_mutex_destroy(m);
        }
        if (cleanup_ma) {
                pthread_mutexattr_destroy(&ma);
        }
        if (pipe_down[0] != -1) {
                close(pipe_down[0]);
        }
        if (pipe_down[1] != -1) {
                close(pipe_down[1]);
        }
        if (pipe_up[0] != -1) {
                close(pipe_up[0]);
        }
        if (pipe_up[1] != -1) {
                close(pipe_up[1]);
        }
        if (ptr != NULL) {
                munmap(ptr, sizeof(pthread_mutex_t));
        }

        return tdb_mutex_locking_cached;
}

#else

size_t tdb_mutex_size(struct tdb_context *tdb)
{
        return 0;
}

bool tdb_have_mutexes(struct tdb_context *tdb)
{
        return false;
}

int tdb_mutex_allrecord_lock(struct tdb_context *tdb, int ltype,
                             enum tdb_lock_flags flags)
{
        tdb->ecode = TDB_ERR_LOCK;
        return -1;
}

int tdb_mutex_allrecord_unlock(struct tdb_context *tdb)
{
        return -1;
}

int tdb_mutex_allrecord_upgrade(struct tdb_context *tdb)
{
        tdb->ecode = TDB_ERR_LOCK;
        return -1;
}

void tdb_mutex_allrecord_downgrade(struct tdb_context *tdb)
{
        return;
}

int tdb_mutex_mmap(struct tdb_context *tdb)
{
        errno = ENOSYS;
        return -1;
}

int tdb_mutex_munmap(struct tdb_context *tdb)
{
        errno = ENOSYS;
        return -1;
}

int tdb_mutex_init(struct tdb_context *tdb)
{
        errno = ENOSYS;
        return -1;
}

_PUBLIC_ bool tdb_runtime_check_for_robust_mutexes(void)
{
        return false;
}

#endif