scanner.module 36.9 KB
Newer Older
1
<?php
2
// $Id$
3
4
5
6

/**
 * @file
 * Search and Replace Scanner - works on all nodes text content.
7
8
9
10
11
12
 *
 * The Search and Replace Scanner can do regular expression matches
 * against the title, body and CCK text content fields on all nodes in your system.
 * This is useful for finding html strings that Drupal's normal search will
 * ignore. And it can replace the matched text. Very handy if you are changing
 * the name of your company, or are changing the URL of a link included
Tao Starbow's avatar
Tao Starbow committed
13
 * multiple times in multiple nodes.
14
15
 *
 * The module allows you to configure which fields and tables to work with,
Tao Starbow's avatar
Tao Starbow committed
16
 * and also to add in custom tables and fields for modules that don't use CCK.
17
 *
Tao Starbow's avatar
Tao Starbow committed
18
19
 * Limitations:
 *  Only works with Mysql
20
 *
Tao Starbow's avatar
Tao Starbow committed
21
 * Warning:
22
 *  This is a very powerful tool, and as such is very dangerous.  You can
Tao Starbow's avatar
Tao Starbow committed
23
24
 *  easily destroy your entire site with it.  Be sure to back up your database
 *  before using it.  No, really.
25
 *
Tao Starbow's avatar
Tao Starbow committed
26
 * Todo:
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
 *  Provide better highlighting for search results
 *   - right now there's a known bug where multiple search terms
 *     on the same line aren't all highlighted. (The hit count
 *     is correct, though, and all items are replaced correctly.)
 *
 * Credits:
 *  Version 5.x-1.0 by:
 *   - Tao Starbow http://www.starbowconsulting.com
 *     Drupal username: starbow
 *  Version 5.x-2.0 by:
 *   - Amit Asaravala http://www.returncontrol.com
 *     Drupal username: aasarava
 *   - Jason Salter jason http://www.fivepaths.com
 *     Drupal username: jpsalter
 *   - Sponsored by Five Paths Consulting http://www.fivepaths.com
42
43
 */

44
45
46
47
48
49
50
51
52
53
54
55
56

// The characters that get backslash-escaped (via addcslashes()) in the
// search, "preceded by" and "followed by" strings when the user has NOT
// asked for a regular expression search, so they are matched literally:
define('SCANNER_REGEX_CHARS', '.\/+*?[^]$() {}=!<>|:');

// The modes that the search-and-replace process can be in; the current one
// is kept in $_SESSION['scanner_status'].
// We need to track the modes to prevent accidentally starting a replacement
// or a long search if a user leaves mid-way through the process
// and comes back again with the same session variables.
define('SCANNER_STATUS_GO_SEARCH', 1);
define('SCANNER_STATUS_GO_CONFIRM', 2);
define('SCANNER_STATUS_GO_REPLACE', 3);


57
58
59
60
61
62
/**
 * Implementation of hook_menu().
 *
 * Registers the search/replace page with its "Search", "Settings" and "Undo"
 * tabs, the two confirmation callbacks, and a second entry for the settings
 * form under admin/settings.
 *
 * @param $may_cache
 *   Whether cacheable items are being requested. Every item of this module
 *   is cacheable, so nothing is returned when this is FALSE.
 * @return
 *   An array of menu item definitions.
 */
function scanner_menu($may_cache) {
  $items = array();

  if (!$may_cache) {
    return $items;
  }

  // Each permission is needed by several items; look each one up only once.
  $can_scan = user_access('perform search and replace');
  $can_configure = user_access('administer scanner settings');

  $items[] = array(
    'path' => 'admin/content/scanner',
    'title' => t('Search and Replace Scanner'),
    'callback' => 'scanner_view',
    'access' => $can_scan,
  );
  $items[] = array(
    'path' => 'admin/content/scanner/scan',
    'title' => t('Search'),
    'access' => $can_scan,
    'type' => MENU_DEFAULT_LOCAL_TASK,
  );
  $items[] = array(
    'path' => 'admin/content/scanner/scan/confirm',
    'title' => t('Confirm Replace'),
    'access' => $can_scan,
    'callback' => 'drupal_get_form',
    'callback arguments' => array('scanner_confirm_form'),
    'type' => MENU_CALLBACK,
  );
  $items[] = array(
    'path' => 'admin/content/scanner/undo/confirm',
    'title' => t('Confirm Undo'),
    'access' => $can_scan,
    'callback' => 'drupal_get_form',
    'callback arguments' => array('scanner_undo_confirm_form'),
    'type' => MENU_CALLBACK,
  );
  $items[] = array( // Shows up on scanner page as tab.
    'path' => 'admin/content/scanner/settings',
    'callback' => 'drupal_get_form',
    'callback arguments' => array('scanner_admin_form'),
    'access' => $can_configure,
    'type' => MENU_LOCAL_TASK,
    'title' => t('Settings'),
    'weight' => 1,
  );
  $items[] = array( // Shows up on scanner page as tab.
    'path' => 'admin/content/scanner/undo',
    'callback' => 'scanner_undo_page',
    'access' => $can_scan,
    'type' => MENU_LOCAL_TASK,
    'title' => t('Undo'),
  );
  $items[] = array( // Shows up on admin page.
    'path' => 'admin/settings/scanner',
    'callback' => 'drupal_get_form',
    'callback arguments' => array('scanner_admin_form'),
    'access' => $can_configure,
    'title' => t('Search and Replace Scanner'),
  );

  return $items;
}

120
121
122
123
124
125
126
/**
 * Implementation of hook_perm().
 *
 * @return
 *   The names of the permissions this module defines: one for changing the
 *   scanner settings and one for running searches and replacements.
 */
function scanner_perm() {
  $permissions = array();
  $permissions[] = 'administer scanner settings';
  $permissions[] = 'perform search and replace';

  return $permissions;
}

127
128
129
130
/**
 * Menu callback; presents the scan form and results.
 *
 * What happens depends on the search text and status that the form submit
 * handlers left in the session:
 *  - no pending search: only the form is shown;
 *  - SCANNER_STATUS_GO_CONFIRM: the user is redirected to the confirm page;
 *  - SCANNER_STATUS_GO_REPLACE: the replacement is executed;
 *  - otherwise: the search is executed.
 * After a search or replacement has run, the saved form input and status are
 * removed from the session so that reloading the page does not run it again.
 *
 * @return
 *   The rendered form, followed by the results box when a scan was run.
 */
function scanner_view() {
  //using set_html_head because it seems unnecessary to load a separate css
  // file for just two simple declarations:
  drupal_set_html_head('
    <style type="text/css">
      #scanner-form .form-submit { margin-top:0; }
      #scanner-form .form-item { margin-bottom:0; }
    </style>
  ');

  //javascript checks to make sure user has entered some search text.
  // Note: the boolean literals must be lowercase here; JavaScript has no
  // FALSE/TRUE, so uppercase would throw and the form would submit anyway.
  drupal_add_js("
    $(document).ready(function() {
      $('input[@type=submit][@value=Search]').click(function() {
        var searchfield = $('#edit-search');
        var chars = searchfield.val().length;
        if (chars == 0) {
          alert('Please provide some search text and try again.');
          searchfield.addClass('error');
          searchfield[0].focus();
          return false;
        } else if (chars < 3) {
          return confirm('Searching for a keyword that has fewer than three characters could take a long time. Are you sure you want to continue?');
        }
        return true;
      });
    });
  ", 'inline');

  // Both keys are absent on a plain visit to the page.
  $search = isset($_SESSION['scanner_search']) ? $_SESSION['scanner_search'] : NULL;
  $status = isset($_SESSION['scanner_status']) ? $_SESSION['scanner_status'] : NULL;

  if (!is_null($search) && $status >= SCANNER_STATUS_GO_SEARCH) {

    if ($status == SCANNER_STATUS_GO_CONFIRM) {
      drupal_goto('admin/content/scanner/scan/confirm');
    }
    else if ($status == SCANNER_STATUS_GO_REPLACE) {
      $resulttxt = '<a name="results"></a>'. t('Replacement Results');
      $results = scanner_execute('replace');
    }
    else {
      $resulttxt = t('Search Results');
      $results = scanner_execute('search');
    }

    if ($results) {
      $results = '<a name="results"></a>'. theme('box', $resulttxt, $results);
    }
    else {
      $results = theme('box', t('Your scan yielded no results'), NULL);
    }

    // The form is built before the session is cleared because it takes its
    // default values from the session.
    $output = drupal_get_form('scanner_form');
    $output .= $results;

    //clear any old search form input:
    unset($_SESSION['scanner_search']);
    unset($_SESSION['scanner_replace']);
    unset($_SESSION['scanner_preceded']);
    unset($_SESSION['scanner_followed']);
    unset($_SESSION['scanner_mode']);
    unset($_SESSION['scanner_wholeword']);
    unset($_SESSION['scanner_published']);
    unset($_SESSION['scanner_regex']);
    unset($_SESSION['scanner_terms']);
    //clear old status:
    unset($_SESSION['scanner_status']);

    return $output;
  }

  // Nothing pending: just show the empty form.
  return drupal_get_form('scanner_form');
}

/**
 * The search and replace form.
 *
 * Takes no arguments: the default values come from the input saved in the
 * session by scanner_form_submit(), falling back to the site-wide defaults
 * stored in the scanner_* variables for the checkbox options.
 *
 * @return $form
 */
function scanner_form() {
  $form = array();

  // None of the session keys exist until the form has been submitted once,
  // so every read is guarded.
  $search = isset($_SESSION['scanner_search']) ? $_SESSION['scanner_search'] : NULL;
  $replace = isset($_SESSION['scanner_replace']) ? $_SESSION['scanner_replace'] : NULL;
  $preceded = isset($_SESSION['scanner_preceded']) ? $_SESSION['scanner_preceded'] : NULL;
  $followed = isset($_SESSION['scanner_followed']) ? $_SESSION['scanner_followed'] : NULL;
  $mode = isset($_SESSION['scanner_mode']) ? $_SESSION['scanner_mode'] : variable_get('scanner_mode', 0);
  $wholeword = isset($_SESSION['scanner_wholeword']) ? $_SESSION['scanner_wholeword'] : variable_get('scanner_wholeword', 0);
  $published = isset($_SESSION['scanner_published']) ? $_SESSION['scanner_published'] : variable_get('scanner_published', 1);
  $regex = isset($_SESSION['scanner_regex']) ? $_SESSION['scanner_regex'] : variable_get('scanner_regex', 0);
  $terms = isset($_SESSION['scanner_terms']) ? $_SESSION['scanner_terms'] : NULL;

  $form['search'] = array(
    '#type' => 'textfield',
    '#default_value' => $search,
    '#title' => t('Step 1: Search for'),
    '#maxlength' => 256,
  );
  $form['submit_search'] = array(
    '#type' => 'submit',
    '#value' => t('Search'),
  );

  $form['replace'] = array(
    '#type' => 'textfield',
    '#default_value' => $replace,
    '#title' => t('Step 2: Replace with'),
    '#maxlength' => 256,
  );
  $form['submit_replace'] = array(
    '#type' => 'submit',
    '#value' => t('Replace'),
  );

  $form['options'] = array(
    '#type' => 'fieldset',
    '#title' => t('Search Options'),
    '#collapsible' => TRUE,
    '#collapsed' => FALSE,
  );

  $form['options']['surrounding'] = array(
    '#type' => 'fieldset',
    '#title' => t('Surrounding Text'),
    '#collapsible' => FALSE,
    '#description' => t('You can limit matches by providing the text that should appear immediately before or after the search text. Remember to account for spaces.  Note: Case sensitivity and regular expression options will all apply here, too. Whole word is not recommended.'),
  );

  $form['options']['surrounding']['preceded'] = array(
    '#type' => 'textfield',
    '#title' => t('Preceded by'),
    '#default_value' => $preceded,
    '#maxlength' => 256,
  );

  /* TODO: for possible future implementation...
   * Depends on whether negative lookahead and negative lookbehind
   *  can accurately be approximated in MySQL...
  $form['options']['surrounding']['notpreceded'] = array(
    '#type' => 'checkbox',
    '#title' => t('NOT preceded by the text above'),
    '#default_value' => $notpreceded,
  );
  */

  $form['options']['surrounding']['followed'] = array(
    '#type' => 'textfield',
    '#title' => t('Followed by'),
    '#default_value' => $followed,
    '#maxlength' => 256,
  );

  /* TODO: for possible future implementation...
   * Depends on whether negative lookahead and negative lookbehind
   *  can accurately be approximated in MySQL...
  $form['options']['surrounding']['notfollowed'] = array(
    '#type' => 'checkbox',
    '#title' => t('NOT followed by the text above'),
    '#default_value' => $notfollowed,
  );
  */

  $form['options']['mode'] = array(
    '#type' => 'checkbox',
    '#title' => t('Case sensitive search'),
    '#default_value' => $mode,
    '#description' => t("Check this if the search should only return results that exactly match the capitalization of your search terms."),
  );

  $form['options']['wholeword'] = array(
    '#type' => 'checkbox',
    '#title' => t('Match whole word'),
    '#default_value' => $wholeword,
    '#description' => t("Check this if you don't want the search to match any partial words. For instance, if you search for 'run', a whole word search will <em>not</em> match 'running'."),
  );

  $form['options']['regex'] = array(
    '#type' => 'checkbox',
    '#title' => t('Use regular expressions in search'),
    '#default_value' => $regex,
    '#description' => t('Check this if you want to use regular expressions in your search terms.'),
  );

  $form['options']['published'] = array(
    '#type' => 'checkbox',
    '#title' => t('Published nodes only'),
    '#default_value' => $published,
    '#description' => t('Check this if you only want your search and replace to affect fields in nodes that are published.'),
  );

  // The term filter is only offered when at least one vocabulary has been
  // enabled in the scanner_vocabulary variable.
  $scanner_vocabularies = array_filter(variable_get('scanner_vocabulary', array()));

  if (count($scanner_vocabularies)) {

    $vocabularies = taxonomy_get_vocabularies();
    $options = array();

    // Build one option group per enabled vocabulary; terms are prefixed with
    // one dash per level of depth to show the hierarchy.
    foreach ($vocabularies as $vid => $vocabulary) {
      if (in_array($vid, $scanner_vocabularies)) {
        $tree = taxonomy_get_tree($vid);
        if ($tree && (count($tree) > 0)) {
          $options[$vocabulary->name] = array();
          foreach ($tree as $term) {
            $options[$vocabulary->name][$term->tid] = str_repeat('-', $term->depth) . $term->name;
          }
        }
      }
    }

    $form['options']['terms'] = array(
      '#type' => 'select',
      '#title' => t('Only match nodes with these terms'),
      '#options' => $options,
      '#default_value' => $terms,
      '#multiple' => TRUE,
    );
  }

  return $form;
}

/**
 * Validate form input.
 *
 * Flags the search field with an error when it is empty or contains only
 * whitespace.
 *
 * @param $form_id
 * @param $form_values
 */
function scanner_form_validate($form_id, $form_values) {
  $keywords = trim($form_values['search']);

  // Some search text was supplied, so there is nothing to complain about.
  if (strlen($keywords) > 0) {
    return;
  }

  form_set_error('search', t('Please enter some keywords.'));
}

/**
 * Handles submission of the search and replace form.
 *
 * Saves the submitted input in the session and records which step comes
 * next: the confirmation page when "Replace" was pressed, a search otherwise.
 *
 * @param $form_id
 * @param $form_values
 * @return the new path that will be goto'ed.
 */
function scanner_form_submit($form_id, $form_values) {
  //save form input:
  $_SESSION['scanner_search']    = $form_values['search'];
  $_SESSION['scanner_preceded']  = $form_values['preceded'];
  //$_SESSION['scanner_notpreceded'] = $form_values['notpreceded'];
  $_SESSION['scanner_followed']  = $form_values['followed'];
  //$_SESSION['scanner_notfollowed'] = $form_values['notfollowed'];
  $_SESSION['scanner_mode']      = $form_values['mode'];
  $_SESSION['scanner_wholeword'] = $form_values['wholeword'];
  $_SESSION['scanner_published'] = $form_values['published'];
  $_SESSION['scanner_regex']     = $form_values['regex'];
  $_SESSION['scanner_replace']   = $form_values['replace'];

  // The terms select only exists when a vocabulary has been enabled for the
  // scanner, so the value may be missing; array_filter() needs an array.
  $terms = (isset($form_values['terms']) && is_array($form_values['terms'])) ? $form_values['terms'] : array();
  $_SESSION['scanner_terms'] = array_filter($terms);

  // The button label is translated in scanner_form(), so the comparison has
  // to use the translated label as well (as the confirm forms already do).
  if ($form_values['op'] == t('Replace')) {
    $_SESSION['scanner_status'] = SCANNER_STATUS_GO_CONFIRM;
  }
  else {
    $_SESSION['scanner_status'] = SCANNER_STATUS_GO_SEARCH;
  }

  return 'admin/content/scanner';
}


/**
 * Scanner confirmation form to prevent people from accidentally
 * replacing things they don't intend to.
 *
 * Summarises the pending replacement (as saved in the session by
 * scanner_form_submit()) and offers "Yes, Continue" / "No, Cancel" buttons.
 *
 * @return $form
 */
function scanner_confirm_form() {
  $form = array();

  // The session keys are missing when the page is opened directly, so every
  // read is guarded.
  $search    = isset($_SESSION['scanner_search']) ? $_SESSION['scanner_search'] : NULL;
  $replace   = isset($_SESSION['scanner_replace']) ? $_SESSION['scanner_replace'] : NULL;
  $preceded  = isset($_SESSION['scanner_preceded']) ? $_SESSION['scanner_preceded'] : NULL;
  $followed  = isset($_SESSION['scanner_followed']) ? $_SESSION['scanner_followed'] : NULL;
  $wholeword = isset($_SESSION['scanner_wholeword']) ? $_SESSION['scanner_wholeword'] : NULL;
  $regex     = isset($_SESSION['scanner_regex']) ? $_SESSION['scanner_regex'] : NULL;
  $mode      = isset($_SESSION['scanner_mode']) ? $_SESSION['scanner_mode'] : NULL;

  $modetxt = ($mode) ? t('Case sensitive') : t('Not case sensitive: will replace any matches regardless of capitalization.');

  $msg = (
    '<p>'. t('Are you sure you want to make the following replacement?') .'</p>'.
    '<div class="scanner-confirm">'.
    '  <label>'. t('Search for') .':</label> ['. check_plain($search) .']'.
    '</div>'
  );
  if ($preceded) {
    $msg .= (
      '<div class="scanner-confirm">'.
      '  <label>'. t('Preceded by') .':</label> ['. check_plain($preceded) .']'.
      '</div>'
    );
  }
  if ($followed) {
    $msg .= (
      '<div class="scanner-confirm">'.
      '  <label>'. t('Followed by') .':</label> ['. check_plain($followed) .']'.
      '</div>'
    );
  }
  $msg .= (
    '<div class="scanner-confirm">'.
    '  <label>'. t('Replace with') .':</label> ['. check_plain($replace) .']'
  );
  // An empty replacement removes every match, so call it out explicitly.
  if ($replace === '') {
    $msg .= ' <span class="warning">'. t('This will delete any occurrences of the search terms!') .'</span>';
  }
  $msg .= (
    '</div>'.
    '<div class="scanner-confirm">'.
    '  <label>'. t('Mode') .':</label> '. $modetxt .
    '</div>'
  );
  if ($wholeword) {
    $msg .= (
      '<div class="scanner-confirm">'.
      '  <label>'. t('Match whole word') .':</label> '. t('Yes') .
      '</div>'
    );
  }
  if ($regex) {
    $msg .= (
      '<div class="scanner-confirm">'.
      '  <label>'. t('Use regular expressions') .':</label> '. t('Yes') .
      '</div>'
    );
  }

  $form['warning'] = array(
    '#type' => 'markup',
    '#value' => $msg,
  );

  $form['confirm'] = array(
    '#type' => 'submit',
    '#value' => t('Yes, Continue'),
  );
  $form['cancel'] = array(
    '#type' => 'submit',
    '#value' => t('No, Cancel'),
  );

  return $form;
}


/**
 * Submission handling for scanner confirmation form.
 *
 * Marks the session as ready to replace when the user confirmed; any other
 * button clears the pending status.
 *
 * @param $form_id
 * @param $form_values
 * @return the path of the scanner page, which is redirected to.
 */
function scanner_confirm_form_submit($form_id, $form_values) {
  $confirmed = ($form_values['op'] == t('Yes, Continue'));

  if (!$confirmed) {
    unset($_SESSION['scanner_status']);
  }
  else {
    $_SESSION['scanner_status'] = SCANNER_STATUS_GO_REPLACE;
  }

  return 'admin/content/scanner';
}

/**
 * Menu callback; lists prior search and replace events, newest first.
 *
 * Each row links to the undo confirmation form: the link reads "Redo" for
 * events that have already been undone and "Undo" for all others.
 *
 * @return
 *   The themed table.
 */
function scanner_undo_page() {
  $header = array(t('Date'), t('Searched'), t('Replaced'), t('Count'), t('Operation'));

  // Must be an array even when the {scanner} table holds no events yet.
  $rows = array();

  $sandrs = db_query('SELECT undo_id, time, searched, replaced, count, undone FROM {scanner} ORDER BY undo_id DESC');

  while ($sandr = db_fetch_object($sandrs)) {

    $query = 'undo_id='. $sandr->undo_id;
    $label = $sandr->undone ? t('Redo') : t('Undo');
    $operation = l($label, 'admin/content/scanner/undo/confirm', array(), $query);

    $rows[] = array(
      format_date($sandr->time),
      check_plain($sandr->searched),
      check_plain($sandr->replaced),
      $sandr->count,
      $operation,
    );
  }

  return theme('table', $header, $rows, NULL, t('Prior Search and Replace Events'));
}

/**
 * Confirmation form for undoing (or redoing) a search and replace event.
 *
 * The event is identified by the "undo_id" query string parameter. When no
 * matching record exists, the form only shows a "not found" notice.
 *
 * @return $form
 */
function scanner_undo_confirm_form() {
  $form = array();

  // The parameter is absent when the page is opened without a query string.
  $undo_id = isset($_GET['undo_id']) ? (int) $_GET['undo_id'] : 0;

  $undo = NULL;
  if ($undo_id > 0) {
    $undo = db_fetch_object(db_query('SELECT undo_id, searched, replaced FROM {scanner} WHERE undo_id = %d', $undo_id));
  }

  // $undo stays empty when no id was given or the lookup found no row.
  if ($undo && $undo->undo_id > 0) {
    $form['info'] = array(
      '#value' => '<h2>'. t('Do you want to undo:') .'</h2>'.
                  '<h3>'.  t('Searched for:') .'</h3>'.
                  '<p>[<em>'. check_plain($undo->searched) .'</em>]</p>'.
                  '<h3>'. t('Replaced with:') .'</h3>'.
                  '<p>[<em>'. check_plain($undo->replaced) .'</em>]</p>',
    );

    $form['undo_id'] = array(
      '#type' => 'hidden',
      '#value' => $undo->undo_id,
    );

    $form['confirm'] = array(
      '#type' => 'submit',
      '#value' => t('Yes, Continue'),
    );

    $form['cancel'] = array(
      '#type' => 'submit',
      '#value' => t('No, Cancel'),
    );

  }
  else {
    $form['info'] = array(
      '#value' => '<h2>'. t('No undo event was found') .'</h2>',
    );
  }

  return $form;
}

/**
 * Submission handling for the undo confirmation form.
 *
 * On confirmation, every node recorded in the event is saved as a new
 * revision copied from its pre-replacement revision (undo) or from its
 * post-replacement revision (redo, when the event was already undone), and
 * the event's "undone" flag is toggled.
 *
 * @param $form_id
 * @param $form
 *   The submitted form values.
 * @return the path of the undo page, which is redirected to.
 */
function scanner_undo_confirm_form_submit($form_id, $form) {

  if ($form['op'] == t('Yes, Continue')) {
    $undo = db_fetch_object(db_query('SELECT undo_data, undone FROM {scanner} WHERE undo_id = %d', $form['undo_id']));

    // The record may be missing, and unserialize() returns FALSE on data it
    // cannot read; treat both as "nothing to revert".
    $undos = $undo ? unserialize($undo->undo_data) : FALSE;
    if (!is_array($undos)) {
      $undos = array();
    }

    // An event that has not been undone yet is undone now (restoring the old
    // revisions); one that was already undone is redone (new revisions).
    $undoing = ($undo && $undo->undone == 0);
    $vid_key = $undoing ? 'old_vid' : 'new_vid';
    $undone = $undoing ? 1 : 0;

    $count = 0;

    foreach ($undos as $nid => $sandr_event) {

      $node = node_load($nid, $sandr_event[$vid_key]);

      // The node or that revision may have been deleted since the event;
      // saving the failed result would store a bogus node, so skip it.
      if (!$node) {
        continue;
      }

      $node->revision = TRUE;
      $node->log = t('Copy of the revision from %date via Search and Replace Undo', array('%date' => format_date($node->revision_timestamp)));

      node_save($node);
      ++$count;

    }

    drupal_set_message($count .' '. t('Nodes reverted'));
    db_query('UPDATE {scanner} SET undone = %d WHERE undo_id = %d', $undone, $form['undo_id']);

  }
  else {
    drupal_set_message(t('Undo / Redo canceled'));
  }

  return 'admin/content/scanner/undo';
}

/**
 * Handles the actual search and replace.
 *
 * Reads the search parameters from the session, builds a MySQL REGEXP
 * condition and a matching PCRE pattern from them, and runs the query against
 * every table/field returned by _scanner_get_selected_tables_map().
 *  - 'search': collects an excerpt with the hits highlighted for each row.
 *  - 'replace': rewrites the field, saves the node (one new revision per
 *    node) and records old/new revision ids so the event can be undone.
 *
 * When the run gets within 5 seconds of max_execution_time, the limit is
 * raised once; if time runs short again, progress is stored in the
 * scanner_partially_processed / scanner_partial_undo variables and the run
 * stops, so that a re-submitted replace skips the fields already handled.
 *
 * @param str $searchtype - either 'search', or 'replace'
 * @return The themed results.
 */
function scanner_execute($searchtype = 'search') {
  global $user;

  // variables to monitor possible timeout
  $max_execution_time = ini_get('max_execution_time');
  $start_time = time();
  $expanded = FALSE;
  $shutting_down = FALSE;

  // Stays NULL (not an empty array) when nothing matches, so the theme
  // functions receive the same value as before.
  $results = NULL;

  // get process and undo data if saved from timeout
  $processed = variable_get('scanner_partially_processed', array());
  $undo_data = variable_get('scanner_partial_undo', array());

  unset($_SESSION['scanner_status']);

  $search      = $_SESSION['scanner_search'];
  $replace     = $_SESSION['scanner_replace'];
  $preceded    = $_SESSION['scanner_preceded'];
  //$notpreceded = $_SESSION['scanner_notpreceded'];
  $followed    = $_SESSION['scanner_followed'];
  //$notfollowed = $_SESSION['scanner_notfollowed'];
  $mode        = $_SESSION['scanner_mode'];
  $wholeword   = $_SESSION['scanner_wholeword'];
  $published   = $_SESSION['scanner_published'];
  $regex       = $_SESSION['scanner_regex'];
  $terms       = $_SESSION['scanner_terms'];

  if ($searchtype == 'search') {
    drupal_set_message(t('Scanning for: [%search] ...', array('%search' => $search)));
  }
  else { //searchtype == 'replace'
    drupal_set_message(t('Replacing [%search] with [%replace] ...', array('%search' => $search, '%replace' => $replace)));
  }

  if ($mode) { // Case Sensitive
    $where = "CAST(t.%s AS BINARY) "; // BINARY to force case sensitive.
    $flag = NULL;
  }
  else { // Case Insensitive
    $where = "t.%s ";
    $flag = 'i'; //ci flag for use in php preg_search and preg_replace
  }

  // In the PCRE pattern the surrounding text becomes a lookbehind/lookahead,
  // so it has to be present but is not part of the match that gets
  // highlighted or replaced.
  $preceded_php = '';
  if (!empty($preceded)) {
    if (!$regex) {
      $preceded = addcslashes($preceded, SCANNER_REGEX_CHARS);
    }
    $preceded_php = '(?<='. $preceded .')';
  }
  $followed_php = '';
  if (!empty($followed)) {
    if (!$regex) {
      $followed = addcslashes($followed, SCANNER_REGEX_CHARS);
    }
    $followed_php = '(?='. $followed .')';
  }

  // $search_db is the pattern handed to MySQL, $search_php the one for PCRE.
  //Case 1:
  if ($wholeword && $regex) {
    $where .= "REGEXP '[[:<:]]%s[[:>:]]'";
    $search_db = $preceded . $search . $followed;
    $search_php = '\b'. $preceded_php . $search . $followed_php .'\b';
  }
  //Case 2:
  else if ($wholeword && !$regex) {
    $where .= "REGEXP '[[:<:]]%s[[:>:]]'";
    $search_db = $preceded . addcslashes($search, SCANNER_REGEX_CHARS) . $followed;
    $search_php = '\b'. $preceded_php . addcslashes($search, SCANNER_REGEX_CHARS) . $followed_php .'\b';
  }
  //Case 3:
  else if (!$wholeword && $regex) {
    $where .= "REGEXP '%s'";
    $search_db = $preceded . $search . $followed;
    $search_php = $preceded_php . $search . $followed_php;
  }
  //Case 4:
  else { //!wholeword and !regex:
    $where .= "REGEXP '%s'";
    $search_db = $preceded . addcslashes($search, SCANNER_REGEX_CHARS) . $followed;
    $search_php = $preceded_php . addcslashes($search, SCANNER_REGEX_CHARS) . $followed_php;
  }

  //if terms selected, then put together extra join and where clause:
  $join = '';
  $terms_params = array();
  if (is_array($terms) && count($terms)) {
    $terms_where = array();
    foreach ($terms as $term) {
      $terms_where[] = 'tn.tid = %d';
      $terms_params[] = $term;
    }
    $join = 'INNER JOIN {term_node} tn ON t.nid = tn.nid';
    $where .= ' AND ('. implode(' OR ', $terms_where) .')';
  }

  if ($published) {
    $where .= ' AND n.status = 1 ';
  }

  $tables_map = _scanner_get_selected_tables_map();

  foreach ($tables_map as $map) {

    $table = $map['table'];
    $field = $map['field'];
    $type  = $map['type'];
    $on    = $map['on'] ? $map['on'] : 'vid';

    // Order matters: the values fill the placeholders in the SELECT, FROM,
    // JOIN and WHERE parts below (including those inside $where) in turn.
    $query_params = array($field, $table, $on, $on, $type, $field, $search_db);
    if (!empty($join)) {
      $query_params = array_merge($query_params, $terms_params);
    }

    $result = db_query("
      SELECT t.%s as content, t.nid, n.title
      FROM {%s} t
      INNER JOIN {node} n ON t.%s = n.%s
      $join
      WHERE n.type = '%s' AND $where
    ", $query_params);

    while ($row = db_fetch_object($result)) {
      $content = $row->content;
      $matches = array();
      $text = '';

      // checking for possible timeout
      // if within 5 seconds of timeout - attempt to expand environment.
      // A max_execution_time of 0 means "no limit"; without the first test
      // the comparison would be true right away and abort the run.
      if ($max_execution_time > 0 && time() >= ($start_time + $max_execution_time - 5)) {
        if (!$expanded) {
          // Only logged-in users get verbose feedback from the helper.
          $verbose = ($user->uid > 0);
          if (_scanner_change_env('max_execution_time', '600', $verbose)) {
            drupal_set_message(t('Default max_execution_time too small and changed to 10 minutes.'),'error');
            $max_execution_time = 600;
          }
          $expanded = TRUE;
        }
        // if expanded environment still running out of time - shutdown process
        else {
          $shutting_down = TRUE;
          variable_set('scanner_partially_processed', $processed);
          variable_set('scanner_partial_undo', $undo_data);
          if ($searchtype == 'search') {
            drupal_set_message(t('Did not have enough time to complete search.'),'error');
          }
          else {
            drupal_set_message(t('Did not have enough time to complete. Please re-submit replace'),'error');
          }
          // Leave both the row loop and the table loop.
          break 2;
        }
      }

      /*
       * SEARCH
       */
      if ($searchtype == 'search') {
        //pull out the terms and highlight them for display in search results:
        $regexstr = "/(.{0,130}?)($search_php)(.{0,130})/$flag";
        $hits = preg_match_all($regexstr, $content, $matches, PREG_SET_ORDER);
        if ($hits > 0) {
          foreach ($matches as $match) {
            if ($match[1]) {
              $text .= '...'. htmlentities($match[1], ENT_COMPAT, 'UTF-8');
            }
            $text .= '<strong>'. htmlentities($match[2], ENT_COMPAT, 'UTF-8') .'</strong>';
            if ($match[3]) {
              $text .= htmlentities($match[3], ENT_COMPAT, 'UTF-8') .'...';
            }
          }
        }
        else {
          $text = "<div class='warning'>" . t("Can't display search result due to conflict between search term and internal preg_match_all function.") .'</div>';
        }

        $results[] = array(
          'title' => $row->title,
          'type' => $type,
          'count' => $hits,
          'field' => $field,
          'nid' => $row->nid,
          'text' => $text,
        );
      }

      /*
       * REPLACE
       * + check to see if already processed
       */
      else if (!isset($processed[$field][$row->nid])) {
        $hits = 0;
        $newcontent = preg_replace("/$search_php/$flag", $replace, $content, -1, $hits);

        $thenode = node_load(array('nid' => $row->nid));

        //see if we're dealing with a CCK text field and therefore need to strip the
        // "_value" off the end:
        preg_match('/(.+)_value$/', $field, $matches);
        if (empty($matches[0])) { //if not CCK text field:
          $thenode->$field = $newcontent;
        }
        else {
          // Same effect as $thenode->{field}[0]['value'] = $newcontent, done
          // through a dynamic property name instead of eval().
          $cck_field = $matches[1];
          $cck_values = isset($thenode->$cck_field) ? $thenode->$cck_field : array();
          $cck_values[0]['value'] = $newcontent;
          $thenode->$cck_field = $cck_values;
        }

        // NOTE: a revision only created for the first change of the node.
        // subsequent changes of the same node do not generate additional revisions:
        if (!isset($undo_data[$thenode->nid]['new_vid'])) {
          $thenode->revision = TRUE;
          $thenode->log = t('@name replaced %search with %replace via Scanner Search and Replace module.', array('@name' => $user->name, '%search' => $search, '%replace' => $replace));
          $undo_data[$thenode->nid]['old_vid'] = $thenode->vid;
        }

        if (variable_get('scanner_rebuild_teasers', 1)) {
          $thenode->teaser = node_teaser($thenode->body, $thenode->format);
        }
        node_save($thenode);

        // array to log completed fields in case of shutdown
        $processed[$field][$row->nid] = TRUE;

        // undo data construction
        $undo_data[$thenode->nid]['new_vid'] = $thenode->vid; //now set to updated vid after node_save()
        $results[] = array(
          'title' => $thenode->title,
          'type' => $thenode->type,
          'count' => $hits,
          'field' => $field,
          'nid' => $thenode->nid,
        );
      }

    } //end while
  } //end foreach

  // if completed
  if (!$shutting_down) {
    variable_del('scanner_partially_processed');
    variable_del('scanner_partial_undo');
  }

  if ($searchtype == 'search') {
    return theme('scanner_results', $results);
  }
  else { //searchtype == 'replace'
    // Only a completed run is recorded as an undoable event.
    if (count($undo_data) && !$shutting_down) {
      $undo_id = db_next_id('{scanner}_undo_id');
      db_query('INSERT INTO {scanner} (undo_id, undo_data, undone, searched, replaced, count, time) VALUES (%d, "%s", %d, "%s", "%s", %d, %d)', $undo_id, serialize($undo_data), 0, $search, $replace, count($undo_data), time());
    }

    return theme('scanner_replace_results', $results);
  }
}

// ***************************************************************************
// Settings ******************************************************************
// ***************************************************************************

/**
 * Search and Replace Settings form.
 *
 * Builds the module's administration form:
 *  - a read-only summary of the fields currently selected for searching,
 *  - the default search options (case sensitivity, whole word, regular
 *    expression, published-only, teaser rebuilding),
 *  - optional per-vocabulary restrictions when the taxonomy module is enabled,
 *  - one checkbox per searchable field, and
 *  - a textarea for custom field definitions.
 *
 * Every form element key doubles as the name of the variable read through
 * variable_get() for its default value.
 *
 * @return $form
 *   The form array after being passed through system_settings_form().
 */
function scanner_admin_form() {
  drupal_set_title(t('Scanner Settings'));

  $form = array();

  // Summarise the currently selected fields as an HTML list.
  $output = '';
  $table_map = _scanner_get_selected_tables_map();
  sort($table_map);
  foreach ($table_map as $item) {
    // Type and field names may originate from the free-text "Custom Fields"
    // setting, so escape them before placing them in markup.
    $output .= '<li><b>'. check_plain($item['type']) .':</b> '. check_plain($item['field']);
    // Not every entry carries an 'on' key, so test with !empty() to avoid a
    // notice. The leading space keeps the suffix from running into the field
    // name ("body on vid" rather than "bodyon vid").
    if (!empty($item['on'])) {
      $output .= ' '. t('on !on', array('!on' => check_plain($item['on'])));
    }
    $output .= '</li>';
  }

  $form['selected'] = array(
    '#type' => 'fieldset',
    '#title' => t('Current Settings'),
    '#collapsible' => TRUE,
  );

  $form['selected']['info']['#value'] = '<p>'. t('Fields that will be searched (in [nodetype: fieldname] order):') .'</p><ul>'. $output .'</ul>';

  // Default values for the options offered on the search form.
  $form['settings'] = array(
    '#type' => 'fieldset',
    '#title' => t('Scanner Options'),
    '#collapsible' => TRUE,
  );

  $form['settings']['scanner_mode'] = array(
    '#type' => 'checkbox',
    '#title' => t('Default: Case Sensitive Search Mode'),
    '#default_value' => variable_get('scanner_mode', 0),
  );

  $form['settings']['scanner_wholeword'] = array(
    '#type' => 'checkbox',
    '#title' => t('Default: Match Whole Word'),
    '#default_value' => variable_get('scanner_wholeword', 0),
  );

  $form['settings']['scanner_regex'] = array(
    '#type' => 'checkbox',
    '#title' => t('Default: Regular Expression Search'),
    '#default_value' => variable_get('scanner_regex', 0),
  );

  $form['settings']['scanner_published'] = array(
    '#type' => 'checkbox',
    '#title' => t('Default: Search Published Nodes Only'),
    '#default_value' => variable_get('scanner_published', 1),
  );

  $form['settings']['scanner_rebuild_teasers'] = array(
    '#type' => 'checkbox',
    '#title' => t('Rebuild Teasers on Replace'),
    '#default_value' => variable_get('scanner_rebuild_teasers', 1),
    '#description' => t('If this box is checked: The teasers for any nodes that are modified in a search-and-replace action will be rebuilt to reflect the replacements in other fields; you do not need to check any teaser fields for nodes in the "Fields" section below.  If this box is unchecked: Teasers will remain untouched; you can select specific teaser fields below to include in search-and-replaces.'),
  );

  // Offer vocabulary restrictions only when taxonomy is available and at
  // least one vocabulary exists.
  if (module_exists('taxonomy')) {
    $vocabularies = taxonomy_get_vocabularies();

    if (count($vocabularies)) {

      $options = array();
      foreach ($vocabularies as $vocabulary) {
        $options[$vocabulary->vid] = $vocabulary->name;
      }

      $form['settings']['scanner_vocabulary'] = array(
        '#type' => 'checkboxes',
        '#title' => t("Allow restrictions by terms in a vocabulary"),
        '#options' => $options,
        '#default_value' => variable_get('scanner_vocabulary', array()),
      );
    }
  }

  // One checkbox per field that could be searched.
  $form['tables'] = array(
    '#type' => 'fieldset',
    '#title' => t('Fields that can be searched'),
    '#description' => t('Fields are listed in [nodetype: fieldname] order:'),
    '#collapsible' => TRUE,
  );

  $table_map = _scanner_get_all_tables_map();
  sort($table_map);
  foreach ($table_map as $item) {
    // The key is also the variable name, so it must stay in exactly this
    // field_table_type form.
    $key = 'scanner_'. $item['field'] .'_'. $item['table'] .'_'. $item['type'];
    $form['tables'][$key] = array(
      '#type' => 'checkbox',
      '#title' => '<b>'. check_plain($item['type']) .':</b> '. check_plain($item['field']),
      '#default_value' => variable_get($key, FALSE), // default to not checked
    );
  }

  $form['scanner_custom'] = array(
    '#type' => 'textarea',
    '#title' => t('Custom Fields'),
    '#default_value' => variable_get('scanner_custom', NULL),
    '#description' => t('one per row, <i>field</i> in <i>table</i> of type <i>nodetype</i> on <i>vid or nid</i>'),
  );

  return system_settings_form($form);
}



// ***************************************************************************
// Internal Utility Functions ************************************************
// ***************************************************************************

/**
 * Get all text fields.
Tao Starbow's avatar
Tao Starbow committed
993
994
 * This is all very fragile, based on how CCK stores fields.
 * Works for CCK 1.6.
995
996
997
998
 *
 * @return map of fields and tables.
 */
function _scanner_get_all_tables_map() {
999
1000
  //note, each array in the multidim array that is returned should be in the
  // following order: type, field, table.