scanner.module 12.6 KB
Newer Older
1
2
3
4
5
6
<?php

/**
 * @file
 * Search and Replace Scanner - works on all nodes text content.
 * 
Tao Starbow's avatar
Tao Starbow committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
 * The Search and Replace Scanner can do regular expression matches 
 * against the body and CCK text content fields on all nodes in your system. 
 * This is useful for finding html strings that Drupal's normal search will 
 * ignore. And it can replace the matched text. Very handy if you are changing 
 * the name of your company, or are changing the URL of a link included 
 * multiple times in multiple nodes.
 * 
 * The module allow you to configure which fields and tables to work with, 
 * and also to add in custom tables and fields for modules that don't use CCK.
 * 
 * Limitations:
 *  Only works with Mysql
 * 
 * Warning:
 *  This is a very powerful tool, and as such is very dangerous.  You can 
 *  easy distroy your entire site with it.  Be sure to backup your database
 *  before using it.  No, really.
 * 
 * Todo:
 * * Add more warning text.
 * * Deal with lots of results and possible timeouts. 
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
 */

/**
 * Implementation of hook_menu().
 */
function scanner_menu($may_cache) {
  global $user;
  $items = array();
  if ($may_cache) {
    $items[] = array(
      'path' => 'admin/content/scanner', 
      'title' => t('Search and Replace'), 
      'callback' => 'scanner_view',
      'access' => ($user->uid == 1),
    );  
    $items[] = array(
Tao Starbow's avatar
Tao Starbow committed
44
45
46
47
48
49
50
51
52
53
54
55
56
57
      'path' => 'admin/content/scanner/scan', 
      'title' => t('Search'), 
      'access' => ($user->uid == 1),
      'type' => MENU_DEFAULT_LOCAL_TASK,
    );  
    $items[] = array( // Shows up on scanner page as tab.
      'path' => 'admin/content/scanner/settings',
      'callback' => 'drupal_get_form',
      'callback arguments' => array('scanner_admin_form'), 
      'access' => ($user->uid == 1),
      'type' => MENU_LOCAL_TASK,
      'title' => t('Settings'),
    );  
    $items[] = array( // Shows up on admin page.
58
59
60
61
      'path' => 'admin/settings/scanner',
      'callback' => 'drupal_get_form',
      'callback arguments' => array('scanner_admin_form'), 
      'access' => ($user->uid == 1),
Tao Starbow's avatar
Tao Starbow committed
62
      'title' => t('Search and Replace'),
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
    );  
  }
  return $items;
}

/**
 * Menu callback; presents the scan form and results.
 */
function scanner_view() {
  $search = arg(3);
  $search = isset($search) ? base64_decode($search) : NULL;
  if (!is_null($search) && !isset($_POST['form_id'])) { // doing submit?
    $replace = arg(4);
    $replace = isset($replace) ? base64_decode($replace) : NULL;
    if (!is_null($replace)) {
      $results = scanner_replace($search, $replace);
    }
    else {
      $results = scanner_search($search);
    }
    if ($results) {
      $results = theme('box', t('Scan Results'), $results);
    }
    else {
      $results = theme('box', t('Your scan yielded no results'));
    }

    // Construct the search form.
    
    $output = drupal_get_form('scanner_form', $search, $replace);
    $output .= $results;

    return $output;
  }
  
  return drupal_get_form('scanner_form', $search);
}

/**
 * The search and replace form.
 *
 * @param str $search - regex to search for.
 * @param str $replace - string to substitute.
 * @return $form
 */
function scanner_form($search=NULL, $replace=NULL) {
  $form = array();
  $form['find'] = array (
    '#type' => 'textfield',
    '#title' => t('Scan for text'),
    '#default_value' => $search,
  );
  $form['replace'] = array (
    '#type' => 'textfield',
    '#title' => t('Replacement text'),
    '#default_value' => $replace,
  );

  $form['submit_find'] = array(
    '#type' => 'submit',
    '#value' => t('Find'),
  );
  $form['submit_replace'] = array(
    '#type' => 'submit',
    '#value' => t('Replace'),
  ); 
  return $form;
}

/**
 * Submit the search and replace form.
 *   This uses a trick taken from search.module, with is to store the
 * search key in the url and pass it to a new page to actually do the
 * work.  We also base64 encode the keys, so that the regexp special
 * characters can be safely passed in the URL.
 *
 * @param $form_id
 * @param $form_values
 * @return the new path that will be goto'ed.
 */
function scanner_form_submit($form_id, $form_values) {
  $search = trim($form_values['find']);
  if ($search == '') {
    form_set_error('find', t('Please enter some keywords.'));
  }

  $path = 'admin/content/scanner/'. base64_encode($search);
  
//  drupal_set_message( print_r($form_values, TRUE) );
  
  if($form_values['op']=='Replace') {
    $replace = trim($form_values['replace']);
    if ($replace) {
      $path .= '/'. base64_encode($replace);
    }
  }
    
  return $path;
}

/**
 * Go to the db and search all the selected field and tables for the
 * regexp string. Also does a php regexp match on the node content to
 * show all of the hits on the node.
 *
 * @param str $search - the regexp.
 * @return The themed results.
 */
function scanner_search( $search ) {
Tao Starbow's avatar
Tao Starbow committed
172
173
  $mode = variable_get('scanner_mode', 0);
  drupal_set_message( "Looking for $search. Mode = $mode." );
174
175
  
  $tables_map = _scanner_get_selected_tables_map();
Tao Starbow's avatar
Tao Starbow committed
176
177
178
179
180
181
182
183
  if( $mode == 'cs' ) { // case sensitive
    $flag = null;
    $where = "CAST(t.%s AS BINARY) REGEXP '%s'"; // BINARY to force case sensative.
  }
  else { // case insensitive
    $flag = 'i';
    $where = "t.%s REGEXP '%s'";
  }  
184
185
186
187
  
  foreach( $tables_map as $map ) {
    $table = $map['table'];
    $field = $map['field'];
Tao Starbow's avatar
Tao Starbow committed
188
    $on = $map['on'] ? $map['on'] : 'vid';
189
    $result = db_query( "SELECT t.%s as content, t.nid, n.title FROM {%s} t ".
Tao Starbow's avatar
Tao Starbow committed
190
191
192
193
                        " INNER JOIN {node} n ON t.%s = n.%s ".
                        " WHERE $where",
                         $field, $table, $on, $on, $field, $search);
    drupal_set_message("Scanning $field in $table on $on.");
194
195
196
197
    while( $obj = db_fetch_object($result) ){
      $content = $obj->content;
      $matches = array();
      $text = '';
Tao Starbow's avatar
Tao Starbow committed
198
      $hits = preg_match_all("/(.{0,8})($search)(.{0,8})/$flag", $content, $matches, PREG_SET_ORDER);
199
200
201
202
203
      if ($hits > 0) {
        foreach( $matches as $match ) {
          if( $match[1] ) {
            $text .= '...'. htmlentities($match[1]);
          }
Tao Starbow's avatar
Tao Starbow committed
204
          $text .= '<b>'. htmlentities($match[2]) .'</b>'; // Sadly destroys unicode, but needed to show html.
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
          if( $match[3] ) {
            $text .= htmlentities($match[3]) .'... ';
          }
        }
      }
      else {
        $text = "<div class='warning'>Can't display hit. RegEx mismatch.</div>";
      }
      $results[] = array(
        'title' => $obj->title,
        'link' => url('node/'.$obj->nid),
        'text' => $text,
      );
    }
  }
  return theme('scanner_results', $results);  
}

/**
Tao Starbow's avatar
Tao Starbow committed
224
 * Do a search and replace in the db.
225
226
227
228
229
230
231
232
233
234
235
 *
 * @param str $search - regexp.
 * @param str $replace - replacement text.
 */
function scanner_replace($search, $replace) {
  $output = "<p>Replacing <b>$search</b> with <b>$replace</b></p>";
  $updated = 0;
  $tables_map = _scanner_get_selected_tables_map();
  foreach( $tables_map as $map ) { 
    $table = $map['table'];
    $field = $map['field'];
Tao Starbow's avatar
Tao Starbow committed
236
237
238
239
    // Only set most recent version.
    $sql = "UPDATE {".$table."} t, {node} n SET t.$field=REPLACE(t.$field, '$search', '$replace') ".
           "WHERE n.vid = t.vid AND CAST(t.$field AS BINARY) REGEXP ('$search')";    
//    $output .= "<p>$sql</p>";
240
    $output .= "Updating $field in $table.<br/>";
Tao Starbow's avatar
Tao Starbow committed
241
242
243
244
245
246
247
248
249
250
251
252
253
    if( db_query( $sql ) ) {
      $updated += db_affected_rows();
      
      // Special case for node_revisions.title, sync node.title.
      if ($field == 'title' && $table == 'node_revisions') { 
        db_query("UPDATE {node} n SET n.title=REPLACE(n.title, '%s', '%s') ".
                 "WHERE CAST(n.title AS BINARY) REGEXP ('%s')", 
                 $search, $replace, $search );
      }
    }
    else {
      $output .= "<p>Bad SQL: $sql</p>";
    }
254
255
256
257
258
  }
  $output .= "<p>Updated $updated rows.</p>";
  return $output;
}

Tao Starbow's avatar
Tao Starbow committed
259
260
261
262
// ***************************************************************************
// Settings ******************************************************************
// ***************************************************************************

263
264
265
266
267
268
269
270
271
272
273
274
275
276
/**
 * Search and Replace Settings form.
 *
 * @return $form
 */
function scanner_admin_form() {
  drupal_set_title('Scanner Settings');
  $form['settings'] = array(
    '#type' => 'fieldset',
    '#title' => t('Scanner Settings'),
    '#collapsible' => TRUE,
  );
  $table_map = _scanner_get_selected_tables_map();
  foreach($table_map as $item) {
Tao Starbow's avatar
Tao Starbow committed
277
278
279
280
281
    $output .= '<li><b>'. $item['field'] .'</b> in <b>'. $item['table'] .'*** </b>';
    if( $item['on'] ) {
      $output .= 'on <b>'. $item['on'] .'</b>';
    }
    $output .= '</li>';
282
283
  }
  $form['settings']['info']['#value'] = '<p>Fields that will be searched.</p><ul>'. $output .'</ul>';
Tao Starbow's avatar
Tao Starbow committed
284
285
286
287
288
289
290
291
292
293
294
  $form['settings']['scanner_mode'] = array(
    '#type' => 'radios',
    '#title' => t('Search Mode'),
    '#options' => array(
      'cs' => t('Case Sensitive'),
      'ci' => t('Case Insensitive'),
    ),
    '#default_value' => variable_get('scanner_mode', 'cs'),
  );
  
  
295
296
  $form['tables'] = array(
    '#type' => 'fieldset',
Tao Starbow's avatar
Tao Starbow committed
297
    '#title' => t('Fields that can be searched'),
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
    '#collapsible' => TRUE,
  );
  $table_map = _scanner_get_all_tables_map();
  foreach($table_map as $item) {
    $key = 'scanner_'.$item['field'].'_'.$item['table'];
    $form['tables'][$key] = array(
      '#type' => 'checkbox',
      '#title' => '<b>'. $item['field'] .'</b> '. t('in') .' <b>'. $item['table'] .'</b>',
      '#default_value' => variable_get($key, true), // default to checked
    );
  }

  $form['scanner_custom'] = array(
    '#type' => 'textarea',
    '#title' => t('Custom Fields'),
    '#default_value' => variable_get('scanner_custom', NULL), 
Tao Starbow's avatar
Tao Starbow committed
314
    '#description' => "one per row, <i>field</i> in <i>table</i> on <i>vid or nid</i>",
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
  );
  
  return system_settings_form($form);
}

/**
 * Valiadate the settings form.
 *
 */
function scanner_admin_form_validate() {
  
}


// ***************************************************************************
// Internal Utility Functions ************************************************
// ***************************************************************************

/**
 * Get all text fields.
Tao Starbow's avatar
Tao Starbow committed
335
336
 * This is all very fragle based on how CCK stores fields.
 * Works for CCK 1.6.
337
338
339
340
 *
 * @return map of fields and tables.
 */
function _scanner_get_all_tables_map() {
Tao Starbow's avatar
Tao Starbow committed
341
  $tables_map[] = array('table' => 'node_revisions', 'field' => 'title'); 
342
343
344
  $tables_map[] = array('table' => 'node_revisions', 'field' => 'body'); 
  $tables_map[] = array('table' => 'node_revisions', 'field' => 'teaser'); 
  
Tao Starbow's avatar
Tao Starbow committed
345
346
347
  $results = db_query("SELECT nfi.field_name, nfi.type_name, nf.db_storage ".
                      "FROM {node_field_instance} nfi INNER JOIN {node_field} nf USING (field_name) ".
                      "WHERE nfi.widget_type='text'");
348
  while($field=db_fetch_array($results)){
Tao Starbow's avatar
Tao Starbow committed
349
350
351
352
353
354
355
356
357
358
    if( $field['db_storage'] ) {
      $table = 'content_type_'. $field['type_name']; 
    }
    else {
      $table = 'content_'. $field['field_name']; 
    }
    $tables_map[] = array(  // Modify to match current CCK storage rules.
      'table' => $table, 
      'field' => $field['field_name'] .'_value', 
    ); 
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
  }
  return $tables_map;
}

/**
 * Get the fields that have been selected for scanning.
 *
 * @return map of selected fields and tables.
 */
function _scanner_get_selected_tables_map() {
  $tables_map = _scanner_get_all_tables_map();
  foreach($tables_map as $i=>$item) {
    $key = 'scanner_'.$item['field'].'_'.$item['table'];
    if(!variable_get($key, true)) {
      unset($tables_map[$i]);
    }
  }
  $custom = variable_get('scanner_custom', NULL);
Tao Starbow's avatar
Tao Starbow committed
377
  preg_match_all( '/(.*) in (.*) on (.*)/', $custom, $matches, PREG_SET_ORDER );
378
  foreach($matches as $match){
Tao Starbow's avatar
Tao Starbow committed
379
    $tables_map[] = array('table'=>trim($match[2]), 'field'=>trim($match[1]), 'on'=>trim($match[3])); 
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
  }
  return $tables_map;
}

// ***************************************************************************
// Theme Functions ***********************************************************
// ***************************************************************************

/**
 * The the search results.
 *
 * @param map $results
 * @return html str.
 */
function theme_scanner_results($results) {
  $output = '<p>Matched '. count($results) .' nodes.</p>';
  $output .= '<dl class="scanner-results">';

  foreach ($results as $entry) {
    $output .= theme('scanner_item', $entry, $type);
  }
  $output .= '</dl>';
  $output .= theme('pager', NULL, 10, 0);

  return $output;
}

/**
 * Theme each search result hit.
 *
 * @param map $item.
 * @return html str.
 */
function theme_scanner_item($item) {
  $output = ' <dt class="title"><a href="'. check_url($item['link']) .'">'. check_plain($item['title']) .'</a></dt>';
  $info = array();

  if (is_array($item['extra'])) {
    $info = array_merge($info, $item['extra']);
  }
  $output .= ' <dd>'. $item['text'] .'</dd>';
  return $output;
}