# loader.tcl # # walk a directory and load the files into a forum. # create the forum if needed # # see http://xarg.net/writing/tuning/forums-scale # davis@xarg.net proc _walk dir { set files [list] foreach f [glob -nocomplain [file join $dir *]] { set type [file type $f] switch $type { directory { set files [concat $files [_walk $f]] } file { lappend files $f } default { # Goofy file types -- just ignore them } } } return $files } proc _load {dir} { set sma 400.0 set lma 400.0 set count 0 set skip 0 foreach post [_walk $dir] { regsub "^$dir/?" $post {} tail if {![regexp {(.*)/([0-9]*)$} $tail match group id]} { ns_write "No match $post\n" } else { if {![expr {($count + $skip) % 1000}]} { ns_write "Space used::::------------------------------\n" space_used ns_write "------------------------------\n" } # read the file and chop it up... set tstart [clock clicks] set fp [open $post] set body [read $fp] close $fp if {![regexp -line -indices {(^\s*$)} $body split]} { set text {} } { set text [string range $body [expr [lindex $split 1] + 1] end] set header [string range $body 0 [expr [lindex $split 0] - 1]] } if {![regexp -line {^From:\s*(.*)$} $header match name]} { set name anonymous set email anonymous } else { if {![regexp {([^\[\<\(\{]*@[^\(\<\[\]\)\>\}\s]*)} $name match email]} { set email anonymous } else { set email [string tolower $email] } } if {![regexp -line {^Message-ID:\s*(.*)$} $header match message_id]} { set message_id fake-$group-$id } if {![regexp -line {^Subject:\s*(.*)$} $header match subject]} { set subject "(none)" } if {![regexp -line {^Date:\s*(.*)$} $header match date]} { set date {} } if {![regexp -line {^References:\s*(.*)$} $header match references]} { set references {} } else { set references [split [string trim $references] " "] } ns_write "-----------------------------------------------------------------------------\ngroup: $group id: $id\n From: $email\n Name: $name\n ID: $message_id\n Subject: $subject\n Date: $date\n References: $references\n" set package_id [_node $group] set cstart [clock clicks] # get forum set fname "comp.os.[string map {/ .} $group]" if {![db_0or1row get_forum {select forum_id from forums_forums where package_id = :package_id and name = :fname}]} { set forum_id [forum::new -name $fname -presentation_type threaded -package_id $package_id] } # check if duped... if {[db_0or1row exists {select 1 where exists (select 1 from uu_message_map where key = :message_id and forum_id = :forum_id)}]} { ns_write "\nalready inserted... skip\#$skip\n\n" incr skip continue } # find possible parent id set parent_id {} for {set i [llength $references]} {$i >= 0} {incr i -1} { set ref [lindex $references $i] if {[db_0or1row get_ref { select message_id as parent_id from uu_message_map where key = :ref and forum_id = :forum_id order by message_id limit 1}]} { break } } if {![db_0or1row get_user {select party_id as user_id from parties where email = :email}]} { set new_user_id [db_nextval acs_object_id_seq] set user_id [ad_user_new $email $name TEST foof "" "" "" "t" "approved" $new_user_id] } set istart [clock clicks] # insert message set new_id [db_nextval acs_object_id_seq] db_dml mapit {insert into uu_message_map values (:new_id,:message_id, :forum_id)} forum::message::new \ -forum_id $forum_id \ -message_id $new_id \ -parent_id $parent_id \ -subject $subject \ -content $text \ -html_p "f" \ -user_id $user_id # display timing info... set tend [clock clicks] set sma [expr {($sma * 9.0 + ($tend - $tstart)/1000)/10.0}] set lma [expr {($lma * 999.0 + ($tend - $tstart)/1000)/1000.0}] incr count ns_write "\nTook: [expr {($tend - $tstart)/1000}]ms sma: $sma lma: $lma \#$count skip\#$skip (parse:[expr {($cstart - $tstart)/1000}]ms check:[expr {($istart - $cstart)/1000}] ins:[expr {($tend - $istart)/1000}])\n\n" } } } proc space_used {} { for {set __i 1} {${__i} < [info level]} {incr __i} { uplevel $__i { set __count 0 foreach __v [info locals] { if {[array exists $__v]} { set __b [string bytelength [array get $__v]] } else { set __b [string bytelength [set $__v]] } ns_write "$__v: $__b\n" incr __count $b } ns_write "Total at [info level]: $__count\n" } } } proc _node {group} { set base_path /nn set base_name comp.os. # JCD forum per group array set node [site_node::get -url ${base_path}] return $node(package_id) array set node [site_node::get -url ${base_path}/$group] if {![string equal $node(url) ${base_path}/$group/]} { # Our forum node does not exist so lets create it. set name $base_name set path $base_path foreach elem [split $group /] { append name $elem array unset node array set node [site_node::get -url $path/$elem] if {![string equal $node(url) $path/$elem/]} { ns_write "Create and mount forum for $name at $path/$elem under $node(url)\n" set package_id [site_node_mount_application \ $node(node_id) \ $elem \ forums \ $name \ ] # create the forum in the new node... forum::new -name $name -presentation_type threaded -package_id $package_id } append name . append path /$elem } array set node [site_node::get -url ${base_path}/$group] } return $node(package_id) } # directory_p t object_type apm_service package_key acs-subsite package_id 2565 pattern_p t instance_name {Main Site} node_id 2592 parent_id {} url / object_id 2565 # directory_p t object_type apm_package package_key jcd-qa package_id 9483 pattern_p t instance_name qa node_id 9481 parent_id 2592 url /qa/ object_id 9483 ReturnHeaders text/plain _load /web/oatest/packages/jcd-qa/www/data